xref: /openbmc/linux/net/unix/af_unix.c (revision 95e9fd10)
1 /*
2  * NET4:	Implementation of BSD Unix domain sockets.
3  *
4  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  * Fixes:
12  *		Linus Torvalds	:	Assorted bug cures.
13  *		Niibe Yutaka	:	async I/O support.
14  *		Carsten Paeth	:	PF_UNIX check, address fixes.
15  *		Alan Cox	:	Limit size of allocated blocks.
16  *		Alan Cox	:	Fixed the stupid socketpair bug.
17  *		Alan Cox	:	BSD compatibility fine tuning.
18  *		Alan Cox	:	Fixed a bug in connect when interrupted.
19  *		Alan Cox	:	Sorted out a proper draft version of
20  *					file descriptor passing hacked up from
21  *					Mike Shaver's work.
22  *		Marty Leisner	:	Fixes to fd passing
23  *		Nick Nevin	:	recvmsg bugfix.
24  *		Alan Cox	:	Started proper garbage collector
25  *		Heiko Eißfeldt	:	Missing verify_area check
26  *		Alan Cox	:	Started POSIXisms
27  *		Andreas Schwab	:	Replace inode by dentry for proper
28  *					reference counting
29  *		Kirk Petersen	:	Made this a module
30  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
31  *					Lots of bug fixes.
32  *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
33  *					by above two patches.
34  *	     Andrea Arcangeli	:	If possible we block in connect(2)
35  *					if the max backlog of the listen socket
36  *					has been reached. This won't break
37  *					old apps and it avoids a huge number
38  *					of socks being hashed (for unix_gc()
39  *					performance reasons).
40  *					Security fix that limits the max
41  *					number of socks to 2*max_files and
42  *					the number of skb queueable in the
43  *					dgram receiver.
44  *		Artur Skawina   :	Hash function optimizations
45  *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
46  *	      Malcolm Beattie   :	Set peercred for socketpair
47  *	     Michal Ostrowski   :       Module initialization cleanup.
48  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
49  *	     				the core infrastructure is doing that
50  *	     				for all net proto families now (2.5.69+)
51  *
52  *
53  * Known differences from reference BSD that was tested:
54  *
55  *	[TO FIX]
56  *	ECONNREFUSED is not returned from one end of a connected socket to the
57  *		other the moment one end closes.
58  *	fstat() doesn't return st_dev=0, and gives the blksize as a high-water mark
59  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
60  *	[NOT TO FIX]
61  *	accept() returns a path name even if the connecting socket has closed
62  *		in the meantime (BSD loses the path and gives up).
63  *	accept() returns 0 length path for an unbound connector. BSD returns 16
64  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
66  *	BSD af_unix apparently has a connect that forgets to block properly.
67  *		(need to check this with the POSIX spec in detail)
68  *
69  * Differences from 2.0.0-11-... (ANK)
70  *	Bug fixes and improvements.
71  *		- client shutdown killed server socket.
72  *		- removed all useless cli/sti pairs.
73  *
74  *	Semantic changes/extensions.
75  *		- generic control message passing.
76  *		- SCM_CREDENTIALS control message.
77  *		- "Abstract" (not FS based) socket bindings.
78  *		  Abstract names are sequences of bytes (not zero terminated)
79  *		  starting with a NUL byte, so this name space does not
80  *		  intersect with BSD names (see the userspace sketch below).
81  */
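/*
 * Illustrative userspace sketch (not part of this file): binding a name in
 * the abstract namespace described above.  The leading NUL byte in sun_path
 * selects the abstract namespace, and the address length, not a NUL
 * terminator, delimits the name.  The name "demo" and the helper name are
 * invented for the example.
 *
 *	#include <stddef.h>
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <sys/un.h>
 *
 *	int bind_abstract(void)
 *	{
 *		struct sockaddr_un sun;
 *		int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *		memset(&sun, 0, sizeof(sun));
 *		sun.sun_family = AF_UNIX;
 *		sun.sun_path[0] = '\0';		// abstract namespace
 *		memcpy(sun.sun_path + 1, "demo", 4);
 *		return bind(fd, (struct sockaddr *)&sun,
 *			    offsetof(struct sockaddr_un, sun_path) + 1 + 4);
 *	}
 */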
82 
83 #include <linux/module.h>
84 #include <linux/kernel.h>
85 #include <linux/signal.h>
86 #include <linux/sched.h>
87 #include <linux/errno.h>
88 #include <linux/string.h>
89 #include <linux/stat.h>
90 #include <linux/dcache.h>
91 #include <linux/namei.h>
92 #include <linux/socket.h>
93 #include <linux/un.h>
94 #include <linux/fcntl.h>
95 #include <linux/termios.h>
96 #include <linux/sockios.h>
97 #include <linux/net.h>
98 #include <linux/in.h>
99 #include <linux/fs.h>
100 #include <linux/slab.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <net/net_namespace.h>
105 #include <net/sock.h>
106 #include <net/tcp_states.h>
107 #include <net/af_unix.h>
108 #include <linux/proc_fs.h>
109 #include <linux/seq_file.h>
110 #include <net/scm.h>
111 #include <linux/init.h>
112 #include <linux/poll.h>
113 #include <linux/rtnetlink.h>
114 #include <linux/mount.h>
115 #include <net/checksum.h>
116 #include <linux/security.h>
117 
118 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
119 EXPORT_SYMBOL_GPL(unix_socket_table);
120 DEFINE_SPINLOCK(unix_table_lock);
121 EXPORT_SYMBOL_GPL(unix_table_lock);
122 static atomic_long_t unix_nr_socks;
123 
124 
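/* Unbound sockets occupy the second half of unix_socket_table, hashed by the
 * sock pointer so they spread evenly across the chains.
 */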
125 static struct hlist_head *unix_sockets_unbound(void *addr)
126 {
127 	unsigned long hash = (unsigned long)addr;
128 
129 	hash ^= hash >> 16;
130 	hash ^= hash >> 8;
131 	hash %= UNIX_HASH_SIZE;
132 	return &unix_socket_table[UNIX_HASH_SIZE + hash];
133 }
134 
135 #define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
136 
137 #ifdef CONFIG_SECURITY_NETWORK
138 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
139 {
140 	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
141 }
142 
143 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
144 {
145 	scm->secid = *UNIXSID(skb);
146 }
147 #else
148 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
149 { }
150 
151 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
152 { }
153 #endif /* CONFIG_SECURITY_NETWORK */
154 
155 /*
156  *  SMP locking strategy:
157  *    the hash table is protected by the spinlock unix_table_lock;
158  *    each socket's state is protected by its own spin lock.
159  */
160 
161 static inline unsigned int unix_hash_fold(__wsum n)
162 {
163 	unsigned int hash = (__force unsigned int)n;
164 
165 	hash ^= hash>>16;
166 	hash ^= hash>>8;
167 	return hash&(UNIX_HASH_SIZE-1);
168 }
169 
170 #define unix_peer(sk) (unix_sk(sk)->peer)
171 
172 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
173 {
174 	return unix_peer(osk) == sk;
175 }
176 
177 static inline int unix_may_send(struct sock *sk, struct sock *osk)
178 {
179 	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
180 }
181 
182 static inline int unix_recvq_full(struct sock const *sk)
183 {
184 	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
185 }
186 
187 struct sock *unix_peer_get(struct sock *s)
188 {
189 	struct sock *peer;
190 
191 	unix_state_lock(s);
192 	peer = unix_peer(s);
193 	if (peer)
194 		sock_hold(peer);
195 	unix_state_unlock(s);
196 	return peer;
197 }
198 EXPORT_SYMBOL_GPL(unix_peer_get);
199 
200 static inline void unix_release_addr(struct unix_address *addr)
201 {
202 	if (atomic_dec_and_test(&addr->refcnt))
203 		kfree(addr);
204 }
205 
206 /*
207  *	Check unix socket name:
208  *		- should not be zero length.
209  *		- if it starts with a non-zero byte, it is a NUL-terminated
210  *		  filesystem object path; if it starts with zero, it is an abstract name.
211  */
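/*
 * Worked example: binding the filesystem name "/tmp/demo" (9 characters)
 * yields len = strlen("/tmp/demo") + 1 + sizeof(short) = 9 + 1 + 2 = 12,
 * i.e. the NUL-terminated path plus the sun_family field.
 */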
212 
213 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
214 {
215 	if (len <= sizeof(short) || len > sizeof(*sunaddr))
216 		return -EINVAL;
217 	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
218 		return -EINVAL;
219 	if (sunaddr->sun_path[0]) {
220 		/*
221 		 * This may look like an off by one error but it is a bit more
222 		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
223 		 * sun_path[108] does not, as such, exist.  However in kernel space
224 		 * we are guaranteed that it is a valid memory location in our
225 		 * kernel address buffer.
226 		 */
227 		((char *)sunaddr)[len] = 0;
228 		len = strlen(sunaddr->sun_path)+1+sizeof(short);
229 		return len;
230 	}
231 
232 	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
233 	return len;
234 }
235 
236 static void __unix_remove_socket(struct sock *sk)
237 {
238 	sk_del_node_init(sk);
239 }
240 
241 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
242 {
243 	WARN_ON(!sk_unhashed(sk));
244 	sk_add_node(sk, list);
245 }
246 
247 static inline void unix_remove_socket(struct sock *sk)
248 {
249 	spin_lock(&unix_table_lock);
250 	__unix_remove_socket(sk);
251 	spin_unlock(&unix_table_lock);
252 }
253 
254 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
255 {
256 	spin_lock(&unix_table_lock);
257 	__unix_insert_socket(list, sk);
258 	spin_unlock(&unix_table_lock);
259 }
260 
261 static struct sock *__unix_find_socket_byname(struct net *net,
262 					      struct sockaddr_un *sunname,
263 					      int len, int type, unsigned int hash)
264 {
265 	struct sock *s;
266 	struct hlist_node *node;
267 
268 	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
269 		struct unix_sock *u = unix_sk(s);
270 
271 		if (!net_eq(sock_net(s), net))
272 			continue;
273 
274 		if (u->addr->len == len &&
275 		    !memcmp(u->addr->name, sunname, len))
276 			goto found;
277 	}
278 	s = NULL;
279 found:
280 	return s;
281 }
282 
283 static inline struct sock *unix_find_socket_byname(struct net *net,
284 						   struct sockaddr_un *sunname,
285 						   int len, int type,
286 						   unsigned int hash)
287 {
288 	struct sock *s;
289 
290 	spin_lock(&unix_table_lock);
291 	s = __unix_find_socket_byname(net, sunname, len, type, hash);
292 	if (s)
293 		sock_hold(s);
294 	spin_unlock(&unix_table_lock);
295 	return s;
296 }
297 
298 static struct sock *unix_find_socket_byinode(struct inode *i)
299 {
300 	struct sock *s;
301 	struct hlist_node *node;
302 
303 	spin_lock(&unix_table_lock);
304 	sk_for_each(s, node,
305 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
306 		struct dentry *dentry = unix_sk(s)->path.dentry;
307 
308 		if (dentry && dentry->d_inode == i) {
309 			sock_hold(s);
310 			goto found;
311 		}
312 	}
313 	s = NULL;
314 found:
315 	spin_unlock(&unix_table_lock);
316 	return s;
317 }
318 
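/* A socket counts as writable while its in-flight write memory stays within
 * a quarter of sk_sndbuf.
 */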
319 static inline int unix_writable(struct sock *sk)
320 {
321 	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
322 }
323 
324 static void unix_write_space(struct sock *sk)
325 {
326 	struct socket_wq *wq;
327 
328 	rcu_read_lock();
329 	if (unix_writable(sk)) {
330 		wq = rcu_dereference(sk->sk_wq);
331 		if (wq_has_sleeper(wq))
332 			wake_up_interruptible_sync_poll(&wq->wait,
333 				POLLOUT | POLLWRNORM | POLLWRBAND);
334 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
335 	}
336 	rcu_read_unlock();
337 }
338 
339 /* When a dgram socket disconnects (or changes its peer), we clear its
340  * receive queue of packets that arrived from the previous peer. First, this
341  * allows flow control based only on wmem_alloc; second, a sk connected to a
342  * peer may receive messages only from that peer. */
343 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
344 {
345 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
346 		skb_queue_purge(&sk->sk_receive_queue);
347 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
348 
349 		/* If one link of a bidirectional dgram pipe is disconnected,
350 		 * we signal an error. Messages are lost. Do not do this
351 		 * when the peer was not connected to us.
352 		 */
353 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
354 			other->sk_err = ECONNRESET;
355 			other->sk_error_report(other);
356 		}
357 	}
358 }
359 
360 static void unix_sock_destructor(struct sock *sk)
361 {
362 	struct unix_sock *u = unix_sk(sk);
363 
364 	skb_queue_purge(&sk->sk_receive_queue);
365 
366 	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
367 	WARN_ON(!sk_unhashed(sk));
368 	WARN_ON(sk->sk_socket);
369 	if (!sock_flag(sk, SOCK_DEAD)) {
370 		printk(KERN_INFO "Attempt to release alive unix socket: %p\n", sk);
371 		return;
372 	}
373 
374 	if (u->addr)
375 		unix_release_addr(u->addr);
376 
377 	atomic_long_dec(&unix_nr_socks);
378 	local_bh_disable();
379 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
380 	local_bh_enable();
381 #ifdef UNIX_REFCNT_DEBUG
382 	printk(KERN_DEBUG "UNIX %p is destroyed, %ld are still alive.\n", sk,
383 		atomic_long_read(&unix_nr_socks));
384 #endif
385 }
386 
387 static int unix_release_sock(struct sock *sk, int embrion)
388 {
389 	struct unix_sock *u = unix_sk(sk);
390 	struct path path;
391 	struct sock *skpair;
392 	struct sk_buff *skb;
393 	int state;
394 
395 	unix_remove_socket(sk);
396 
397 	/* Clear state */
398 	unix_state_lock(sk);
399 	sock_orphan(sk);
400 	sk->sk_shutdown = SHUTDOWN_MASK;
401 	path	     = u->path;
402 	u->path.dentry = NULL;
403 	u->path.mnt = NULL;
404 	state = sk->sk_state;
405 	sk->sk_state = TCP_CLOSE;
406 	unix_state_unlock(sk);
407 
408 	wake_up_interruptible_all(&u->peer_wait);
409 
410 	skpair = unix_peer(sk);
411 
412 	if (skpair != NULL) {
413 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
414 			unix_state_lock(skpair);
415 			/* No more writes */
416 			skpair->sk_shutdown = SHUTDOWN_MASK;
417 			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
418 				skpair->sk_err = ECONNRESET;
419 			unix_state_unlock(skpair);
420 			skpair->sk_state_change(skpair);
421 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
422 		}
423 		sock_put(skpair); /* It may now die */
424 		unix_peer(sk) = NULL;
425 	}
426 
427 	/* Try to flush out this socket. Throw out buffers at least */
428 
429 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
430 		if (state == TCP_LISTEN)
431 			unix_release_sock(skb->sk, 1);
432 		/* passed fds are erased in the kfree_skb hook	      */
433 		kfree_skb(skb);
434 	}
435 
436 	if (path.dentry)
437 		path_put(&path);
438 
439 	sock_put(sk);
440 
441 	/* ---- Socket is dead now and most probably destroyed ---- */
442 
443 	/*
444 	 * Fixme: BSD difference: In BSD all sockets connected to us get
445 	 *	  ECONNRESET and we die on the spot. In Linux we behave
446 	 *	  like files and pipes do and wait for the last
447 	 *	  dereference.
448 	 *
449 	 * Can't we simply set sock->err?
450 	 *
451 	 *	  What is the above comment talking about? --ANK(980817)
452 	 */
453 
454 	if (unix_tot_inflight)
455 		unix_gc();		/* Garbage collect fds */
456 
457 	return 0;
458 }
459 
460 static void init_peercred(struct sock *sk)
461 {
462 	put_pid(sk->sk_peer_pid);
463 	if (sk->sk_peer_cred)
464 		put_cred(sk->sk_peer_cred);
465 	sk->sk_peer_pid  = get_pid(task_tgid(current));
466 	sk->sk_peer_cred = get_current_cred();
467 }
468 
469 static void copy_peercred(struct sock *sk, struct sock *peersk)
470 {
471 	put_pid(sk->sk_peer_pid);
472 	if (sk->sk_peer_cred)
473 		put_cred(sk->sk_peer_cred);
474 	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
475 	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
476 }
477 
478 static int unix_listen(struct socket *sock, int backlog)
479 {
480 	int err;
481 	struct sock *sk = sock->sk;
482 	struct unix_sock *u = unix_sk(sk);
483 	struct pid *old_pid = NULL;
484 	const struct cred *old_cred = NULL;
485 
486 	err = -EOPNOTSUPP;
487 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
488 		goto out;	/* Only stream/seqpacket sockets accept */
489 	err = -EINVAL;
490 	if (!u->addr)
491 		goto out;	/* No listens on an unbound socket */
492 	unix_state_lock(sk);
493 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
494 		goto out_unlock;
495 	if (backlog > sk->sk_max_ack_backlog)
496 		wake_up_interruptible_all(&u->peer_wait);
497 	sk->sk_max_ack_backlog	= backlog;
498 	sk->sk_state		= TCP_LISTEN;
499 	/* set credentials so connect can copy them */
500 	init_peercred(sk);
501 	err = 0;
502 
503 out_unlock:
504 	unix_state_unlock(sk);
505 	put_pid(old_pid);
506 	if (old_cred)
507 		put_cred(old_cred);
508 out:
509 	return err;
510 }
511 
512 static int unix_release(struct socket *);
513 static int unix_bind(struct socket *, struct sockaddr *, int);
514 static int unix_stream_connect(struct socket *, struct sockaddr *,
515 			       int addr_len, int flags);
516 static int unix_socketpair(struct socket *, struct socket *);
517 static int unix_accept(struct socket *, struct socket *, int);
518 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
519 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
520 static unsigned int unix_dgram_poll(struct file *, struct socket *,
521 				    poll_table *);
522 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
523 static int unix_shutdown(struct socket *, int);
524 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
525 			       struct msghdr *, size_t);
526 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
527 			       struct msghdr *, size_t, int);
528 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
529 			      struct msghdr *, size_t);
530 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
531 			      struct msghdr *, size_t, int);
532 static int unix_dgram_connect(struct socket *, struct sockaddr *,
533 			      int, int);
534 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
535 				  struct msghdr *, size_t);
536 static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
537 				  struct msghdr *, size_t, int);
538 
539 static void unix_set_peek_off(struct sock *sk, int val)
540 {
541 	struct unix_sock *u = unix_sk(sk);
542 
543 	mutex_lock(&u->readlock);
544 	sk->sk_peek_off = val;
545 	mutex_unlock(&u->readlock);
546 }
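/*
 * Illustrative userspace sketch (not part of this file): SO_PEEK_OFF lets
 * MSG_PEEK resume where the previous peek stopped instead of rereading from
 * the head of the queue; the helper name is invented for the example.
 *
 *	#include <sys/socket.h>
 *
 *	int enable_peek_offset(int fd)
 *	{
 *		int off = 0;	// start peeking at offset 0
 *
 *		return setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF,
 *				  &off, sizeof(off));
 *	}
 */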
547 
548 
549 static const struct proto_ops unix_stream_ops = {
550 	.family =	PF_UNIX,
551 	.owner =	THIS_MODULE,
552 	.release =	unix_release,
553 	.bind =		unix_bind,
554 	.connect =	unix_stream_connect,
555 	.socketpair =	unix_socketpair,
556 	.accept =	unix_accept,
557 	.getname =	unix_getname,
558 	.poll =		unix_poll,
559 	.ioctl =	unix_ioctl,
560 	.listen =	unix_listen,
561 	.shutdown =	unix_shutdown,
562 	.setsockopt =	sock_no_setsockopt,
563 	.getsockopt =	sock_no_getsockopt,
564 	.sendmsg =	unix_stream_sendmsg,
565 	.recvmsg =	unix_stream_recvmsg,
566 	.mmap =		sock_no_mmap,
567 	.sendpage =	sock_no_sendpage,
568 	.set_peek_off =	unix_set_peek_off,
569 };
570 
571 static const struct proto_ops unix_dgram_ops = {
572 	.family =	PF_UNIX,
573 	.owner =	THIS_MODULE,
574 	.release =	unix_release,
575 	.bind =		unix_bind,
576 	.connect =	unix_dgram_connect,
577 	.socketpair =	unix_socketpair,
578 	.accept =	sock_no_accept,
579 	.getname =	unix_getname,
580 	.poll =		unix_dgram_poll,
581 	.ioctl =	unix_ioctl,
582 	.listen =	sock_no_listen,
583 	.shutdown =	unix_shutdown,
584 	.setsockopt =	sock_no_setsockopt,
585 	.getsockopt =	sock_no_getsockopt,
586 	.sendmsg =	unix_dgram_sendmsg,
587 	.recvmsg =	unix_dgram_recvmsg,
588 	.mmap =		sock_no_mmap,
589 	.sendpage =	sock_no_sendpage,
590 	.set_peek_off =	unix_set_peek_off,
591 };
592 
593 static const struct proto_ops unix_seqpacket_ops = {
594 	.family =	PF_UNIX,
595 	.owner =	THIS_MODULE,
596 	.release =	unix_release,
597 	.bind =		unix_bind,
598 	.connect =	unix_stream_connect,
599 	.socketpair =	unix_socketpair,
600 	.accept =	unix_accept,
601 	.getname =	unix_getname,
602 	.poll =		unix_dgram_poll,
603 	.ioctl =	unix_ioctl,
604 	.listen =	unix_listen,
605 	.shutdown =	unix_shutdown,
606 	.setsockopt =	sock_no_setsockopt,
607 	.getsockopt =	sock_no_getsockopt,
608 	.sendmsg =	unix_seqpacket_sendmsg,
609 	.recvmsg =	unix_seqpacket_recvmsg,
610 	.mmap =		sock_no_mmap,
611 	.sendpage =	sock_no_sendpage,
612 	.set_peek_off =	unix_set_peek_off,
613 };
614 
615 static struct proto unix_proto = {
616 	.name			= "UNIX",
617 	.owner			= THIS_MODULE,
618 	.obj_size		= sizeof(struct unix_sock),
619 };
620 
621 /*
622  * AF_UNIX sockets do not interact with hardware, hence they
623  * don't trigger interrupts - so it's safe for them to have
624  * bh-unsafe locking for their sk_receive_queue.lock. Split off
625  * this special lock-class by reinitializing the spinlock key:
626  */
627 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
628 
629 static struct sock *unix_create1(struct net *net, struct socket *sock)
630 {
631 	struct sock *sk = NULL;
632 	struct unix_sock *u;
633 
634 	atomic_long_inc(&unix_nr_socks);
635 	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
636 		goto out;
637 
638 	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
639 	if (!sk)
640 		goto out;
641 
642 	sock_init_data(sock, sk);
643 	lockdep_set_class(&sk->sk_receive_queue.lock,
644 				&af_unix_sk_receive_queue_lock_key);
645 
646 	sk->sk_write_space	= unix_write_space;
647 	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
648 	sk->sk_destruct		= unix_sock_destructor;
649 	u	  = unix_sk(sk);
650 	u->path.dentry = NULL;
651 	u->path.mnt = NULL;
652 	spin_lock_init(&u->lock);
653 	atomic_long_set(&u->inflight, 0);
654 	INIT_LIST_HEAD(&u->link);
655 	mutex_init(&u->readlock); /* single task reading lock */
656 	init_waitqueue_head(&u->peer_wait);
657 	unix_insert_socket(unix_sockets_unbound(sk), sk);
658 out:
659 	if (sk == NULL)
660 		atomic_long_dec(&unix_nr_socks);
661 	else {
662 		local_bh_disable();
663 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
664 		local_bh_enable();
665 	}
666 	return sk;
667 }
668 
669 static int unix_create(struct net *net, struct socket *sock, int protocol,
670 		       int kern)
671 {
672 	if (protocol && protocol != PF_UNIX)
673 		return -EPROTONOSUPPORT;
674 
675 	sock->state = SS_UNCONNECTED;
676 
677 	switch (sock->type) {
678 	case SOCK_STREAM:
679 		sock->ops = &unix_stream_ops;
680 		break;
681 		/*
682 		 *	Believe it or not, BSD has AF_UNIX, SOCK_RAW, though
683 		 *	nothing uses it.
684 		 */
685 	case SOCK_RAW:
686 		sock->type = SOCK_DGRAM;
687 	case SOCK_DGRAM:
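		/* fall through: a RAW socket is handled as DGRAM */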
688 		sock->ops = &unix_dgram_ops;
689 		break;
690 	case SOCK_SEQPACKET:
691 		sock->ops = &unix_seqpacket_ops;
692 		break;
693 	default:
694 		return -ESOCKTNOSUPPORT;
695 	}
696 
697 	return unix_create1(net, sock) ? 0 : -ENOMEM;
698 }
699 
700 static int unix_release(struct socket *sock)
701 {
702 	struct sock *sk = sock->sk;
703 
704 	if (!sk)
705 		return 0;
706 
707 	sock->sk = NULL;
708 
709 	return unix_release_sock(sk, 0);
710 }
711 
712 static int unix_autobind(struct socket *sock)
713 {
714 	struct sock *sk = sock->sk;
715 	struct net *net = sock_net(sk);
716 	struct unix_sock *u = unix_sk(sk);
717 	static u32 ordernum = 1;
718 	struct unix_address *addr;
719 	int err;
720 	unsigned int retries = 0;
721 
722 	mutex_lock(&u->readlock);
723 
724 	err = 0;
725 	if (u->addr)
726 		goto out;
727 
728 	err = -ENOMEM;
729 	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
730 	if (!addr)
731 		goto out;
732 
733 	addr->name->sun_family = AF_UNIX;
734 	atomic_set(&addr->refcnt, 1);
735 
736 retry:
737 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
738 	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
739 
740 	spin_lock(&unix_table_lock);
741 	ordernum = (ordernum+1)&0xFFFFF;
742 
743 	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
744 				      addr->hash)) {
745 		spin_unlock(&unix_table_lock);
746 		/*
747 		 * __unix_find_socket_byname() may take a long time if many names
748 		 * are already in use.
749 		 */
750 		cond_resched();
751 		/* Give up if all names seem to be in use. */
752 		if (retries++ == 0xFFFFF) {
753 			err = -ENOSPC;
754 			kfree(addr);
755 			goto out;
756 		}
757 		goto retry;
758 	}
759 	addr->hash ^= sk->sk_type;
760 
761 	__unix_remove_socket(sk);
762 	u->addr = addr;
763 	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
764 	spin_unlock(&unix_table_lock);
765 	err = 0;
766 
767 out:	mutex_unlock(&u->readlock);
768 	return err;
769 }
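/*
 * Illustrative userspace sketch (not part of this file): autobinding is
 * triggered by bind() with only the address family, after which the kernel
 * picks a unique five-hex-digit abstract name as implemented above; the
 * helper name is invented for the example.
 *
 *	#include <sys/socket.h>
 *	#include <sys/un.h>
 *
 *	int autobind_socket(int fd)
 *	{
 *		struct sockaddr_un sun = { .sun_family = AF_UNIX };
 *
 *		return bind(fd, (struct sockaddr *)&sun, sizeof(sa_family_t));
 *	}
 */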
770 
771 static struct sock *unix_find_other(struct net *net,
772 				    struct sockaddr_un *sunname, int len,
773 				    int type, unsigned int hash, int *error)
774 {
775 	struct sock *u;
776 	struct path path;
777 	int err = 0;
778 
779 	if (sunname->sun_path[0]) {
780 		struct inode *inode;
781 		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
782 		if (err)
783 			goto fail;
784 		inode = path.dentry->d_inode;
785 		err = inode_permission(inode, MAY_WRITE);
786 		if (err)
787 			goto put_fail;
788 
789 		err = -ECONNREFUSED;
790 		if (!S_ISSOCK(inode->i_mode))
791 			goto put_fail;
792 		u = unix_find_socket_byinode(inode);
793 		if (!u)
794 			goto put_fail;
795 
796 		if (u->sk_type == type)
797 			touch_atime(&path);
798 
799 		path_put(&path);
800 
801 		err = -EPROTOTYPE;
802 		if (u->sk_type != type) {
803 			sock_put(u);
804 			goto fail;
805 		}
806 	} else {
807 		err = -ECONNREFUSED;
808 		u = unix_find_socket_byname(net, sunname, len, type, hash);
809 		if (u) {
810 			struct dentry *dentry;
811 			dentry = unix_sk(u)->path.dentry;
812 			if (dentry)
813 				touch_atime(&unix_sk(u)->path);
814 		} else
815 			goto fail;
816 	}
817 	return u;
818 
819 put_fail:
820 	path_put(&path);
821 fail:
822 	*error = err;
823 	return NULL;
824 }
825 
826 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
827 {
828 	struct dentry *dentry;
829 	struct path path;
830 	int err = 0;
831 	/*
832 	 * Get the parent directory, calculate the hash for last
833 	 * component.
834 	 */
835 	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
836 	err = PTR_ERR(dentry);
837 	if (IS_ERR(dentry))
838 		return err;
839 
840 	/*
841 	 * All right, let's create it.
842 	 */
843 	err = security_path_mknod(&path, dentry, mode, 0);
844 	if (!err) {
845 		err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
846 		if (!err) {
847 			res->mnt = mntget(path.mnt);
848 			res->dentry = dget(dentry);
849 		}
850 	}
851 	done_path_create(&path, dentry);
852 	return err;
853 }
854 
855 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
856 {
857 	struct sock *sk = sock->sk;
858 	struct net *net = sock_net(sk);
859 	struct unix_sock *u = unix_sk(sk);
860 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
861 	char *sun_path = sunaddr->sun_path;
862 	int err;
863 	unsigned int hash;
864 	struct unix_address *addr;
865 	struct hlist_head *list;
866 
867 	err = -EINVAL;
868 	if (sunaddr->sun_family != AF_UNIX)
869 		goto out;
870 
871 	if (addr_len == sizeof(short)) {
872 		err = unix_autobind(sock);
873 		goto out;
874 	}
875 
876 	err = unix_mkname(sunaddr, addr_len, &hash);
877 	if (err < 0)
878 		goto out;
879 	addr_len = err;
880 
881 	mutex_lock(&u->readlock);
882 
883 	err = -EINVAL;
884 	if (u->addr)
885 		goto out_up;
886 
887 	err = -ENOMEM;
888 	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
889 	if (!addr)
890 		goto out_up;
891 
892 	memcpy(addr->name, sunaddr, addr_len);
893 	addr->len = addr_len;
894 	addr->hash = hash ^ sk->sk_type;
895 	atomic_set(&addr->refcnt, 1);
896 
897 	if (sun_path[0]) {
898 		struct path path;
899 		umode_t mode = S_IFSOCK |
900 		       (SOCK_INODE(sock)->i_mode & ~current_umask());
901 		err = unix_mknod(sun_path, mode, &path);
902 		if (err) {
903 			if (err == -EEXIST)
904 				err = -EADDRINUSE;
905 			unix_release_addr(addr);
906 			goto out_up;
907 		}
908 		addr->hash = UNIX_HASH_SIZE;
909 		hash = path.dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1);
910 		spin_lock(&unix_table_lock);
911 		u->path = path;
912 		list = &unix_socket_table[hash];
913 	} else {
914 		spin_lock(&unix_table_lock);
915 		err = -EADDRINUSE;
916 		if (__unix_find_socket_byname(net, sunaddr, addr_len,
917 					      sk->sk_type, hash)) {
918 			unix_release_addr(addr);
919 			goto out_unlock;
920 		}
921 
922 		list = &unix_socket_table[addr->hash];
923 	}
924 
925 	err = 0;
926 	__unix_remove_socket(sk);
927 	u->addr = addr;
928 	__unix_insert_socket(list, sk);
929 
930 out_unlock:
931 	spin_unlock(&unix_table_lock);
932 out_up:
933 	mutex_unlock(&u->readlock);
934 out:
935 	return err;
936 }
937 
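/* Take both state locks in pointer order so two tasks locking the same pair
 * cannot deadlock; a NULL or identical peer degenerates to a single lock.
 */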
938 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
939 {
940 	if (unlikely(sk1 == sk2) || !sk2) {
941 		unix_state_lock(sk1);
942 		return;
943 	}
944 	if (sk1 < sk2) {
945 		unix_state_lock(sk1);
946 		unix_state_lock_nested(sk2);
947 	} else {
948 		unix_state_lock(sk2);
949 		unix_state_lock_nested(sk1);
950 	}
951 }
952 
953 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
954 {
955 	if (unlikely(sk1 == sk2) || !sk2) {
956 		unix_state_unlock(sk1);
957 		return;
958 	}
959 	unix_state_unlock(sk1);
960 	unix_state_unlock(sk2);
961 }
962 
963 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
964 			      int alen, int flags)
965 {
966 	struct sock *sk = sock->sk;
967 	struct net *net = sock_net(sk);
968 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
969 	struct sock *other;
970 	unsigned int hash;
971 	int err;
972 
973 	if (addr->sa_family != AF_UNSPEC) {
974 		err = unix_mkname(sunaddr, alen, &hash);
975 		if (err < 0)
976 			goto out;
977 		alen = err;
978 
979 		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
980 		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
981 			goto out;
982 
983 restart:
984 		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
985 		if (!other)
986 			goto out;
987 
988 		unix_state_double_lock(sk, other);
989 
990 		/* Apparently VFS overslept socket death. Retry. */
991 		if (sock_flag(other, SOCK_DEAD)) {
992 			unix_state_double_unlock(sk, other);
993 			sock_put(other);
994 			goto restart;
995 		}
996 
997 		err = -EPERM;
998 		if (!unix_may_send(sk, other))
999 			goto out_unlock;
1000 
1001 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1002 		if (err)
1003 			goto out_unlock;
1004 
1005 	} else {
1006 		/*
1007 		 *	1003.1g breaking connected state with AF_UNSPEC
1008 		 */
1009 		other = NULL;
1010 		unix_state_double_lock(sk, other);
1011 	}
1012 
1013 	/*
1014 	 * If it was connected, reconnect.
1015 	 */
1016 	if (unix_peer(sk)) {
1017 		struct sock *old_peer = unix_peer(sk);
1018 		unix_peer(sk) = other;
1019 		unix_state_double_unlock(sk, other);
1020 
1021 		if (other != old_peer)
1022 			unix_dgram_disconnected(sk, old_peer);
1023 		sock_put(old_peer);
1024 	} else {
1025 		unix_peer(sk) = other;
1026 		unix_state_double_unlock(sk, other);
1027 	}
1028 	return 0;
1029 
1030 out_unlock:
1031 	unix_state_double_unlock(sk, other);
1032 	sock_put(other);
1033 out:
1034 	return err;
1035 }
1036 
1037 static long unix_wait_for_peer(struct sock *other, long timeo)
1038 {
1039 	struct unix_sock *u = unix_sk(other);
1040 	int sched;
1041 	DEFINE_WAIT(wait);
1042 
1043 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1044 
1045 	sched = !sock_flag(other, SOCK_DEAD) &&
1046 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1047 		unix_recvq_full(other);
1048 
1049 	unix_state_unlock(other);
1050 
1051 	if (sched)
1052 		timeo = schedule_timeout(timeo);
1053 
1054 	finish_wait(&u->peer_wait, &wait);
1055 	return timeo;
1056 }
1057 
1058 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1059 			       int addr_len, int flags)
1060 {
1061 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1062 	struct sock *sk = sock->sk;
1063 	struct net *net = sock_net(sk);
1064 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1065 	struct sock *newsk = NULL;
1066 	struct sock *other = NULL;
1067 	struct sk_buff *skb = NULL;
1068 	unsigned int hash;
1069 	int st;
1070 	int err;
1071 	long timeo;
1072 
1073 	err = unix_mkname(sunaddr, addr_len, &hash);
1074 	if (err < 0)
1075 		goto out;
1076 	addr_len = err;
1077 
1078 	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1079 	    (err = unix_autobind(sock)) != 0)
1080 		goto out;
1081 
1082 	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1083 
1084 	/* First of all allocate resources.
1085 	   If we allocate after the state is locked,
1086 	   we will have to recheck everything again in any case.
1087 	 */
1088 
1089 	err = -ENOMEM;
1090 
1091 	/* create new sock for complete connection */
1092 	newsk = unix_create1(sock_net(sk), NULL);
1093 	if (newsk == NULL)
1094 		goto out;
1095 
1096 	/* Allocate skb for sending to listening sock */
1097 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1098 	if (skb == NULL)
1099 		goto out;
1100 
1101 restart:
1102 	/*  Find listening sock. */
1103 	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1104 	if (!other)
1105 		goto out;
1106 
1107 	/* Latch state of peer */
1108 	unix_state_lock(other);
1109 
1110 	/* Apparently VFS overslept socket death. Retry. */
1111 	if (sock_flag(other, SOCK_DEAD)) {
1112 		unix_state_unlock(other);
1113 		sock_put(other);
1114 		goto restart;
1115 	}
1116 
1117 	err = -ECONNREFUSED;
1118 	if (other->sk_state != TCP_LISTEN)
1119 		goto out_unlock;
1120 	if (other->sk_shutdown & RCV_SHUTDOWN)
1121 		goto out_unlock;
1122 
1123 	if (unix_recvq_full(other)) {
1124 		err = -EAGAIN;
1125 		if (!timeo)
1126 			goto out_unlock;
1127 
1128 		timeo = unix_wait_for_peer(other, timeo);
1129 
1130 		err = sock_intr_errno(timeo);
1131 		if (signal_pending(current))
1132 			goto out;
1133 		sock_put(other);
1134 		goto restart;
1135 	}
1136 
1137 	/* Latch our state.
1138 
1139 	   This is a tricky place. We need to grab our state lock and cannot
1140 	   drop the lock on the peer. It is dangerous because deadlock is
1141 	   possible. The connect-to-self case and simultaneous
1142 	   attempts to connect are eliminated by checking socket
1143 	   state: other is TCP_LISTEN, and if sk is TCP_LISTEN we
1144 	   check this before attempting to grab the lock.
1145 
1146 	   Well, and we have to recheck the state after the socket is locked.
1147 	 */
1148 	st = sk->sk_state;
1149 
1150 	switch (st) {
1151 	case TCP_CLOSE:
1152 		/* This is ok... continue with connect */
1153 		break;
1154 	case TCP_ESTABLISHED:
1155 		/* Socket is already connected */
1156 		err = -EISCONN;
1157 		goto out_unlock;
1158 	default:
1159 		err = -EINVAL;
1160 		goto out_unlock;
1161 	}
1162 
1163 	unix_state_lock_nested(sk);
1164 
1165 	if (sk->sk_state != st) {
1166 		unix_state_unlock(sk);
1167 		unix_state_unlock(other);
1168 		sock_put(other);
1169 		goto restart;
1170 	}
1171 
1172 	err = security_unix_stream_connect(sk, other, newsk);
1173 	if (err) {
1174 		unix_state_unlock(sk);
1175 		goto out_unlock;
1176 	}
1177 
1178 	/* The way is open! Quickly set all the necessary fields... */
1179 
1180 	sock_hold(sk);
1181 	unix_peer(newsk)	= sk;
1182 	newsk->sk_state		= TCP_ESTABLISHED;
1183 	newsk->sk_type		= sk->sk_type;
1184 	init_peercred(newsk);
1185 	newu = unix_sk(newsk);
1186 	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1187 	otheru = unix_sk(other);
1188 
1189 	/* copy address information from listening to new sock */
1190 	if (otheru->addr) {
1191 		atomic_inc(&otheru->addr->refcnt);
1192 		newu->addr = otheru->addr;
1193 	}
1194 	if (otheru->path.dentry) {
1195 		path_get(&otheru->path);
1196 		newu->path = otheru->path;
1197 	}
1198 
1199 	/* Set credentials */
1200 	copy_peercred(sk, other);
1201 
1202 	sock->state	= SS_CONNECTED;
1203 	sk->sk_state	= TCP_ESTABLISHED;
1204 	sock_hold(newsk);
1205 
1206 	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
1207 	unix_peer(sk)	= newsk;
1208 
1209 	unix_state_unlock(sk);
1210 
1211 	/* take ten and send info to the listening sock */
1212 	spin_lock(&other->sk_receive_queue.lock);
1213 	__skb_queue_tail(&other->sk_receive_queue, skb);
1214 	spin_unlock(&other->sk_receive_queue.lock);
1215 	unix_state_unlock(other);
1216 	other->sk_data_ready(other, 0);
1217 	sock_put(other);
1218 	return 0;
1219 
1220 out_unlock:
1221 	if (other)
1222 		unix_state_unlock(other);
1223 
1224 out:
1225 	kfree_skb(skb);
1226 	if (newsk)
1227 		unix_release_sock(newsk, 0);
1228 	if (other)
1229 		sock_put(other);
1230 	return err;
1231 }
1232 
1233 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1234 {
1235 	struct sock *ska = socka->sk, *skb = sockb->sk;
1236 
1237 	/* Join our sockets back to back */
1238 	sock_hold(ska);
1239 	sock_hold(skb);
1240 	unix_peer(ska) = skb;
1241 	unix_peer(skb) = ska;
1242 	init_peercred(ska);
1243 	init_peercred(skb);
1244 
1245 	if (ska->sk_type != SOCK_DGRAM) {
1246 		ska->sk_state = TCP_ESTABLISHED;
1247 		skb->sk_state = TCP_ESTABLISHED;
1248 		socka->state  = SS_CONNECTED;
1249 		sockb->state  = SS_CONNECTED;
1250 	}
1251 	return 0;
1252 }
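/*
 * Illustrative userspace sketch (not part of this file): socketpair() is the
 * usual way to obtain two AF_UNIX sockets joined back to back as done above;
 * the helper name is invented for the example.
 *
 *	#include <sys/socket.h>
 *
 *	int make_pair(int sv[2])
 *	{
 *		return socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *	}
 */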
1253 
1254 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1255 {
1256 	struct sock *sk = sock->sk;
1257 	struct sock *tsk;
1258 	struct sk_buff *skb;
1259 	int err;
1260 
1261 	err = -EOPNOTSUPP;
1262 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1263 		goto out;
1264 
1265 	err = -EINVAL;
1266 	if (sk->sk_state != TCP_LISTEN)
1267 		goto out;
1268 
1269 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1270 	 * so no locks are necessary.
1271 	 */
1272 
1273 	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1274 	if (!skb) {
1275 		/* This means receive shutdown. */
1276 		if (err == 0)
1277 			err = -EINVAL;
1278 		goto out;
1279 	}
1280 
1281 	tsk = skb->sk;
1282 	skb_free_datagram(sk, skb);
1283 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1284 
1285 	/* attach accepted sock to socket */
1286 	unix_state_lock(tsk);
1287 	newsock->state = SS_CONNECTED;
1288 	sock_graft(tsk, newsock);
1289 	unix_state_unlock(tsk);
1290 	return 0;
1291 
1292 out:
1293 	return err;
1294 }
1295 
1296 
1297 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1298 {
1299 	struct sock *sk = sock->sk;
1300 	struct unix_sock *u;
1301 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1302 	int err = 0;
1303 
1304 	if (peer) {
1305 		sk = unix_peer_get(sk);
1306 
1307 		err = -ENOTCONN;
1308 		if (!sk)
1309 			goto out;
1310 		err = 0;
1311 	} else {
1312 		sock_hold(sk);
1313 	}
1314 
1315 	u = unix_sk(sk);
1316 	unix_state_lock(sk);
1317 	if (!u->addr) {
1318 		sunaddr->sun_family = AF_UNIX;
1319 		sunaddr->sun_path[0] = 0;
1320 		*uaddr_len = sizeof(short);
1321 	} else {
1322 		struct unix_address *addr = u->addr;
1323 
1324 		*uaddr_len = addr->len;
1325 		memcpy(sunaddr, addr->name, *uaddr_len);
1326 	}
1327 	unix_state_unlock(sk);
1328 	sock_put(sk);
1329 out:
1330 	return err;
1331 }
1332 
1333 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1334 {
1335 	int i;
1336 
1337 	scm->fp = UNIXCB(skb).fp;
1338 	UNIXCB(skb).fp = NULL;
1339 
1340 	for (i = scm->fp->count-1; i >= 0; i--)
1341 		unix_notinflight(scm->fp->fp[i]);
1342 }
1343 
1344 static void unix_destruct_scm(struct sk_buff *skb)
1345 {
1346 	struct scm_cookie scm;
1347 	memset(&scm, 0, sizeof(scm));
1348 	scm.pid  = UNIXCB(skb).pid;
1349 	scm.cred = UNIXCB(skb).cred;
1350 	if (UNIXCB(skb).fp)
1351 		unix_detach_fds(&scm, skb);
1352 
1353 	/* Alas, it calls VFS */
1354 	/* So fscking what? fput() had been SMP-safe since the last Summer */
1355 	scm_destroy(&scm);
1356 	sock_wfree(skb);
1357 }
1358 
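/* Bound how deeply AF_UNIX sockets may be nested inside SCM_RIGHTS messages
 * carried over other AF_UNIX sockets, keeping the garbage collector's work
 * bounded.
 */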
1359 #define MAX_RECURSION_LEVEL 4
1360 
1361 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1362 {
1363 	int i;
1364 	unsigned char max_level = 0;
1365 	int unix_sock_count = 0;
1366 
1367 	for (i = scm->fp->count - 1; i >= 0; i--) {
1368 		struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1369 
1370 		if (sk) {
1371 			unix_sock_count++;
1372 			max_level = max(max_level,
1373 					unix_sk(sk)->recursion_level);
1374 		}
1375 	}
1376 	if (unlikely(max_level > MAX_RECURSION_LEVEL))
1377 		return -ETOOMANYREFS;
1378 
1379 	/*
1380 	 * Need to duplicate file references for the sake of garbage
1381 	 * collection.  Otherwise a socket in the fps might become a
1382 	 * candidate for GC while the skb is not yet queued.
1383 	 */
1384 	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1385 	if (!UNIXCB(skb).fp)
1386 		return -ENOMEM;
1387 
1388 	if (unix_sock_count) {
1389 		for (i = scm->fp->count - 1; i >= 0; i--)
1390 			unix_inflight(scm->fp->fp[i]);
1391 	}
1392 	return max_level;
1393 }
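/*
 * Illustrative userspace sketch (not part of this file): sending one file
 * descriptor with SCM_RIGHTS.  unix_attach_fds() above is what accounts the
 * descriptor as in-flight on the kernel side; the helper name is invented
 * for the example.
 *
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <sys/uio.h>
 *
 *	ssize_t send_fd(int sock, int fd)
 *	{
 *		char data = 'x', ctl[CMSG_SPACE(sizeof(int))];
 *		struct iovec iov = { .iov_base = &data, .iov_len = 1 };
 *		struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
 *				      .msg_control = ctl,
 *				      .msg_controllen = sizeof(ctl) };
 *		struct cmsghdr *cm;
 *
 *		memset(ctl, 0, sizeof(ctl));
 *		cm = CMSG_FIRSTHDR(&msg);
 *		cm->cmsg_level = SOL_SOCKET;
 *		cm->cmsg_type  = SCM_RIGHTS;
 *		cm->cmsg_len   = CMSG_LEN(sizeof(int));
 *		memcpy(CMSG_DATA(cm), &fd, sizeof(int));
 *		return sendmsg(sock, &msg, 0);
 *	}
 */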
1394 
1395 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1396 {
1397 	int err = 0;
1398 
1399 	UNIXCB(skb).pid  = get_pid(scm->pid);
1400 	if (scm->cred)
1401 		UNIXCB(skb).cred = get_cred(scm->cred);
1402 	UNIXCB(skb).fp = NULL;
1403 	if (scm->fp && send_fds)
1404 		err = unix_attach_fds(scm, skb);
1405 
1406 	skb->destructor = unix_destruct_scm;
1407 	return err;
1408 }
1409 
1410 /*
1411  * Some apps rely on write() giving SCM_CREDENTIALS.
1412  * We include credentials if the source or destination socket
1413  * asserted SOCK_PASSCRED.
1414  */
1415 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1416 			    const struct sock *other)
1417 {
1418 	if (UNIXCB(skb).cred)
1419 		return;
1420 	if (test_bit(SOCK_PASSCRED, &sock->flags) ||
1421 	    !other->sk_socket ||
1422 	    test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
1423 		UNIXCB(skb).pid  = get_pid(task_tgid(current));
1424 		UNIXCB(skb).cred = get_current_cred();
1425 	}
1426 }
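/*
 * Illustrative userspace sketch (not part of this file): a receiver opts in
 * to the credentials attached above by enabling SO_PASSCRED and then reading
 * the SCM_CREDENTIALS control message from recvmsg(); the helper name is
 * invented for the example.
 *
 *	#include <sys/socket.h>
 *
 *	int want_peer_creds(int fd)
 *	{
 *		int on = 1;
 *
 *		return setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
 *	}
 */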
1427 
1428 /*
1429  *	Send AF_UNIX data.
1430  */
1431 
1432 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1433 			      struct msghdr *msg, size_t len)
1434 {
1435 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1436 	struct sock *sk = sock->sk;
1437 	struct net *net = sock_net(sk);
1438 	struct unix_sock *u = unix_sk(sk);
1439 	struct sockaddr_un *sunaddr = msg->msg_name;
1440 	struct sock *other = NULL;
1441 	int namelen = 0; /* fake GCC */
1442 	int err;
1443 	unsigned int hash;
1444 	struct sk_buff *skb;
1445 	long timeo;
1446 	struct scm_cookie tmp_scm;
1447 	int max_level;
1448 	int data_len = 0;
1449 
1450 	if (NULL == siocb->scm)
1451 		siocb->scm = &tmp_scm;
1452 	wait_for_unix_gc();
1453 	err = scm_send(sock, msg, siocb->scm, false);
1454 	if (err < 0)
1455 		return err;
1456 
1457 	err = -EOPNOTSUPP;
1458 	if (msg->msg_flags&MSG_OOB)
1459 		goto out;
1460 
1461 	if (msg->msg_namelen) {
1462 		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1463 		if (err < 0)
1464 			goto out;
1465 		namelen = err;
1466 	} else {
1467 		sunaddr = NULL;
1468 		err = -ENOTCONN;
1469 		other = unix_peer_get(sk);
1470 		if (!other)
1471 			goto out;
1472 	}
1473 
1474 	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1475 	    && (err = unix_autobind(sock)) != 0)
1476 		goto out;
1477 
1478 	err = -EMSGSIZE;
1479 	if (len > sk->sk_sndbuf - 32)
1480 		goto out;
1481 
1482 	if (len > SKB_MAX_ALLOC)
1483 		data_len = min_t(size_t,
1484 				 len - SKB_MAX_ALLOC,
1485 				 MAX_SKB_FRAGS * PAGE_SIZE);
1486 
1487 	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1488 				   msg->msg_flags & MSG_DONTWAIT, &err);
1489 	if (skb == NULL)
1490 		goto out;
1491 
1492 	err = unix_scm_to_skb(siocb->scm, skb, true);
1493 	if (err < 0)
1494 		goto out_free;
1495 	max_level = err + 1;
1496 	unix_get_secdata(siocb->scm, skb);
1497 
1498 	skb_put(skb, len - data_len);
1499 	skb->data_len = data_len;
1500 	skb->len = len;
1501 	err = skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov, 0, len);
1502 	if (err)
1503 		goto out_free;
1504 
1505 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1506 
1507 restart:
1508 	if (!other) {
1509 		err = -ECONNRESET;
1510 		if (sunaddr == NULL)
1511 			goto out_free;
1512 
1513 		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1514 					hash, &err);
1515 		if (other == NULL)
1516 			goto out_free;
1517 	}
1518 
1519 	if (sk_filter(other, skb) < 0) {
1520 		/* Toss the packet but do not return any error to the sender */
1521 		err = len;
1522 		goto out_free;
1523 	}
1524 
1525 	unix_state_lock(other);
1526 	err = -EPERM;
1527 	if (!unix_may_send(sk, other))
1528 		goto out_unlock;
1529 
1530 	if (sock_flag(other, SOCK_DEAD)) {
1531 		/*
1532 		 *	Check with 1003.1g - what should a
1533 		 *	datagram error return here?
1534 		 */
1535 		unix_state_unlock(other);
1536 		sock_put(other);
1537 
1538 		err = 0;
1539 		unix_state_lock(sk);
1540 		if (unix_peer(sk) == other) {
1541 			unix_peer(sk) = NULL;
1542 			unix_state_unlock(sk);
1543 
1544 			unix_dgram_disconnected(sk, other);
1545 			sock_put(other);
1546 			err = -ECONNREFUSED;
1547 		} else {
1548 			unix_state_unlock(sk);
1549 		}
1550 
1551 		other = NULL;
1552 		if (err)
1553 			goto out_free;
1554 		goto restart;
1555 	}
1556 
1557 	err = -EPIPE;
1558 	if (other->sk_shutdown & RCV_SHUTDOWN)
1559 		goto out_unlock;
1560 
1561 	if (sk->sk_type != SOCK_SEQPACKET) {
1562 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1563 		if (err)
1564 			goto out_unlock;
1565 	}
1566 
1567 	if (unix_peer(other) != sk && unix_recvq_full(other)) {
1568 		if (!timeo) {
1569 			err = -EAGAIN;
1570 			goto out_unlock;
1571 		}
1572 
1573 		timeo = unix_wait_for_peer(other, timeo);
1574 
1575 		err = sock_intr_errno(timeo);
1576 		if (signal_pending(current))
1577 			goto out_free;
1578 
1579 		goto restart;
1580 	}
1581 
1582 	if (sock_flag(other, SOCK_RCVTSTAMP))
1583 		__net_timestamp(skb);
1584 	maybe_add_creds(skb, sock, other);
1585 	skb_queue_tail(&other->sk_receive_queue, skb);
1586 	if (max_level > unix_sk(other)->recursion_level)
1587 		unix_sk(other)->recursion_level = max_level;
1588 	unix_state_unlock(other);
1589 	other->sk_data_ready(other, len);
1590 	sock_put(other);
1591 	scm_destroy(siocb->scm);
1592 	return len;
1593 
1594 out_unlock:
1595 	unix_state_unlock(other);
1596 out_free:
1597 	kfree_skb(skb);
1598 out:
1599 	if (other)
1600 		sock_put(other);
1601 	scm_destroy(siocb->scm);
1602 	return err;
1603 }
1604 
1605 
1606 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1607 			       struct msghdr *msg, size_t len)
1608 {
1609 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1610 	struct sock *sk = sock->sk;
1611 	struct sock *other = NULL;
1612 	int err, size;
1613 	struct sk_buff *skb;
1614 	int sent = 0;
1615 	struct scm_cookie tmp_scm;
1616 	bool fds_sent = false;
1617 	int max_level;
1618 
1619 	if (NULL == siocb->scm)
1620 		siocb->scm = &tmp_scm;
1621 	wait_for_unix_gc();
1622 	err = scm_send(sock, msg, siocb->scm, false);
1623 	if (err < 0)
1624 		return err;
1625 
1626 	err = -EOPNOTSUPP;
1627 	if (msg->msg_flags&MSG_OOB)
1628 		goto out_err;
1629 
1630 	if (msg->msg_namelen) {
1631 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1632 		goto out_err;
1633 	} else {
1634 		err = -ENOTCONN;
1635 		other = unix_peer(sk);
1636 		if (!other)
1637 			goto out_err;
1638 	}
1639 
1640 	if (sk->sk_shutdown & SEND_SHUTDOWN)
1641 		goto pipe_err;
1642 
1643 	while (sent < len) {
1644 		/*
1645 		 *	Optimisation for the fact that under 0.01% of X
1646 		 *	messages typically need breaking up.
1647 		 */
1648 
1649 		size = len-sent;
1650 
1651 		/* Keep two messages in the pipe so it schedules better */
1652 		if (size > ((sk->sk_sndbuf >> 1) - 64))
1653 			size = (sk->sk_sndbuf >> 1) - 64;
1654 
1655 		if (size > SKB_MAX_ALLOC)
1656 			size = SKB_MAX_ALLOC;
1657 
1658 		/*
1659 		 *	Grab a buffer
1660 		 */
1661 
1662 		skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
1663 					  &err);
1664 
1665 		if (skb == NULL)
1666 			goto out_err;
1667 
1668 		/*
1669 		 *	If you pass two values to sock_alloc_send_skb
1670 		 *	it tries to grab the large buffer with GFP_NOFS
1671 		 *	(which can fail easily), and if that fails it grabs the
1672 		 *	fallback-size buffer, which is under a page and will
1673 		 *	succeed. [Alan]
1674 		 */
1675 		size = min_t(int, size, skb_tailroom(skb));
1676 
1677 
1678 		/* Only send the fds in the first buffer */
1679 		err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
1680 		if (err < 0) {
1681 			kfree_skb(skb);
1682 			goto out_err;
1683 		}
1684 		max_level = err + 1;
1685 		fds_sent = true;
1686 
1687 		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
1688 		if (err) {
1689 			kfree_skb(skb);
1690 			goto out_err;
1691 		}
1692 
1693 		unix_state_lock(other);
1694 
1695 		if (sock_flag(other, SOCK_DEAD) ||
1696 		    (other->sk_shutdown & RCV_SHUTDOWN))
1697 			goto pipe_err_free;
1698 
1699 		maybe_add_creds(skb, sock, other);
1700 		skb_queue_tail(&other->sk_receive_queue, skb);
1701 		if (max_level > unix_sk(other)->recursion_level)
1702 			unix_sk(other)->recursion_level = max_level;
1703 		unix_state_unlock(other);
1704 		other->sk_data_ready(other, size);
1705 		sent += size;
1706 	}
1707 
1708 	scm_destroy(siocb->scm);
1709 	siocb->scm = NULL;
1710 
1711 	return sent;
1712 
1713 pipe_err_free:
1714 	unix_state_unlock(other);
1715 	kfree_skb(skb);
1716 pipe_err:
1717 	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1718 		send_sig(SIGPIPE, current, 0);
1719 	err = -EPIPE;
1720 out_err:
1721 	scm_destroy(siocb->scm);
1722 	siocb->scm = NULL;
1723 	return sent ? : err;
1724 }
1725 
1726 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1727 				  struct msghdr *msg, size_t len)
1728 {
1729 	int err;
1730 	struct sock *sk = sock->sk;
1731 
1732 	err = sock_error(sk);
1733 	if (err)
1734 		return err;
1735 
1736 	if (sk->sk_state != TCP_ESTABLISHED)
1737 		return -ENOTCONN;
1738 
1739 	if (msg->msg_namelen)
1740 		msg->msg_namelen = 0;
1741 
1742 	return unix_dgram_sendmsg(kiocb, sock, msg, len);
1743 }
1744 
1745 static int unix_seqpacket_recvmsg(struct kiocb *iocb, struct socket *sock,
1746 			      struct msghdr *msg, size_t size,
1747 			      int flags)
1748 {
1749 	struct sock *sk = sock->sk;
1750 
1751 	if (sk->sk_state != TCP_ESTABLISHED)
1752 		return -ENOTCONN;
1753 
1754 	return unix_dgram_recvmsg(iocb, sock, msg, size, flags);
1755 }
1756 
1757 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1758 {
1759 	struct unix_sock *u = unix_sk(sk);
1760 
1761 	msg->msg_namelen = 0;
1762 	if (u->addr) {
1763 		msg->msg_namelen = u->addr->len;
1764 		memcpy(msg->msg_name, u->addr->name, u->addr->len);
1765 	}
1766 }
1767 
1768 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1769 			      struct msghdr *msg, size_t size,
1770 			      int flags)
1771 {
1772 	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1773 	struct scm_cookie tmp_scm;
1774 	struct sock *sk = sock->sk;
1775 	struct unix_sock *u = unix_sk(sk);
1776 	int noblock = flags & MSG_DONTWAIT;
1777 	struct sk_buff *skb;
1778 	int err;
1779 	int peeked, skip;
1780 
1781 	err = -EOPNOTSUPP;
1782 	if (flags&MSG_OOB)
1783 		goto out;
1784 
1785 	msg->msg_namelen = 0;
1786 
1787 	err = mutex_lock_interruptible(&u->readlock);
1788 	if (err) {
1789 		err = sock_intr_errno(sock_rcvtimeo(sk, noblock));
1790 		goto out;
1791 	}
1792 
1793 	skip = sk_peek_offset(sk, flags);
1794 
1795 	skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
1796 	if (!skb) {
1797 		unix_state_lock(sk);
1798 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1799 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1800 		    (sk->sk_shutdown & RCV_SHUTDOWN))
1801 			err = 0;
1802 		unix_state_unlock(sk);
1803 		goto out_unlock;
1804 	}
1805 
1806 	wake_up_interruptible_sync_poll(&u->peer_wait,
1807 					POLLOUT | POLLWRNORM | POLLWRBAND);
1808 
1809 	if (msg->msg_name)
1810 		unix_copy_addr(msg, skb->sk);
1811 
1812 	if (size > skb->len - skip)
1813 		size = skb->len - skip;
1814 	else if (size < skb->len - skip)
1815 		msg->msg_flags |= MSG_TRUNC;
1816 
1817 	err = skb_copy_datagram_iovec(skb, skip, msg->msg_iov, size);
1818 	if (err)
1819 		goto out_free;
1820 
1821 	if (sock_flag(sk, SOCK_RCVTSTAMP))
1822 		__sock_recv_timestamp(msg, sk, skb);
1823 
1824 	if (!siocb->scm) {
1825 		siocb->scm = &tmp_scm;
1826 		memset(&tmp_scm, 0, sizeof(tmp_scm));
1827 	}
1828 	scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
1829 	unix_set_secdata(siocb->scm, skb);
1830 
1831 	if (!(flags & MSG_PEEK)) {
1832 		if (UNIXCB(skb).fp)
1833 			unix_detach_fds(siocb->scm, skb);
1834 
1835 		sk_peek_offset_bwd(sk, skb->len);
1836 	} else {
1837 		/* It is questionable: on PEEK we could:
1838 		   - not return fds - good, but too simple 8)
1839 		   - return fds, and do not return them on read (old strategy,
1840 		     apparently wrong)
1841 		   - clone fds (I chose it for now, it is the most universal
1842 		     solution)
1843 
1844 		   POSIX 1003.1g does not actually define this clearly
1845 		   at all. POSIX 1003.1g doesn't define a lot of things
1846 		   clearly however!
1847 
1848 		*/
1849 
1850 		sk_peek_offset_fwd(sk, size);
1851 
1852 		if (UNIXCB(skb).fp)
1853 			siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1854 	}
1855 	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
1856 
1857 	scm_recv(sock, msg, siocb->scm, flags);
1858 
1859 out_free:
1860 	skb_free_datagram(sk, skb);
1861 out_unlock:
1862 	mutex_unlock(&u->readlock);
1863 out:
1864 	return err;
1865 }
1866 
1867 /*
1868  *	Sleep until data has arrived. But check for races.
1869  */
1870 
1871 static long unix_stream_data_wait(struct sock *sk, long timeo)
1872 {
1873 	DEFINE_WAIT(wait);
1874 
1875 	unix_state_lock(sk);
1876 
1877 	for (;;) {
1878 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1879 
1880 		if (!skb_queue_empty(&sk->sk_receive_queue) ||
1881 		    sk->sk_err ||
1882 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
1883 		    signal_pending(current) ||
1884 		    !timeo)
1885 			break;
1886 
1887 		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1888 		unix_state_unlock(sk);
1889 		timeo = schedule_timeout(timeo);
1890 		unix_state_lock(sk);
1891 		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1892 	}
1893 
1894 	finish_wait(sk_sleep(sk), &wait);
1895 	unix_state_unlock(sk);
1896 	return timeo;
1897 }
1898 
1899 
1900 
1901 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1902 			       struct msghdr *msg, size_t size,
1903 			       int flags)
1904 {
1905 	struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1906 	struct scm_cookie tmp_scm;
1907 	struct sock *sk = sock->sk;
1908 	struct unix_sock *u = unix_sk(sk);
1909 	struct sockaddr_un *sunaddr = msg->msg_name;
1910 	int copied = 0;
1911 	int check_creds = 0;
1912 	int target;
1913 	int err = 0;
1914 	long timeo;
1915 	int skip;
1916 
1917 	err = -EINVAL;
1918 	if (sk->sk_state != TCP_ESTABLISHED)
1919 		goto out;
1920 
1921 	err = -EOPNOTSUPP;
1922 	if (flags&MSG_OOB)
1923 		goto out;
1924 
1925 	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1926 	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1927 
1928 	msg->msg_namelen = 0;
1929 
1930 	/* Lock the socket to prevent queue disordering
1931 	 * while we sleep in memcpy_tomsg
1932 	 */
1933 
1934 	if (!siocb->scm) {
1935 		siocb->scm = &tmp_scm;
1936 		memset(&tmp_scm, 0, sizeof(tmp_scm));
1937 	}
1938 
1939 	err = mutex_lock_interruptible(&u->readlock);
1940 	if (err) {
1941 		err = sock_intr_errno(timeo);
1942 		goto out;
1943 	}
1944 
1945 	skip = sk_peek_offset(sk, flags);
1946 
1947 	do {
1948 		int chunk;
1949 		struct sk_buff *skb;
1950 
1951 		unix_state_lock(sk);
1952 		skb = skb_peek(&sk->sk_receive_queue);
1953 again:
1954 		if (skb == NULL) {
1955 			unix_sk(sk)->recursion_level = 0;
1956 			if (copied >= target)
1957 				goto unlock;
1958 
1959 			/*
1960 			 *	POSIX 1003.1g mandates this order.
1961 			 */
1962 
1963 			err = sock_error(sk);
1964 			if (err)
1965 				goto unlock;
1966 			if (sk->sk_shutdown & RCV_SHUTDOWN)
1967 				goto unlock;
1968 
1969 			unix_state_unlock(sk);
1970 			err = -EAGAIN;
1971 			if (!timeo)
1972 				break;
1973 			mutex_unlock(&u->readlock);
1974 
1975 			timeo = unix_stream_data_wait(sk, timeo);
1976 
1977 			if (signal_pending(current)
1978 			    ||  mutex_lock_interruptible(&u->readlock)) {
1979 				err = sock_intr_errno(timeo);
1980 				goto out;
1981 			}
1982 
1983 			continue;
1984  unlock:
1985 			unix_state_unlock(sk);
1986 			break;
1987 		}
1988 
1989 		if (skip >= skb->len) {
1990 			skip -= skb->len;
1991 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
1992 			goto again;
1993 		}
1994 
1995 		unix_state_unlock(sk);
1996 
1997 		if (check_creds) {
1998 			/* Never glue messages from different writers */
1999 			if ((UNIXCB(skb).pid  != siocb->scm->pid) ||
2000 			    (UNIXCB(skb).cred != siocb->scm->cred))
2001 				break;
2002 		} else {
2003 			/* Copy credentials */
2004 			scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
2005 			check_creds = 1;
2006 		}
2007 
2008 		/* Copy address just once */
2009 		if (sunaddr) {
2010 			unix_copy_addr(msg, skb->sk);
2011 			sunaddr = NULL;
2012 		}
2013 
2014 		chunk = min_t(unsigned int, skb->len - skip, size);
2015 		if (memcpy_toiovec(msg->msg_iov, skb->data + skip, chunk)) {
2016 			if (copied == 0)
2017 				copied = -EFAULT;
2018 			break;
2019 		}
2020 		copied += chunk;
2021 		size -= chunk;
2022 
2023 		/* Mark read part of skb as used */
2024 		if (!(flags & MSG_PEEK)) {
2025 			skb_pull(skb, chunk);
2026 
2027 			sk_peek_offset_bwd(sk, chunk);
2028 
2029 			if (UNIXCB(skb).fp)
2030 				unix_detach_fds(siocb->scm, skb);
2031 
2032 			if (skb->len)
2033 				break;
2034 
2035 			skb_unlink(skb, &sk->sk_receive_queue);
2036 			consume_skb(skb);
2037 
2038 			if (siocb->scm->fp)
2039 				break;
2040 		} else {
2041 			/* It is questionable, see note in unix_dgram_recvmsg.
2042 			/* Duplicating fds on MSG_PEEK is questionable;
2043 			 * see the note in unix_dgram_recvmsg. */
2044 				siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
2045 
2046 			sk_peek_offset_fwd(sk, chunk);
2047 
2048 			break;
2049 		}
2050 	} while (size);
2051 
2052 	mutex_unlock(&u->readlock);
2053 	scm_recv(sock, msg, siocb->scm, flags);
2054 out:
2055 	return copied ? : err;
2056 }
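
/*
 * Userspace sketch (illustrative, not part of this file): receiving
 * stream bytes together with a descriptor passed via SCM_RIGHTS. The
 * loop above stops gluing skbs once scm->fp is set, so one recvmsg()
 * never returns descriptors from two different messages.
 *
 *	#include <string.h>
 *	#include <sys/socket.h>
 *
 *	static ssize_t recv_with_fd(int sock, void *buf, size_t len, int *fd)
 *	{
 *		char cbuf[CMSG_SPACE(sizeof(int))];
 *		struct iovec iov = { .iov_base = buf, .iov_len = len };
 *		struct msghdr msg = {
 *			.msg_iov	= &iov,
 *			.msg_iovlen	= 1,
 *			.msg_control	= cbuf,
 *			.msg_controllen	= sizeof(cbuf),
 *		};
 *		ssize_t n = recvmsg(sock, &msg, 0);
 *		struct cmsghdr *c = n > 0 ? CMSG_FIRSTHDR(&msg) : NULL;
 *
 *		*fd = -1;
 *		if (c && c->cmsg_level == SOL_SOCKET &&
 *		    c->cmsg_type == SCM_RIGHTS)
 *			memcpy(fd, CMSG_DATA(c), sizeof(int));
 *		return n;
 *	}
 */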
2057 
2058 static int unix_shutdown(struct socket *sock, int mode)
2059 {
2060 	struct sock *sk = sock->sk;
2061 	struct sock *other;
2062 
2063 	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
2064 
2065 	if (!mode)
2066 		return 0;
2067 
2068 	unix_state_lock(sk);
2069 	sk->sk_shutdown |= mode;
2070 	other = unix_peer(sk);
2071 	if (other)
2072 		sock_hold(other);
2073 	unix_state_unlock(sk);
2074 	sk->sk_state_change(sk);
2075 
2076 	if (other &&
2077 		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2078 
2079 		int peer_mode = 0;
2080 
2081 		if (mode&RCV_SHUTDOWN)
2082 			peer_mode |= SEND_SHUTDOWN;
2083 		if (mode&SEND_SHUTDOWN)
2084 			peer_mode |= RCV_SHUTDOWN;
2085 		unix_state_lock(other);
2086 		other->sk_shutdown |= peer_mode;
2087 		unix_state_unlock(other);
2088 		other->sk_state_change(other);
2089 		if (peer_mode == SHUTDOWN_MASK)
2090 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2091 		else if (peer_mode & RCV_SHUTDOWN)
2092 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2093 	}
2094 	if (other)
2095 		sock_put(other);
2096 
2097 	return 0;
2098 }
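
/*
 * Userspace sketch (illustrative): a half-close is mirrored onto the
 * peer above (our SEND_SHUTDOWN becomes the peer's RCV_SHUTDOWN), so
 * after
 *
 *	shutdown(fd, SHUT_WR);
 *
 * the peer's recv() returns 0 (EOF) once its queue drains, and the
 * peer is woken with POLL_IN (or POLL_HUP on a full SHUT_RDWR).
 */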
2099 
2100 long unix_inq_len(struct sock *sk)
2101 {
2102 	struct sk_buff *skb;
2103 	long amount = 0;
2104 
2105 	if (sk->sk_state == TCP_LISTEN)
2106 		return -EINVAL;
2107 
2108 	spin_lock(&sk->sk_receive_queue.lock);
2109 	if (sk->sk_type == SOCK_STREAM ||
2110 	    sk->sk_type == SOCK_SEQPACKET) {
2111 		skb_queue_walk(&sk->sk_receive_queue, skb)
2112 			amount += skb->len;
2113 	} else {
2114 		skb = skb_peek(&sk->sk_receive_queue);
2115 		if (skb)
2116 			amount = skb->len;
2117 	}
2118 	spin_unlock(&sk->sk_receive_queue.lock);
2119 
2120 	return amount;
2121 }
2122 EXPORT_SYMBOL_GPL(unix_inq_len);
2123 
2124 long unix_outq_len(struct sock *sk)
2125 {
2126 	return sk_wmem_alloc_get(sk);
2127 }
2128 EXPORT_SYMBOL_GPL(unix_outq_len);
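
/*
 * Note on semantics: for SOCK_STREAM and SOCK_SEQPACKET, unix_inq_len()
 * sums every queued skb, while for SOCK_DGRAM it reports only the head
 * datagram (what a single read could consume). unix_outq_len() counts
 * bytes the receiver has not yet freed, since AF_UNIX skbs stay charged
 * to the sender's sk_wmem_alloc until consumed.
 */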
2129 
2130 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2131 {
2132 	struct sock *sk = sock->sk;
2133 	long amount = 0;
2134 	int err;
2135 
2136 	switch (cmd) {
2137 	case SIOCOUTQ:
2138 		amount = unix_outq_len(sk);
2139 		err = put_user(amount, (int __user *)arg);
2140 		break;
2141 	case SIOCINQ:
2142 		amount = unix_inq_len(sk);
2143 		if (amount < 0)
2144 			err = amount;
2145 		else
2146 			err = put_user(amount, (int __user *)arg);
2147 		break;
2148 	default:
2149 		err = -ENOIOCTLCMD;
2150 		break;
2151 	}
2152 	return err;
2153 }
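
/*
 * Userspace sketch (illustrative): querying the queue lengths handled
 * above. Both ioctls store into an int, matching the put_user() calls.
 *
 *	#include <stdio.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/sockios.h>
 *
 *	int inq, outq;
 *
 *	if (ioctl(fd, SIOCINQ, &inq) == 0 &&
 *	    ioctl(fd, SIOCOUTQ, &outq) == 0)
 *		printf("inq=%d outq=%d\n", inq, outq);
 */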
2154 
2155 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2156 {
2157 	struct sock *sk = sock->sk;
2158 	unsigned int mask;
2159 
2160 	sock_poll_wait(file, sk_sleep(sk), wait);
2161 	mask = 0;
2162 
2163 	/* exceptional events? */
2164 	if (sk->sk_err)
2165 		mask |= POLLERR;
2166 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2167 		mask |= POLLHUP;
2168 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2169 		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2170 
2171 	/* readable? */
2172 	if (!skb_queue_empty(&sk->sk_receive_queue))
2173 		mask |= POLLIN | POLLRDNORM;
2174 
2175 	/* Connection-based sockets need to check for termination and startup */
2176 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2177 	    sk->sk_state == TCP_CLOSE)
2178 		mask |= POLLHUP;
2179 
2180 	/*
2181 	 * We set writable also when the other side has shut down the
2182 	 * connection; this prevents sockets from getting stuck.
2183 	 */
2184 	if (unix_writable(sk))
2185 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2186 
2187 	return mask;
2188 }
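
/*
 * Userspace sketch (illustrative): POLLRDHUP, set above on
 * RCV_SHUTDOWN, lets a poller tell "peer closed its write side" apart
 * from ordinary readability. handle_peer_shutdown() is a hypothetical
 * application callback:
 *
 *	#define _GNU_SOURCE
 *	#include <poll.h>
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLRDHUP };
 *
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLRDHUP))
 *		handle_peer_shutdown();
 */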
2189 
2190 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2191 				    poll_table *wait)
2192 {
2193 	struct sock *sk = sock->sk, *other;
2194 	unsigned int mask, writable;
2195 
2196 	sock_poll_wait(file, sk_sleep(sk), wait);
2197 	mask = 0;
2198 
2199 	/* exceptional events? */
2200 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2201 		mask |= POLLERR;
2202 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2203 		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2204 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2205 		mask |= POLLHUP;
2206 
2207 	/* readable? */
2208 	if (!skb_queue_empty(&sk->sk_receive_queue))
2209 		mask |= POLLIN | POLLRDNORM;
2210 
2211 	/* Connection-based sockets need to check for termination and startup */
2212 	if (sk->sk_type == SOCK_SEQPACKET) {
2213 		if (sk->sk_state == TCP_CLOSE)
2214 			mask |= POLLHUP;
2215 		/* connection hasn't started yet? */
2216 		if (sk->sk_state == TCP_SYN_SENT)
2217 			return mask;
2218 	}
2219 
2220 	/* No write status requested, avoid expensive OUT tests. */
2221 	if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2222 		return mask;
2223 
2224 	writable = unix_writable(sk);
2225 	other = unix_peer_get(sk);
2226 	if (other) {
2227 		if (unix_peer(other) != sk) {
2228 			sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
2229 			if (unix_recvq_full(other))
2230 				writable = 0;
2231 		}
2232 		sock_put(other);
2233 	}
2234 
2235 	if (writable)
2236 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2237 	else
2238 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
2239 
2240 	return mask;
2241 }
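
/*
 * Design note: unlike unix_poll(), the datagram variant above also
 * registers on the peer's peer_wait queue when the link is asymmetric
 * (unix_peer(other) != sk), and reports the socket unwritable while
 * unix_recvq_full(other) holds. POLLOUT thus reflects the receiver's
 * queue, giving a SOCK_DGRAM sender back-pressure from a slow reader.
 */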
2242 
2243 #ifdef CONFIG_PROC_FS
2244 
2245 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2246 
2247 #define get_bucket(x) ((x) >> BUCKET_SPACE)
2248 #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2249 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
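
/*
 * Worked example (assuming 64-bit longs and UNIX_HASH_BITS == 8):
 * BUCKET_SPACE is 64 - 9 - 1 = 54, so a seq_file position packs the
 * hash bucket into the top bits and a 1-based offset within that
 * bucket into the low 54 bits:
 *
 *	pos = set_bucket_offset(3, 7);	 == (3UL << 54) | 7
 *	get_bucket(pos);		 == 3
 *	get_offset(pos);		 == 7
 */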
2250 
2251 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2252 {
2253 	unsigned long offset = get_offset(*pos);
2254 	unsigned long bucket = get_bucket(*pos);
2255 	struct sock *sk;
2256 	unsigned long count = 0;
2257 
2258 	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2259 		if (sock_net(sk) != seq_file_net(seq))
2260 			continue;
2261 		if (++count == offset)
2262 			break;
2263 	}
2264 
2265 	return sk;
2266 }
2267 
2268 static struct sock *unix_next_socket(struct seq_file *seq,
2269 				     struct sock *sk,
2270 				     loff_t *pos)
2271 {
2272 	unsigned long bucket;
2273 
2274 	while (sk > (struct sock *)SEQ_START_TOKEN) {
2275 		sk = sk_next(sk);
2276 		if (!sk)
2277 			goto next_bucket;
2278 		if (sock_net(sk) == seq_file_net(seq))
2279 			return sk;
2280 	}
2281 
2282 	do {
2283 		sk = unix_from_bucket(seq, pos);
2284 		if (sk)
2285 			return sk;
2286 
2287 next_bucket:
2288 		bucket = get_bucket(*pos) + 1;
2289 		*pos = set_bucket_offset(bucket, 1);
2290 	} while (bucket < ARRAY_SIZE(unix_socket_table));
2291 
2292 	return NULL;
2293 }
2294 
2295 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2296 	__acquires(unix_table_lock)
2297 {
2298 	spin_lock(&unix_table_lock);
2299 
2300 	if (!*pos)
2301 		return SEQ_START_TOKEN;
2302 
2303 	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2304 		return NULL;
2305 
2306 	return unix_next_socket(seq, NULL, pos);
2307 }
2308 
2309 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2310 {
2311 	++*pos;
2312 	return unix_next_socket(seq, v, pos);
2313 }
2314 
2315 static void unix_seq_stop(struct seq_file *seq, void *v)
2316 	__releases(unix_table_lock)
2317 {
2318 	spin_unlock(&unix_table_lock);
2319 }
2320 
2321 static int unix_seq_show(struct seq_file *seq, void *v)
2322 {
2323 
2324 	if (v == SEQ_START_TOKEN)
2325 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2326 			 "Inode Path\n");
2327 	else {
2328 		struct sock *s = v;
2329 		struct unix_sock *u = unix_sk(s);
2330 		unix_state_lock(s);
2331 
2332 		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2333 			s,
2334 			atomic_read(&s->sk_refcnt),
2335 			0,
2336 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2337 			s->sk_type,
2338 			s->sk_socket ?
2339 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2340 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2341 			sock_i_ino(s));
2342 
2343 		if (u->addr) {
2344 			int i, len;
2345 			seq_putc(seq, ' ');
2346 
2347 			i = 0;
2348 			len = u->addr->len - sizeof(short);
2349 			if (!UNIX_ABSTRACT(s))
2350 				len--;
2351 			else {
2352 				seq_putc(seq, '@');
2353 				i++;
2354 			}
2355 			for ( ; i < len; i++)
2356 				seq_putc(seq, u->addr->name->sun_path[i]);
2357 		}
2358 		unix_state_unlock(s);
2359 		seq_putc(seq, '\n');
2360 	}
2361 
2362 	return 0;
2363 }
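
/*
 * Illustrative output (format only; the values are made up): one line
 * of /proc/net/unix as emitted above, for a listening filesystem-bound
 * stream socket. Flags shows __SO_ACCEPTCON (0x10000) for listeners,
 * and abstract names are printed with a leading '@':
 *
 *	Num       RefCount Protocol Flags    Type St Inode Path
 *	ffff8800b8d31400: 00000002 00000000 00010000 0001 01 17890 /run/foo.sock
 */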
2364 
2365 static const struct seq_operations unix_seq_ops = {
2366 	.start  = unix_seq_start,
2367 	.next   = unix_seq_next,
2368 	.stop   = unix_seq_stop,
2369 	.show   = unix_seq_show,
2370 };
2371 
2372 static int unix_seq_open(struct inode *inode, struct file *file)
2373 {
2374 	return seq_open_net(inode, file, &unix_seq_ops,
2375 			    sizeof(struct seq_net_private));
2376 }
2377 
2378 static const struct file_operations unix_seq_fops = {
2379 	.owner		= THIS_MODULE,
2380 	.open		= unix_seq_open,
2381 	.read		= seq_read,
2382 	.llseek		= seq_lseek,
2383 	.release	= seq_release_net,
2384 };
2385 
2386 #endif
2387 
2388 static const struct net_proto_family unix_family_ops = {
2389 	.family = PF_UNIX,
2390 	.create = unix_create,
2391 	.owner	= THIS_MODULE,
2392 };
2393 
2394 
2395 static int __net_init unix_net_init(struct net *net)
2396 {
2397 	int error = -ENOMEM;
2398 
2399 	net->unx.sysctl_max_dgram_qlen = 10;
2400 	if (unix_sysctl_register(net))
2401 		goto out;
2402 
2403 #ifdef CONFIG_PROC_FS
2404 	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2405 		unix_sysctl_unregister(net);
2406 		goto out;
2407 	}
2408 #endif
2409 	error = 0;
2410 out:
2411 	return error;
2412 }
2413 
2414 static void __net_exit unix_net_exit(struct net *net)
2415 {
2416 	unix_sysctl_unregister(net);
2417 	proc_net_remove(net, "unix");
2418 }
2419 
2420 static struct pernet_operations unix_net_ops = {
2421 	.init = unix_net_init,
2422 	.exit = unix_net_exit,
2423 };
2424 
2425 static int __init af_unix_init(void)
2426 {
2427 	int rc = -1;
2428 	struct sk_buff *dummy_skb;
2429 
2430 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2431 
2432 	rc = proto_register(&unix_proto, 1);
2433 	if (rc != 0) {
2434 		printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2435 		       __func__);
2436 		goto out;
2437 	}
2438 
2439 	sock_register(&unix_family_ops);
2440 	register_pernet_subsys(&unix_net_ops);
2441 out:
2442 	return rc;
2443 }
2444 
2445 static void __exit af_unix_exit(void)
2446 {
2447 	sock_unregister(PF_UNIX);
2448 	proto_unregister(&unix_proto);
2449 	unregister_pernet_subsys(&unix_net_ops);
2450 }
2451 
2452 /* Earlier than device_initcall() so that other drivers invoking
2453    request_module() don't end up in a loop when modprobe tries
2454    to use a UNIX socket. But later than subsys_initcall() because
2455    we depend on stuff initialised there */
2456 fs_initcall(af_unix_init);
2457 module_exit(af_unix_exit);
2458 
2459 MODULE_LICENSE("GPL");
2460 MODULE_ALIAS_NETPROTO(PF_UNIX);
2461