xref: /openbmc/linux/net/unix/af_unix.c (revision 0f4630f3)
1 /*
2  * NET4:	Implementation of BSD Unix domain sockets.
3  *
4  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  * Fixes:
12  *		Linus Torvalds	:	Assorted bug cures.
13  *		Niibe Yutaka	:	async I/O support.
14  *		Carsten Paeth	:	PF_UNIX check, address fixes.
15  *		Alan Cox	:	Limit size of allocated blocks.
16  *		Alan Cox	:	Fixed the stupid socketpair bug.
17  *		Alan Cox	:	BSD compatibility fine tuning.
18  *		Alan Cox	:	Fixed a bug in connect when interrupted.
19  *		Alan Cox	:	Sorted out a proper draft version of
20  *					file descriptor passing hacked up from
21  *					Mike Shaver's work.
22  *		Marty Leisner	:	Fixes to fd passing
23  *		Nick Nevin	:	recvmsg bugfix.
24  *		Alan Cox	:	Started proper garbage collector
25  *		Heiko Eißfeldt	:	Missing verify_area check
26  *		Alan Cox	:	Started POSIXisms
27  *		Andreas Schwab	:	Replace inode by dentry for proper
28  *					reference counting
29  *		Kirk Petersen	:	Made this a module
30  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
31  *					Lots of bug fixes.
32  *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
33  *					by the above two patches.
34  *	     Andrea Arcangeli	:	If possible we block in connect(2)
35  *					if the max backlog of the listen socket
36  *					has been reached. This won't break
37  *					old apps and it avoids a huge number
38  *					of hashed socks (for unix_gc()
39  *					performance reasons).
40  *					Security fix that limits the max
41  *					number of socks to 2*max_files and
42  *					the number of skb queueable in the
43  *					dgram receiver.
44  *		Artur Skawina   :	Hash function optimizations
45  *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
46  *	      Malcolm Beattie   :	Set peercred for socketpair
47  *	     Michal Ostrowski   :       Module initialization cleanup.
48  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
49  *	     				the core infrastructure is doing that
50  *	     				for all net proto families now (2.5.69+)
51  *
52  *
53  * Known differences from reference BSD that was tested:
54  *
55  *	[TO FIX]
56  *	ECONNREFUSED is not returned from one end of a connected() socket to the
57  *		other the moment one end closes.
58  *	fstat() doesn't return st_dev=0, and gives the blksize as the high water mark
59  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
60  *	[NOT TO FIX]
61  *	accept() returns a path name even if the connecting socket has closed
62  *		in the meantime (BSD loses the path and gives up).
63  *	accept() returns 0 length path for an unbound connector. BSD returns 16
64  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
66  *	BSD af_unix connect apparently forgets to block properly.
67  *		(need to check this with the POSIX spec in detail)
68  *
69  * Differences from 2.0.0-11-... (ANK)
70  *	Bug fixes and improvements.
71  *		- client shutdown killed server socket.
72  *		- removed all useless cli/sti pairs.
73  *
74  *	Semantic changes/extensions.
75  *		- generic control message passing.
76  *		- SCM_CREDENTIALS control message.
77  *		- "Abstract" (not FS based) socket bindings.
78  *		  Abstract names are sequences of bytes (not zero terminated)
79  *		  started by 0, so that this name space does not intersect
80  *		  with BSD names.
81  */
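
/* Example: a minimal userspace sketch of an abstract bind as described
 * above (assuming the usual <sys/socket.h>, <sys/un.h> and <stddef.h>
 * includes; the name "\0example" is hypothetical). The name starts with
 * a zero byte and is delimited by the address length, not by a
 * terminating NUL:
 *
 *      struct sockaddr_un a = { .sun_family = AF_UNIX };
 *      int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *      memcpy(a.sun_path, "\0example", 8);
 *      bind(fd, (struct sockaddr *)&a,
 *           offsetof(struct sockaddr_un, sun_path) + 8);
 */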
82 
83 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
84 
85 #include <linux/module.h>
86 #include <linux/kernel.h>
87 #include <linux/signal.h>
88 #include <linux/sched.h>
89 #include <linux/errno.h>
90 #include <linux/string.h>
91 #include <linux/stat.h>
92 #include <linux/dcache.h>
93 #include <linux/namei.h>
94 #include <linux/socket.h>
95 #include <linux/un.h>
96 #include <linux/fcntl.h>
97 #include <linux/termios.h>
98 #include <linux/sockios.h>
99 #include <linux/net.h>
100 #include <linux/in.h>
101 #include <linux/fs.h>
102 #include <linux/slab.h>
103 #include <asm/uaccess.h>
104 #include <linux/skbuff.h>
105 #include <linux/netdevice.h>
106 #include <net/net_namespace.h>
107 #include <net/sock.h>
108 #include <net/tcp_states.h>
109 #include <net/af_unix.h>
110 #include <linux/proc_fs.h>
111 #include <linux/seq_file.h>
112 #include <net/scm.h>
113 #include <linux/init.h>
114 #include <linux/poll.h>
115 #include <linux/rtnetlink.h>
116 #include <linux/mount.h>
117 #include <net/checksum.h>
118 #include <linux/security.h>
119 #include <linux/freezer.h>
120 
121 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
122 EXPORT_SYMBOL_GPL(unix_socket_table);
123 DEFINE_SPINLOCK(unix_table_lock);
124 EXPORT_SYMBOL_GPL(unix_table_lock);
125 static atomic_long_t unix_nr_socks;
126 
127 
128 static struct hlist_head *unix_sockets_unbound(void *addr)
129 {
130 	unsigned long hash = (unsigned long)addr;
131 
132 	hash ^= hash >> 16;
133 	hash ^= hash >> 8;
134 	hash %= UNIX_HASH_SIZE;
135 	return &unix_socket_table[UNIX_HASH_SIZE + hash];
136 }
137 
138 #define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
139 
140 #ifdef CONFIG_SECURITY_NETWORK
141 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
142 {
143 	UNIXCB(skb).secid = scm->secid;
144 }
145 
146 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
147 {
148 	scm->secid = UNIXCB(skb).secid;
149 }
150 
151 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
152 {
153 	return (scm->secid == UNIXCB(skb).secid);
154 }
155 #else
156 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
157 { }
158 
159 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
160 { }
161 
162 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
163 {
164 	return true;
165 }
166 #endif /* CONFIG_SECURITY_NETWORK */
167 
168 /*
169  *  SMP locking strategy:
170  *    hash table is protected with spinlock unix_table_lock
171  *    each socket state is protected by separate spin lock.
172  */
173 
174 static inline unsigned int unix_hash_fold(__wsum n)
175 {
176 	unsigned int hash = (__force unsigned int)csum_fold(n);
177 
178 	hash ^= hash>>8;
179 	return hash&(UNIX_HASH_SIZE-1);
180 }
181 
182 #define unix_peer(sk) (unix_sk(sk)->peer)
183 
184 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
185 {
186 	return unix_peer(osk) == sk;
187 }
188 
189 static inline int unix_may_send(struct sock *sk, struct sock *osk)
190 {
191 	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
192 }
193 
194 static inline int unix_recvq_full(struct sock const *sk)
195 {
196 	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
197 }
198 
199 struct sock *unix_peer_get(struct sock *s)
200 {
201 	struct sock *peer;
202 
203 	unix_state_lock(s);
204 	peer = unix_peer(s);
205 	if (peer)
206 		sock_hold(peer);
207 	unix_state_unlock(s);
208 	return peer;
209 }
210 EXPORT_SYMBOL_GPL(unix_peer_get);
211 
212 static inline void unix_release_addr(struct unix_address *addr)
213 {
214 	if (atomic_dec_and_test(&addr->refcnt))
215 		kfree(addr);
216 }
217 
218 /*
219  *	Check unix socket name:
220  *		- it must not be zero length.
221  *		- if it does not start with a zero byte, it must be NUL terminated (FS object)
222  *		- if it starts with a zero byte, it is an abstract name.
223  */
224 
225 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
226 {
227 	if (len <= sizeof(short) || len > sizeof(*sunaddr))
228 		return -EINVAL;
229 	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
230 		return -EINVAL;
231 	if (sunaddr->sun_path[0]) {
232 		/*
233 		 * This may look like an off-by-one error but it is a bit more
234 		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
235 		 * sun_path[108] does not exist as such.  However, in kernel space
236 		 * we are guaranteed that it is a valid memory location in our
237 		 * kernel address buffer.
238 		 */
239 		((char *)sunaddr)[len] = 0;
240 		len = strlen(sunaddr->sun_path)+1+sizeof(short);
241 		return len;
242 	}
243 
244 	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
245 	return len;
246 }
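
/* Worked example (sketch): for the filesystem name "/tmp/demo.sock",
 * unix_mkname() returns strlen("/tmp/demo.sock") + 1 + sizeof(short)
 * = 14 + 1 + 2 = 17, i.e. the family bytes plus the path plus the
 * forced NUL. An abstract name "\0foo" passed with len == 6 (2 bytes
 * of sun_family plus 4 bytes of name) is returned unchanged, and the
 * hash is folded over all 6 bytes.
 */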
247 
248 static void __unix_remove_socket(struct sock *sk)
249 {
250 	sk_del_node_init(sk);
251 }
252 
253 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
254 {
255 	WARN_ON(!sk_unhashed(sk));
256 	sk_add_node(sk, list);
257 }
258 
259 static inline void unix_remove_socket(struct sock *sk)
260 {
261 	spin_lock(&unix_table_lock);
262 	__unix_remove_socket(sk);
263 	spin_unlock(&unix_table_lock);
264 }
265 
266 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
267 {
268 	spin_lock(&unix_table_lock);
269 	__unix_insert_socket(list, sk);
270 	spin_unlock(&unix_table_lock);
271 }
272 
273 static struct sock *__unix_find_socket_byname(struct net *net,
274 					      struct sockaddr_un *sunname,
275 					      int len, int type, unsigned int hash)
276 {
277 	struct sock *s;
278 
279 	sk_for_each(s, &unix_socket_table[hash ^ type]) {
280 		struct unix_sock *u = unix_sk(s);
281 
282 		if (!net_eq(sock_net(s), net))
283 			continue;
284 
285 		if (u->addr->len == len &&
286 		    !memcmp(u->addr->name, sunname, len))
287 			goto found;
288 	}
289 	s = NULL;
290 found:
291 	return s;
292 }
293 
294 static inline struct sock *unix_find_socket_byname(struct net *net,
295 						   struct sockaddr_un *sunname,
296 						   int len, int type,
297 						   unsigned int hash)
298 {
299 	struct sock *s;
300 
301 	spin_lock(&unix_table_lock);
302 	s = __unix_find_socket_byname(net, sunname, len, type, hash);
303 	if (s)
304 		sock_hold(s);
305 	spin_unlock(&unix_table_lock);
306 	return s;
307 }
308 
309 static struct sock *unix_find_socket_byinode(struct inode *i)
310 {
311 	struct sock *s;
312 
313 	spin_lock(&unix_table_lock);
314 	sk_for_each(s,
315 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
316 		struct dentry *dentry = unix_sk(s)->path.dentry;
317 
318 		if (dentry && d_backing_inode(dentry) == i) {
319 			sock_hold(s);
320 			goto found;
321 		}
322 	}
323 	s = NULL;
324 found:
325 	spin_unlock(&unix_table_lock);
326 	return s;
327 }
328 
329 /* Support code for asymmetrically connected dgram sockets
330  *
331  * If a datagram socket is connected to a socket not itself connected
332  * to the first socket (e.g., /dev/log), clients may only enqueue more
333  * messages if the present receive queue of the server socket is not
334  * "too large". This means there's a second writeability condition
335  * poll and sendmsg need to test. The dgram recv code will do a wake
336  * up on the peer_wait wait queue of a socket upon reception of a
337  * datagram which needs to be propagated to sleeping would-be writers
338  * since these might not have sent anything so far. This can't be
339  * accomplished via poll_wait because the lifetime of the server
340  * socket might be less than that of its clients if these break their
341  * association with it or if the server socket is closed while clients
342  * are still connected to it and there's no way to inform "a polling
343  * implementation" that it should let go of a certain wait queue.
344  *
345  * In order to propagate a wake up, a wait_queue_t of the client
346  * socket is enqueued on the peer_wait queue of the server socket
347  * whose wake function does a wake_up on the ordinary client socket
348  * wait queue. This connection is established whenever a write (or
349  * poll for write) hits the flow control condition, and is broken when the
350  * association to the server socket is dissolved or after a wake up
351  * was relayed.
352  */
353 
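/* Example: a userspace sketch of the writeability condition described
 * above. A client connect()ed to a datagram server it does not own
 * (in the style of /dev/log) can see POLLOUT withheld solely because
 * the server's receive queue is full, even though the client's own
 * send buffer is empty; the wake-up relay below is what eventually
 * lets this poll() return (names are hypothetical):
 *
 *      struct pollfd p = { .fd = client_fd, .events = POLLOUT };
 *
 *      connect(client_fd, (struct sockaddr *)&srv, srv_len);
 *      poll(&p, 1, -1);        a read(2) on the server side wakes this up
 */
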
354 static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
355 				      void *key)
356 {
357 	struct unix_sock *u;
358 	wait_queue_head_t *u_sleep;
359 
360 	u = container_of(q, struct unix_sock, peer_wake);
361 
362 	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
363 			    q);
364 	u->peer_wake.private = NULL;
365 
366 	/* relaying can only happen while the wq still exists */
367 	u_sleep = sk_sleep(&u->sk);
368 	if (u_sleep)
369 		wake_up_interruptible_poll(u_sleep, key);
370 
371 	return 0;
372 }
373 
374 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
375 {
376 	struct unix_sock *u, *u_other;
377 	int rc;
378 
379 	u = unix_sk(sk);
380 	u_other = unix_sk(other);
381 	rc = 0;
382 	spin_lock(&u_other->peer_wait.lock);
383 
384 	if (!u->peer_wake.private) {
385 		u->peer_wake.private = other;
386 		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
387 
388 		rc = 1;
389 	}
390 
391 	spin_unlock(&u_other->peer_wait.lock);
392 	return rc;
393 }
394 
395 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
396 					    struct sock *other)
397 {
398 	struct unix_sock *u, *u_other;
399 
400 	u = unix_sk(sk);
401 	u_other = unix_sk(other);
402 	spin_lock(&u_other->peer_wait.lock);
403 
404 	if (u->peer_wake.private == other) {
405 		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
406 		u->peer_wake.private = NULL;
407 	}
408 
409 	spin_unlock(&u_other->peer_wait.lock);
410 }
411 
412 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
413 						   struct sock *other)
414 {
415 	unix_dgram_peer_wake_disconnect(sk, other);
416 	wake_up_interruptible_poll(sk_sleep(sk),
417 				   POLLOUT |
418 				   POLLWRNORM |
419 				   POLLWRBAND);
420 }
421 
422 /* preconditions:
423  *	- unix_peer(sk) == other
424  *	- association is stable
425  */
426 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
427 {
428 	int connected;
429 
430 	connected = unix_dgram_peer_wake_connect(sk, other);
431 
432 	if (unix_recvq_full(other))
433 		return 1;
434 
435 	if (connected)
436 		unix_dgram_peer_wake_disconnect(sk, other);
437 
438 	return 0;
439 }
440 
441 static int unix_writable(const struct sock *sk)
442 {
443 	return sk->sk_state != TCP_LISTEN &&
444 	       (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
445 }
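
/* E.g. with a (typical, sysctl-dependent) sk_sndbuf of 212992 bytes,
 * the socket counts as writable while less than a quarter of that,
 * 53248 bytes, is charged to sk_wmem_alloc.
 */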
446 
447 static void unix_write_space(struct sock *sk)
448 {
449 	struct socket_wq *wq;
450 
451 	rcu_read_lock();
452 	if (unix_writable(sk)) {
453 		wq = rcu_dereference(sk->sk_wq);
454 		if (wq_has_sleeper(wq))
455 			wake_up_interruptible_sync_poll(&wq->wait,
456 				POLLOUT | POLLWRNORM | POLLWRBAND);
457 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
458 	}
459 	rcu_read_unlock();
460 }
461 
462 /* When a dgram socket disconnects (or changes its peer), we clear its
463  * receive queue of packets that arrived from the previous peer. First, this
464  * allows flow control based only on wmem_alloc; second, an sk connected to a
465  * peer may receive messages only from that peer. */
466 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
467 {
468 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
469 		skb_queue_purge(&sk->sk_receive_queue);
470 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
471 
472 		/* If one link of a bidirectional dgram pipe is disconnected,
473 		 * we signal an error. Messages are lost. Do not do this
474 		 * when the peer was not connected to us.
475 		 */
476 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
477 			other->sk_err = ECONNRESET;
478 			other->sk_error_report(other);
479 		}
480 	}
481 }
482 
483 static void unix_sock_destructor(struct sock *sk)
484 {
485 	struct unix_sock *u = unix_sk(sk);
486 
487 	skb_queue_purge(&sk->sk_receive_queue);
488 
489 	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
490 	WARN_ON(!sk_unhashed(sk));
491 	WARN_ON(sk->sk_socket);
492 	if (!sock_flag(sk, SOCK_DEAD)) {
493 		pr_info("Attempt to release alive unix socket: %p\n", sk);
494 		return;
495 	}
496 
497 	if (u->addr)
498 		unix_release_addr(u->addr);
499 
500 	atomic_long_dec(&unix_nr_socks);
501 	local_bh_disable();
502 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
503 	local_bh_enable();
504 #ifdef UNIX_REFCNT_DEBUG
505 	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
506 		atomic_long_read(&unix_nr_socks));
507 #endif
508 }
509 
510 static void unix_release_sock(struct sock *sk, int embrion)
511 {
512 	struct unix_sock *u = unix_sk(sk);
513 	struct path path;
514 	struct sock *skpair;
515 	struct sk_buff *skb;
516 	int state;
517 
518 	unix_remove_socket(sk);
519 
520 	/* Clear state */
521 	unix_state_lock(sk);
522 	sock_orphan(sk);
523 	sk->sk_shutdown = SHUTDOWN_MASK;
524 	path	     = u->path;
525 	u->path.dentry = NULL;
526 	u->path.mnt = NULL;
527 	state = sk->sk_state;
528 	sk->sk_state = TCP_CLOSE;
529 	unix_state_unlock(sk);
530 
531 	wake_up_interruptible_all(&u->peer_wait);
532 
533 	skpair = unix_peer(sk);
534 
535 	if (skpair != NULL) {
536 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
537 			unix_state_lock(skpair);
538 			/* No more writes */
539 			skpair->sk_shutdown = SHUTDOWN_MASK;
540 			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
541 				skpair->sk_err = ECONNRESET;
542 			unix_state_unlock(skpair);
543 			skpair->sk_state_change(skpair);
544 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
545 		}
546 
547 		unix_dgram_peer_wake_disconnect(sk, skpair);
548 		sock_put(skpair); /* It may now die */
549 		unix_peer(sk) = NULL;
550 	}
551 
552 	/* Try to flush out this socket. Throw out buffers at least */
553 
554 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
555 		if (state == TCP_LISTEN)
556 			unix_release_sock(skb->sk, 1);
557 		/* passed fds are erased in the kfree_skb hook	      */
558 		UNIXCB(skb).consumed = skb->len;
559 		kfree_skb(skb);
560 	}
561 
562 	if (path.dentry)
563 		path_put(&path);
564 
565 	sock_put(sk);
566 
567 	/* ---- Socket is dead now and most probably destroyed ---- */
568 
569 	/*
570 	 * Fixme: BSD difference: In BSD all sockets connected to us get
571 	 *	  ECONNRESET and we die on the spot. In Linux we behave
572 	 *	  like files and pipes do and wait for the last
573 	 *	  dereference.
574 	 *
575 	 * Can't we simply set sock->err?
576 	 *
577 	 *	  What does the above comment talk about? --ANK(980817)
578 	 */
579 
580 	if (unix_tot_inflight)
581 		unix_gc();		/* Garbage collect fds */
582 }
583 
584 static void init_peercred(struct sock *sk)
585 {
586 	put_pid(sk->sk_peer_pid);
587 	if (sk->sk_peer_cred)
588 		put_cred(sk->sk_peer_cred);
589 	sk->sk_peer_pid  = get_pid(task_tgid(current));
590 	sk->sk_peer_cred = get_current_cred();
591 }
592 
593 static void copy_peercred(struct sock *sk, struct sock *peersk)
594 {
595 	put_pid(sk->sk_peer_pid);
596 	if (sk->sk_peer_cred)
597 		put_cred(sk->sk_peer_cred);
598 	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
599 	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
600 }
601 
602 static int unix_listen(struct socket *sock, int backlog)
603 {
604 	int err;
605 	struct sock *sk = sock->sk;
606 	struct unix_sock *u = unix_sk(sk);
607 	struct pid *old_pid = NULL;
608 
609 	err = -EOPNOTSUPP;
610 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
611 		goto out;	/* Only stream/seqpacket sockets accept */
612 	err = -EINVAL;
613 	if (!u->addr)
614 		goto out;	/* No listens on an unbound socket */
615 	unix_state_lock(sk);
616 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
617 		goto out_unlock;
618 	if (backlog > sk->sk_max_ack_backlog)
619 		wake_up_interruptible_all(&u->peer_wait);
620 	sk->sk_max_ack_backlog	= backlog;
621 	sk->sk_state		= TCP_LISTEN;
622 	/* set credentials so connect can copy them */
623 	init_peercred(sk);
624 	err = 0;
625 
626 out_unlock:
627 	unix_state_unlock(sk);
628 	put_pid(old_pid);
629 out:
630 	return err;
631 }
632 
633 static int unix_release(struct socket *);
634 static int unix_bind(struct socket *, struct sockaddr *, int);
635 static int unix_stream_connect(struct socket *, struct sockaddr *,
636 			       int addr_len, int flags);
637 static int unix_socketpair(struct socket *, struct socket *);
638 static int unix_accept(struct socket *, struct socket *, int);
639 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
640 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
641 static unsigned int unix_dgram_poll(struct file *, struct socket *,
642 				    poll_table *);
643 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
644 static int unix_shutdown(struct socket *, int);
645 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
646 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
647 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
648 				    size_t size, int flags);
649 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
650 				       struct pipe_inode_info *, size_t size,
651 				       unsigned int flags);
652 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
653 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
654 static int unix_dgram_connect(struct socket *, struct sockaddr *,
655 			      int, int);
656 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
657 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
658 				  int);
659 
660 static int unix_set_peek_off(struct sock *sk, int val)
661 {
662 	struct unix_sock *u = unix_sk(sk);
663 
664 	if (mutex_lock_interruptible(&u->readlock))
665 		return -EINTR;
666 
667 	sk->sk_peek_off = val;
668 	mutex_unlock(&u->readlock);
669 
670 	return 0;
671 }
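
/* Example: a userspace sketch of the peek offset guarded above,
 * assuming a connected AF_UNIX socket fd with queued data:
 *
 *      int off = 0;
 *
 *      setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
 *      recv(fd, buf, 16, MSG_PEEK);    peeks bytes 0..15
 *      recv(fd, buf, 16, MSG_PEEK);    peeks bytes 16..31
 *
 * Each MSG_PEEK advances the offset instead of re-reading the head of
 * the queue.
 */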
672 
673 
674 static const struct proto_ops unix_stream_ops = {
675 	.family =	PF_UNIX,
676 	.owner =	THIS_MODULE,
677 	.release =	unix_release,
678 	.bind =		unix_bind,
679 	.connect =	unix_stream_connect,
680 	.socketpair =	unix_socketpair,
681 	.accept =	unix_accept,
682 	.getname =	unix_getname,
683 	.poll =		unix_poll,
684 	.ioctl =	unix_ioctl,
685 	.listen =	unix_listen,
686 	.shutdown =	unix_shutdown,
687 	.setsockopt =	sock_no_setsockopt,
688 	.getsockopt =	sock_no_getsockopt,
689 	.sendmsg =	unix_stream_sendmsg,
690 	.recvmsg =	unix_stream_recvmsg,
691 	.mmap =		sock_no_mmap,
692 	.sendpage =	unix_stream_sendpage,
693 	.splice_read =	unix_stream_splice_read,
694 	.set_peek_off =	unix_set_peek_off,
695 };
696 
697 static const struct proto_ops unix_dgram_ops = {
698 	.family =	PF_UNIX,
699 	.owner =	THIS_MODULE,
700 	.release =	unix_release,
701 	.bind =		unix_bind,
702 	.connect =	unix_dgram_connect,
703 	.socketpair =	unix_socketpair,
704 	.accept =	sock_no_accept,
705 	.getname =	unix_getname,
706 	.poll =		unix_dgram_poll,
707 	.ioctl =	unix_ioctl,
708 	.listen =	sock_no_listen,
709 	.shutdown =	unix_shutdown,
710 	.setsockopt =	sock_no_setsockopt,
711 	.getsockopt =	sock_no_getsockopt,
712 	.sendmsg =	unix_dgram_sendmsg,
713 	.recvmsg =	unix_dgram_recvmsg,
714 	.mmap =		sock_no_mmap,
715 	.sendpage =	sock_no_sendpage,
716 	.set_peek_off =	unix_set_peek_off,
717 };
718 
719 static const struct proto_ops unix_seqpacket_ops = {
720 	.family =	PF_UNIX,
721 	.owner =	THIS_MODULE,
722 	.release =	unix_release,
723 	.bind =		unix_bind,
724 	.connect =	unix_stream_connect,
725 	.socketpair =	unix_socketpair,
726 	.accept =	unix_accept,
727 	.getname =	unix_getname,
728 	.poll =		unix_dgram_poll,
729 	.ioctl =	unix_ioctl,
730 	.listen =	unix_listen,
731 	.shutdown =	unix_shutdown,
732 	.setsockopt =	sock_no_setsockopt,
733 	.getsockopt =	sock_no_getsockopt,
734 	.sendmsg =	unix_seqpacket_sendmsg,
735 	.recvmsg =	unix_seqpacket_recvmsg,
736 	.mmap =		sock_no_mmap,
737 	.sendpage =	sock_no_sendpage,
738 	.set_peek_off =	unix_set_peek_off,
739 };
740 
741 static struct proto unix_proto = {
742 	.name			= "UNIX",
743 	.owner			= THIS_MODULE,
744 	.obj_size		= sizeof(struct unix_sock),
745 };
746 
747 /*
748  * AF_UNIX sockets do not interact with hardware, hence they
749  * don't trigger interrupts - so it's safe for them to have
750  * bh-unsafe locking for their sk_receive_queue.lock. Split off
751  * this special lock-class by reinitializing the spinlock key:
752  */
753 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
754 
755 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
756 {
757 	struct sock *sk = NULL;
758 	struct unix_sock *u;
759 
760 	atomic_long_inc(&unix_nr_socks);
761 	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
762 		goto out;
763 
764 	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
765 	if (!sk)
766 		goto out;
767 
768 	sock_init_data(sock, sk);
769 	lockdep_set_class(&sk->sk_receive_queue.lock,
770 				&af_unix_sk_receive_queue_lock_key);
771 
772 	sk->sk_write_space	= unix_write_space;
773 	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
774 	sk->sk_destruct		= unix_sock_destructor;
775 	u	  = unix_sk(sk);
776 	u->path.dentry = NULL;
777 	u->path.mnt = NULL;
778 	spin_lock_init(&u->lock);
779 	atomic_long_set(&u->inflight, 0);
780 	INIT_LIST_HEAD(&u->link);
781 	mutex_init(&u->readlock); /* single task reading lock */
782 	init_waitqueue_head(&u->peer_wait);
783 	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
784 	unix_insert_socket(unix_sockets_unbound(sk), sk);
785 out:
786 	if (sk == NULL)
787 		atomic_long_dec(&unix_nr_socks);
788 	else {
789 		local_bh_disable();
790 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
791 		local_bh_enable();
792 	}
793 	return sk;
794 }
795 
796 static int unix_create(struct net *net, struct socket *sock, int protocol,
797 		       int kern)
798 {
799 	if (protocol && protocol != PF_UNIX)
800 		return -EPROTONOSUPPORT;
801 
802 	sock->state = SS_UNCONNECTED;
803 
804 	switch (sock->type) {
805 	case SOCK_STREAM:
806 		sock->ops = &unix_stream_ops;
807 		break;
808 		/*
809 		 *	Believe it or not, BSD has AF_UNIX, SOCK_RAW, though
810 		 *	nothing uses it.
811 		 */
812 	case SOCK_RAW:
813 		sock->type = SOCK_DGRAM;	/* fall through */
814 	case SOCK_DGRAM:
815 		sock->ops = &unix_dgram_ops;
816 		break;
817 	case SOCK_SEQPACKET:
818 		sock->ops = &unix_seqpacket_ops;
819 		break;
820 	default:
821 		return -ESOCKTNOSUPPORT;
822 	}
823 
824 	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
825 }
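
/* Note the SOCK_RAW case above falls through: socket(AF_UNIX, SOCK_RAW, 0)
 * succeeds, and the resulting socket is indistinguishable from SOCK_DGRAM
 * from then on - even getsockopt(SO_TYPE) reports SOCK_DGRAM, because
 * sock->type was rewritten before unix_create1() copied it into the sk.
 */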
826 
827 static int unix_release(struct socket *sock)
828 {
829 	struct sock *sk = sock->sk;
830 
831 	if (!sk)
832 		return 0;
833 
834 	unix_release_sock(sk, 0);
835 	sock->sk = NULL;
836 
837 	return 0;
838 }
839 
840 static int unix_autobind(struct socket *sock)
841 {
842 	struct sock *sk = sock->sk;
843 	struct net *net = sock_net(sk);
844 	struct unix_sock *u = unix_sk(sk);
845 	static u32 ordernum = 1;
846 	struct unix_address *addr;
847 	int err;
848 	unsigned int retries = 0;
849 
850 	err = mutex_lock_interruptible(&u->readlock);
851 	if (err)
852 		return err;
853 
854 	err = 0;
855 	if (u->addr)
856 		goto out;
857 
858 	err = -ENOMEM;
859 	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
860 	if (!addr)
861 		goto out;
862 
863 	addr->name->sun_family = AF_UNIX;
864 	atomic_set(&addr->refcnt, 1);
865 
866 retry:
867 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
868 	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
869 
870 	spin_lock(&unix_table_lock);
871 	ordernum = (ordernum+1)&0xFFFFF;
872 
873 	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
874 				      addr->hash)) {
875 		spin_unlock(&unix_table_lock);
876 		/*
877 		 * __unix_find_socket_byname() may take a long time if many names
878 		 * are already in use.
879 		 */
880 		cond_resched();
881 		/* Give up if all names seem to be in use. */
882 		if (retries++ == 0xFFFFF) {
883 			err = -ENOSPC;
884 			kfree(addr);
885 			goto out;
886 		}
887 		goto retry;
888 	}
889 	addr->hash ^= sk->sk_type;
890 
891 	__unix_remove_socket(sk);
892 	u->addr = addr;
893 	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
894 	spin_unlock(&unix_table_lock);
895 	err = 0;
896 
897 out:	mutex_unlock(&u->readlock);
898 	return err;
899 }
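
/* Example: a userspace sketch of triggering the autobind above.
 * Binding with only the address family (addr_len == sizeof(short))
 * assigns an abstract name of five hex digits:
 *
 *      struct sockaddr_un a = { .sun_family = AF_UNIX };
 *      socklen_t len = sizeof(a);
 *
 *      bind(fd, (struct sockaddr *)&a, sizeof(sa_family_t));
 *      getsockname(fd, (struct sockaddr *)&a, &len);
 *      now a.sun_path[0] == '\0', followed by five hex digits
 */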
900 
901 static struct sock *unix_find_other(struct net *net,
902 				    struct sockaddr_un *sunname, int len,
903 				    int type, unsigned int hash, int *error)
904 {
905 	struct sock *u;
906 	struct path path;
907 	int err = 0;
908 
909 	if (sunname->sun_path[0]) {
910 		struct inode *inode;
911 		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
912 		if (err)
913 			goto fail;
914 		inode = d_backing_inode(path.dentry);
915 		err = inode_permission(inode, MAY_WRITE);
916 		if (err)
917 			goto put_fail;
918 
919 		err = -ECONNREFUSED;
920 		if (!S_ISSOCK(inode->i_mode))
921 			goto put_fail;
922 		u = unix_find_socket_byinode(inode);
923 		if (!u)
924 			goto put_fail;
925 
926 		if (u->sk_type == type)
927 			touch_atime(&path);
928 
929 		path_put(&path);
930 
931 		err = -EPROTOTYPE;
932 		if (u->sk_type != type) {
933 			sock_put(u);
934 			goto fail;
935 		}
936 	} else {
937 		err = -ECONNREFUSED;
938 		u = unix_find_socket_byname(net, sunname, len, type, hash);
939 		if (u) {
940 			struct dentry *dentry;
941 			dentry = unix_sk(u)->path.dentry;
942 			if (dentry)
943 				touch_atime(&unix_sk(u)->path);
944 		} else
945 			goto fail;
946 	}
947 	return u;
948 
949 put_fail:
950 	path_put(&path);
951 fail:
952 	*error = err;
953 	return NULL;
954 }
955 
956 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
957 {
958 	struct dentry *dentry;
959 	struct path path;
960 	int err = 0;
961 	/*
962 	 * Get the parent directory, calculate the hash for last
963 	 * component.
964 	 */
965 	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
966 	err = PTR_ERR(dentry);
967 	if (IS_ERR(dentry))
968 		return err;
969 
970 	/*
971 	 * All right, let's create it.
972 	 */
973 	err = security_path_mknod(&path, dentry, mode, 0);
974 	if (!err) {
975 		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
976 		if (!err) {
977 			res->mnt = mntget(path.mnt);
978 			res->dentry = dget(dentry);
979 		}
980 	}
981 	done_path_create(&path, dentry);
982 	return err;
983 }
984 
985 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
986 {
987 	struct sock *sk = sock->sk;
988 	struct net *net = sock_net(sk);
989 	struct unix_sock *u = unix_sk(sk);
990 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
991 	char *sun_path = sunaddr->sun_path;
992 	int err;
993 	unsigned int hash;
994 	struct unix_address *addr;
995 	struct hlist_head *list;
996 
997 	err = -EINVAL;
998 	if (sunaddr->sun_family != AF_UNIX)
999 		goto out;
1000 
1001 	if (addr_len == sizeof(short)) {
1002 		err = unix_autobind(sock);
1003 		goto out;
1004 	}
1005 
1006 	err = unix_mkname(sunaddr, addr_len, &hash);
1007 	if (err < 0)
1008 		goto out;
1009 	addr_len = err;
1010 
1011 	err = mutex_lock_interruptible(&u->readlock);
1012 	if (err)
1013 		goto out;
1014 
1015 	err = -EINVAL;
1016 	if (u->addr)
1017 		goto out_up;
1018 
1019 	err = -ENOMEM;
1020 	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1021 	if (!addr)
1022 		goto out_up;
1023 
1024 	memcpy(addr->name, sunaddr, addr_len);
1025 	addr->len = addr_len;
1026 	addr->hash = hash ^ sk->sk_type;
1027 	atomic_set(&addr->refcnt, 1);
1028 
1029 	if (sun_path[0]) {
1030 		struct path path;
1031 		umode_t mode = S_IFSOCK |
1032 		       (SOCK_INODE(sock)->i_mode & ~current_umask());
1033 		err = unix_mknod(sun_path, mode, &path);
1034 		if (err) {
1035 			if (err == -EEXIST)
1036 				err = -EADDRINUSE;
1037 			unix_release_addr(addr);
1038 			goto out_up;
1039 		}
1040 		addr->hash = UNIX_HASH_SIZE;
1041 		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1);
1042 		spin_lock(&unix_table_lock);
1043 		u->path = path;
1044 		list = &unix_socket_table[hash];
1045 	} else {
1046 		spin_lock(&unix_table_lock);
1047 		err = -EADDRINUSE;
1048 		if (__unix_find_socket_byname(net, sunaddr, addr_len,
1049 					      sk->sk_type, hash)) {
1050 			unix_release_addr(addr);
1051 			goto out_unlock;
1052 		}
1053 
1054 		list = &unix_socket_table[addr->hash];
1055 	}
1056 
1057 	err = 0;
1058 	__unix_remove_socket(sk);
1059 	u->addr = addr;
1060 	__unix_insert_socket(list, sk);
1061 
1062 out_unlock:
1063 	spin_unlock(&unix_table_lock);
1064 out_up:
1065 	mutex_unlock(&u->readlock);
1066 out:
1067 	return err;
1068 }
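
/* Example: a userspace sketch of a filesystem bind. The socket inode
 * is created with mknod semantics, so a node left over from a previous
 * run makes bind() fail with EADDRINUSE (the -EEXIST mapping above);
 * the path is hypothetical:
 *
 *      struct sockaddr_un a = { .sun_family = AF_UNIX };
 *
 *      strncpy(a.sun_path, "/tmp/demo.sock", sizeof(a.sun_path) - 1);
 *      unlink(a.sun_path);
 *      bind(fd, (struct sockaddr *)&a, sizeof(a));
 */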
1069 
1070 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1071 {
1072 	if (unlikely(sk1 == sk2) || !sk2) {
1073 		unix_state_lock(sk1);
1074 		return;
1075 	}
1076 	if (sk1 < sk2) {
1077 		unix_state_lock(sk1);
1078 		unix_state_lock_nested(sk2);
1079 	} else {
1080 		unix_state_lock(sk2);
1081 		unix_state_lock_nested(sk1);
1082 	}
1083 }
1084 
1085 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1086 {
1087 	if (unlikely(sk1 == sk2) || !sk2) {
1088 		unix_state_unlock(sk1);
1089 		return;
1090 	}
1091 	unix_state_unlock(sk1);
1092 	unix_state_unlock(sk2);
1093 }
1094 
1095 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1096 			      int alen, int flags)
1097 {
1098 	struct sock *sk = sock->sk;
1099 	struct net *net = sock_net(sk);
1100 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1101 	struct sock *other;
1102 	unsigned int hash;
1103 	int err;
1104 
1105 	if (addr->sa_family != AF_UNSPEC) {
1106 		err = unix_mkname(sunaddr, alen, &hash);
1107 		if (err < 0)
1108 			goto out;
1109 		alen = err;
1110 
1111 		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1112 		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1113 			goto out;
1114 
1115 restart:
1116 		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1117 		if (!other)
1118 			goto out;
1119 
1120 		unix_state_double_lock(sk, other);
1121 
1122 		/* Apparently VFS overslept socket death. Retry. */
1123 		if (sock_flag(other, SOCK_DEAD)) {
1124 			unix_state_double_unlock(sk, other);
1125 			sock_put(other);
1126 			goto restart;
1127 		}
1128 
1129 		err = -EPERM;
1130 		if (!unix_may_send(sk, other))
1131 			goto out_unlock;
1132 
1133 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1134 		if (err)
1135 			goto out_unlock;
1136 
1137 	} else {
1138 		/*
1139 		 *	1003.1g breaking connected state with AF_UNSPEC
1140 		 */
1141 		other = NULL;
1142 		unix_state_double_lock(sk, other);
1143 	}
1144 
1145 	/*
1146 	 * If it was connected, reconnect.
1147 	 */
1148 	if (unix_peer(sk)) {
1149 		struct sock *old_peer = unix_peer(sk);
1150 		unix_peer(sk) = other;
1151 		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1152 
1153 		unix_state_double_unlock(sk, other);
1154 
1155 		if (other != old_peer)
1156 			unix_dgram_disconnected(sk, old_peer);
1157 		sock_put(old_peer);
1158 	} else {
1159 		unix_peer(sk) = other;
1160 		unix_state_double_unlock(sk, other);
1161 	}
1162 	return 0;
1163 
1164 out_unlock:
1165 	unix_state_double_unlock(sk, other);
1166 	sock_put(other);
1167 out:
1168 	return err;
1169 }
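
/* Example: a userspace sketch of the 1003.1g disconnect handled above.
 * connect()ing a datagram socket to an AF_UNSPEC address drops the
 * current peer association instead of forming a new one:
 *
 *      struct sockaddr sa = { .sa_family = AF_UNSPEC };
 *
 *      connect(dgram_fd, &sa, sizeof(sa));
 */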
1170 
1171 static long unix_wait_for_peer(struct sock *other, long timeo)
1172 {
1173 	struct unix_sock *u = unix_sk(other);
1174 	int sched;
1175 	DEFINE_WAIT(wait);
1176 
1177 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1178 
1179 	sched = !sock_flag(other, SOCK_DEAD) &&
1180 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1181 		unix_recvq_full(other);
1182 
1183 	unix_state_unlock(other);
1184 
1185 	if (sched)
1186 		timeo = schedule_timeout(timeo);
1187 
1188 	finish_wait(&u->peer_wait, &wait);
1189 	return timeo;
1190 }
1191 
1192 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1193 			       int addr_len, int flags)
1194 {
1195 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1196 	struct sock *sk = sock->sk;
1197 	struct net *net = sock_net(sk);
1198 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1199 	struct sock *newsk = NULL;
1200 	struct sock *other = NULL;
1201 	struct sk_buff *skb = NULL;
1202 	unsigned int hash;
1203 	int st;
1204 	int err;
1205 	long timeo;
1206 
1207 	err = unix_mkname(sunaddr, addr_len, &hash);
1208 	if (err < 0)
1209 		goto out;
1210 	addr_len = err;
1211 
1212 	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1213 	    (err = unix_autobind(sock)) != 0)
1214 		goto out;
1215 
1216 	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1217 
1218 	/* First of all allocate resources.
1219 	   If we do it after the state is locked,
1220 	   we will have to recheck everything again in any case.
1221 	 */
1222 
1223 	err = -ENOMEM;
1224 
1225 	/* create new sock for complete connection */
1226 	newsk = unix_create1(sock_net(sk), NULL, 0);
1227 	if (newsk == NULL)
1228 		goto out;
1229 
1230 	/* Allocate skb for sending to listening sock */
1231 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1232 	if (skb == NULL)
1233 		goto out;
1234 
1235 restart:
1236 	/*  Find listening sock. */
1237 	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1238 	if (!other)
1239 		goto out;
1240 
1241 	/* Latch state of peer */
1242 	unix_state_lock(other);
1243 
1244 	/* Apparently VFS overslept socket death. Retry. */
1245 	if (sock_flag(other, SOCK_DEAD)) {
1246 		unix_state_unlock(other);
1247 		sock_put(other);
1248 		goto restart;
1249 	}
1250 
1251 	err = -ECONNREFUSED;
1252 	if (other->sk_state != TCP_LISTEN)
1253 		goto out_unlock;
1254 	if (other->sk_shutdown & RCV_SHUTDOWN)
1255 		goto out_unlock;
1256 
1257 	if (unix_recvq_full(other)) {
1258 		err = -EAGAIN;
1259 		if (!timeo)
1260 			goto out_unlock;
1261 
1262 		timeo = unix_wait_for_peer(other, timeo);
1263 
1264 		err = sock_intr_errno(timeo);
1265 		if (signal_pending(current))
1266 			goto out;
1267 		sock_put(other);
1268 		goto restart;
1269 	}
1270 
1271 	/* Latch our state.
1272 
1273 	   This is a tricky place. We need to grab our state lock and cannot
1274 	   drop the lock on the peer. It is dangerous because a deadlock is
1275 	   possible. The connect-to-self case and simultaneous
1276 	   attempts to connect are eliminated by checking the socket
1277 	   state. other is TCP_LISTEN; if sk were TCP_LISTEN we would have
1278 	   checked this before attempting to grab the lock.
1279 
1280 	   Well, and we have to recheck the state after the socket is locked.
1281 	 */
1282 	st = sk->sk_state;
1283 
1284 	switch (st) {
1285 	case TCP_CLOSE:
1286 		/* This is ok... continue with connect */
1287 		break;
1288 	case TCP_ESTABLISHED:
1289 		/* Socket is already connected */
1290 		err = -EISCONN;
1291 		goto out_unlock;
1292 	default:
1293 		err = -EINVAL;
1294 		goto out_unlock;
1295 	}
1296 
1297 	unix_state_lock_nested(sk);
1298 
1299 	if (sk->sk_state != st) {
1300 		unix_state_unlock(sk);
1301 		unix_state_unlock(other);
1302 		sock_put(other);
1303 		goto restart;
1304 	}
1305 
1306 	err = security_unix_stream_connect(sk, other, newsk);
1307 	if (err) {
1308 		unix_state_unlock(sk);
1309 		goto out_unlock;
1310 	}
1311 
1312 	/* The way is open! Quickly set all the necessary fields... */
1313 
1314 	sock_hold(sk);
1315 	unix_peer(newsk)	= sk;
1316 	newsk->sk_state		= TCP_ESTABLISHED;
1317 	newsk->sk_type		= sk->sk_type;
1318 	init_peercred(newsk);
1319 	newu = unix_sk(newsk);
1320 	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1321 	otheru = unix_sk(other);
1322 
1323 	/* copy address information from the listening to the new sock */
1324 	if (otheru->addr) {
1325 		atomic_inc(&otheru->addr->refcnt);
1326 		newu->addr = otheru->addr;
1327 	}
1328 	if (otheru->path.dentry) {
1329 		path_get(&otheru->path);
1330 		newu->path = otheru->path;
1331 	}
1332 
1333 	/* Set credentials */
1334 	copy_peercred(sk, other);
1335 
1336 	sock->state	= SS_CONNECTED;
1337 	sk->sk_state	= TCP_ESTABLISHED;
1338 	sock_hold(newsk);
1339 
1340 	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
1341 	unix_peer(sk)	= newsk;
1342 
1343 	unix_state_unlock(sk);
1344 
1345 	/* take it and send info to the listening sock */
1346 	spin_lock(&other->sk_receive_queue.lock);
1347 	__skb_queue_tail(&other->sk_receive_queue, skb);
1348 	spin_unlock(&other->sk_receive_queue.lock);
1349 	unix_state_unlock(other);
1350 	other->sk_data_ready(other);
1351 	sock_put(other);
1352 	return 0;
1353 
1354 out_unlock:
1355 	if (other)
1356 		unix_state_unlock(other);
1357 
1358 out:
1359 	kfree_skb(skb);
1360 	if (newsk)
1361 		unix_release_sock(newsk, 0);
1362 	if (other)
1363 		sock_put(other);
1364 	return err;
1365 }
1366 
1367 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1368 {
1369 	struct sock *ska = socka->sk, *skb = sockb->sk;
1370 
1371 	/* Join our sockets back to back */
1372 	sock_hold(ska);
1373 	sock_hold(skb);
1374 	unix_peer(ska) = skb;
1375 	unix_peer(skb) = ska;
1376 	init_peercred(ska);
1377 	init_peercred(skb);
1378 
1379 	if (ska->sk_type != SOCK_DGRAM) {
1380 		ska->sk_state = TCP_ESTABLISHED;
1381 		skb->sk_state = TCP_ESTABLISHED;
1382 		socka->state  = SS_CONNECTED;
1383 		sockb->state  = SS_CONNECTED;
1384 	}
1385 	return 0;
1386 }
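
/* Example: the classic userspace use of the above. A SOCK_SEQPACKET
 * pair comes back already connected, with message boundaries kept:
 *
 *      int sv[2];
 *      char buf[4];
 *
 *      socketpair(AF_UNIX, SOCK_SEQPACKET, 0, sv);
 *      write(sv[0], "ping", 4);
 *      read(sv[1], buf, sizeof(buf));
 */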
1387 
1388 static void unix_sock_inherit_flags(const struct socket *old,
1389 				    struct socket *new)
1390 {
1391 	if (test_bit(SOCK_PASSCRED, &old->flags))
1392 		set_bit(SOCK_PASSCRED, &new->flags);
1393 	if (test_bit(SOCK_PASSSEC, &old->flags))
1394 		set_bit(SOCK_PASSSEC, &new->flags);
1395 }
1396 
1397 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1398 {
1399 	struct sock *sk = sock->sk;
1400 	struct sock *tsk;
1401 	struct sk_buff *skb;
1402 	int err;
1403 
1404 	err = -EOPNOTSUPP;
1405 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1406 		goto out;
1407 
1408 	err = -EINVAL;
1409 	if (sk->sk_state != TCP_LISTEN)
1410 		goto out;
1411 
1412 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1413 	 * so no locks are necessary.
1414 	 */
1415 
1416 	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1417 	if (!skb) {
1418 		/* This means receive shutdown. */
1419 		if (err == 0)
1420 			err = -EINVAL;
1421 		goto out;
1422 	}
1423 
1424 	tsk = skb->sk;
1425 	skb_free_datagram(sk, skb);
1426 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1427 
1428 	/* attach accepted sock to socket */
1429 	unix_state_lock(tsk);
1430 	newsock->state = SS_CONNECTED;
1431 	unix_sock_inherit_flags(sock, newsock);
1432 	sock_graft(tsk, newsock);
1433 	unix_state_unlock(tsk);
1434 	return 0;
1435 
1436 out:
1437 	return err;
1438 }
1439 
1440 
1441 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1442 {
1443 	struct sock *sk = sock->sk;
1444 	struct unix_sock *u;
1445 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1446 	int err = 0;
1447 
1448 	if (peer) {
1449 		sk = unix_peer_get(sk);
1450 
1451 		err = -ENOTCONN;
1452 		if (!sk)
1453 			goto out;
1454 		err = 0;
1455 	} else {
1456 		sock_hold(sk);
1457 	}
1458 
1459 	u = unix_sk(sk);
1460 	unix_state_lock(sk);
1461 	if (!u->addr) {
1462 		sunaddr->sun_family = AF_UNIX;
1463 		sunaddr->sun_path[0] = 0;
1464 		*uaddr_len = sizeof(short);
1465 	} else {
1466 		struct unix_address *addr = u->addr;
1467 
1468 		*uaddr_len = addr->len;
1469 		memcpy(sunaddr, addr->name, *uaddr_len);
1470 	}
1471 	unix_state_unlock(sk);
1472 	sock_put(sk);
1473 out:
1474 	return err;
1475 }
1476 
1477 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1478 {
1479 	int i;
1480 
1481 	scm->fp = UNIXCB(skb).fp;
1482 	UNIXCB(skb).fp = NULL;
1483 
1484 	for (i = scm->fp->count-1; i >= 0; i--)
1485 		unix_notinflight(scm->fp->fp[i]);
1486 }
1487 
1488 static void unix_destruct_scm(struct sk_buff *skb)
1489 {
1490 	struct scm_cookie scm;
1491 	memset(&scm, 0, sizeof(scm));
1492 	scm.pid  = UNIXCB(skb).pid;
1493 	if (UNIXCB(skb).fp)
1494 		unix_detach_fds(&scm, skb);
1495 
1496 	/* Alas, it calls VFS */
1497 	/* So fscking what? fput() has been SMP-safe since last summer */
1498 	scm_destroy(&scm);
1499 	sock_wfree(skb);
1500 }
1501 
1502 #define MAX_RECURSION_LEVEL 4
1503 
1504 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1505 {
1506 	int i;
1507 	unsigned char max_level = 0;
1508 	int unix_sock_count = 0;
1509 
1510 	for (i = scm->fp->count - 1; i >= 0; i--) {
1511 		struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1512 
1513 		if (sk) {
1514 			unix_sock_count++;
1515 			max_level = max(max_level,
1516 					unix_sk(sk)->recursion_level);
1517 		}
1518 	}
1519 	if (unlikely(max_level > MAX_RECURSION_LEVEL))
1520 		return -ETOOMANYREFS;
1521 
1522 	/*
1523 	 * Need to duplicate file references for the sake of garbage
1524 	 * collection.  Otherwise a socket in the fps might become a
1525 	 * candidate for GC while the skb is not yet queued.
1526 	 */
1527 	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1528 	if (!UNIXCB(skb).fp)
1529 		return -ENOMEM;
1530 
1531 	if (unix_sock_count) {
1532 		for (i = scm->fp->count - 1; i >= 0; i--)
1533 			unix_inflight(scm->fp->fp[i]);
1534 	}
1535 	return max_level;
1536 }
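
/* Example: a userspace sketch of what arrives at unix_attach_fds() -
 * one descriptor sent as SCM_RIGHTS ancillary data (fd_to_pass and
 * sock_fd are hypothetical):
 *
 *      char cbuf[CMSG_SPACE(sizeof(int))], dummy = 'x';
 *      struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
 *      struct msghdr mh = { .msg_iov = &iov, .msg_iovlen = 1,
 *                           .msg_control = cbuf,
 *                           .msg_controllen = sizeof(cbuf) };
 *      struct cmsghdr *c = CMSG_FIRSTHDR(&mh);
 *
 *      c->cmsg_level = SOL_SOCKET;
 *      c->cmsg_type  = SCM_RIGHTS;
 *      c->cmsg_len   = CMSG_LEN(sizeof(int));
 *      memcpy(CMSG_DATA(c), &fd_to_pass, sizeof(int));
 *      sendmsg(sock_fd, &mh, 0);
 */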
1537 
1538 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1539 {
1540 	int err = 0;
1541 
1542 	UNIXCB(skb).pid  = get_pid(scm->pid);
1543 	UNIXCB(skb).uid = scm->creds.uid;
1544 	UNIXCB(skb).gid = scm->creds.gid;
1545 	UNIXCB(skb).fp = NULL;
1546 	unix_get_secdata(scm, skb);
1547 	if (scm->fp && send_fds)
1548 		err = unix_attach_fds(scm, skb);
1549 
1550 	skb->destructor = unix_destruct_scm;
1551 	return err;
1552 }
1553 
1554 static bool unix_passcred_enabled(const struct socket *sock,
1555 				  const struct sock *other)
1556 {
1557 	return test_bit(SOCK_PASSCRED, &sock->flags) ||
1558 	       !other->sk_socket ||
1559 	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1560 }
1561 
1562 /*
1563  * Some apps rely on write() giving SCM_CREDENTIALS
1564  * We include credentials if source or destination socket
1565  * asserted SOCK_PASSCRED.
1566  */
1567 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1568 			    const struct sock *other)
1569 {
1570 	if (UNIXCB(skb).pid)
1571 		return;
1572 	if (unix_passcred_enabled(sock, other)) {
1573 		UNIXCB(skb).pid  = get_pid(task_tgid(current));
1574 		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1575 	}
1576 }
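
/* Example: the receiver-side setting that makes the above fire. With
 * SO_PASSCRED enabled, every received message carries an
 * SCM_CREDENTIALS cmsg (struct ucred) even when the sender attached
 * none:
 *
 *      int on = 1;
 *
 *      setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
 */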
1577 
1578 static int maybe_init_creds(struct scm_cookie *scm,
1579 			    struct socket *socket,
1580 			    const struct sock *other)
1581 {
1582 	int err;
1583 	struct msghdr msg = { .msg_controllen = 0 };
1584 
1585 	err = scm_send(socket, &msg, scm, false);
1586 	if (err)
1587 		return err;
1588 
1589 	if (unix_passcred_enabled(socket, other)) {
1590 		scm->pid = get_pid(task_tgid(current));
1591 		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1592 	}
1593 	return err;
1594 }
1595 
1596 static bool unix_skb_scm_eq(struct sk_buff *skb,
1597 			    struct scm_cookie *scm)
1598 {
1599 	const struct unix_skb_parms *u = &UNIXCB(skb);
1600 
1601 	return u->pid == scm->pid &&
1602 	       uid_eq(u->uid, scm->creds.uid) &&
1603 	       gid_eq(u->gid, scm->creds.gid) &&
1604 	       unix_secdata_eq(scm, skb);
1605 }
1606 
1607 /*
1608  *	Send AF_UNIX data.
1609  */
1610 
1611 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1612 			      size_t len)
1613 {
1614 	struct sock *sk = sock->sk;
1615 	struct net *net = sock_net(sk);
1616 	struct unix_sock *u = unix_sk(sk);
1617 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1618 	struct sock *other = NULL;
1619 	int namelen = 0; /* fake GCC */
1620 	int err;
1621 	unsigned int hash;
1622 	struct sk_buff *skb;
1623 	long timeo;
1624 	struct scm_cookie scm;
1625 	int max_level;
1626 	int data_len = 0;
1627 	int sk_locked;
1628 
1629 	wait_for_unix_gc();
1630 	err = scm_send(sock, msg, &scm, false);
1631 	if (err < 0)
1632 		return err;
1633 
1634 	err = -EOPNOTSUPP;
1635 	if (msg->msg_flags&MSG_OOB)
1636 		goto out;
1637 
1638 	if (msg->msg_namelen) {
1639 		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1640 		if (err < 0)
1641 			goto out;
1642 		namelen = err;
1643 	} else {
1644 		sunaddr = NULL;
1645 		err = -ENOTCONN;
1646 		other = unix_peer_get(sk);
1647 		if (!other)
1648 			goto out;
1649 	}
1650 
1651 	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1652 	    && (err = unix_autobind(sock)) != 0)
1653 		goto out;
1654 
1655 	err = -EMSGSIZE;
1656 	if (len > sk->sk_sndbuf - 32)
1657 		goto out;
1658 
1659 	if (len > SKB_MAX_ALLOC) {
1660 		data_len = min_t(size_t,
1661 				 len - SKB_MAX_ALLOC,
1662 				 MAX_SKB_FRAGS * PAGE_SIZE);
1663 		data_len = PAGE_ALIGN(data_len);
1664 
1665 		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1666 	}
1667 
1668 	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1669 				   msg->msg_flags & MSG_DONTWAIT, &err,
1670 				   PAGE_ALLOC_COSTLY_ORDER);
1671 	if (skb == NULL)
1672 		goto out;
1673 
1674 	err = unix_scm_to_skb(&scm, skb, true);
1675 	if (err < 0)
1676 		goto out_free;
1677 	max_level = err + 1;
1678 
1679 	skb_put(skb, len - data_len);
1680 	skb->data_len = data_len;
1681 	skb->len = len;
1682 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1683 	if (err)
1684 		goto out_free;
1685 
1686 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1687 
1688 restart:
1689 	if (!other) {
1690 		err = -ECONNRESET;
1691 		if (sunaddr == NULL)
1692 			goto out_free;
1693 
1694 		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1695 					hash, &err);
1696 		if (other == NULL)
1697 			goto out_free;
1698 	}
1699 
1700 	if (sk_filter(other, skb) < 0) {
1701 		/* Toss the packet but do not return any error to the sender */
1702 		err = len;
1703 		goto out_free;
1704 	}
1705 
1706 	sk_locked = 0;
1707 	unix_state_lock(other);
1708 restart_locked:
1709 	err = -EPERM;
1710 	if (!unix_may_send(sk, other))
1711 		goto out_unlock;
1712 
1713 	if (unlikely(sock_flag(other, SOCK_DEAD))) {
1714 		/*
1715 		 *	Check with 1003.1g - what should
1716 		 *	a datagram error return here?
1717 		 */
1718 		unix_state_unlock(other);
1719 		sock_put(other);
1720 
1721 		if (!sk_locked)
1722 			unix_state_lock(sk);
1723 
1724 		err = 0;
1725 		if (unix_peer(sk) == other) {
1726 			unix_peer(sk) = NULL;
1727 			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1728 
1729 			unix_state_unlock(sk);
1730 
1731 			unix_dgram_disconnected(sk, other);
1732 			sock_put(other);
1733 			err = -ECONNREFUSED;
1734 		} else {
1735 			unix_state_unlock(sk);
1736 		}
1737 
1738 		other = NULL;
1739 		if (err)
1740 			goto out_free;
1741 		goto restart;
1742 	}
1743 
1744 	err = -EPIPE;
1745 	if (other->sk_shutdown & RCV_SHUTDOWN)
1746 		goto out_unlock;
1747 
1748 	if (sk->sk_type != SOCK_SEQPACKET) {
1749 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1750 		if (err)
1751 			goto out_unlock;
1752 	}
1753 
1754 	if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1755 		if (timeo) {
1756 			timeo = unix_wait_for_peer(other, timeo);
1757 
1758 			err = sock_intr_errno(timeo);
1759 			if (signal_pending(current))
1760 				goto out_free;
1761 
1762 			goto restart;
1763 		}
1764 
1765 		if (!sk_locked) {
1766 			unix_state_unlock(other);
1767 			unix_state_double_lock(sk, other);
1768 		}
1769 
1770 		if (unix_peer(sk) != other ||
1771 		    unix_dgram_peer_wake_me(sk, other)) {
1772 			err = -EAGAIN;
1773 			sk_locked = 1;
1774 			goto out_unlock;
1775 		}
1776 
1777 		if (!sk_locked) {
1778 			sk_locked = 1;
1779 			goto restart_locked;
1780 		}
1781 	}
1782 
1783 	if (unlikely(sk_locked))
1784 		unix_state_unlock(sk);
1785 
1786 	if (sock_flag(other, SOCK_RCVTSTAMP))
1787 		__net_timestamp(skb);
1788 	maybe_add_creds(skb, sock, other);
1789 	skb_queue_tail(&other->sk_receive_queue, skb);
1790 	if (max_level > unix_sk(other)->recursion_level)
1791 		unix_sk(other)->recursion_level = max_level;
1792 	unix_state_unlock(other);
1793 	other->sk_data_ready(other);
1794 	sock_put(other);
1795 	scm_destroy(&scm);
1796 	return len;
1797 
1798 out_unlock:
1799 	if (sk_locked)
1800 		unix_state_unlock(sk);
1801 	unix_state_unlock(other);
1802 out_free:
1803 	kfree_skb(skb);
1804 out:
1805 	if (other)
1806 		sock_put(other);
1807 	scm_destroy(&scm);
1808 	return err;
1809 }
1810 
1811 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1812  * bytes, and a minimum of a full page.
1813  */
1814 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
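
/* E.g. with 4 KiB pages get_order(32768) == 3, so UNIX_SKB_FRAGS_SZ is
 * 4096 << 3 == 32768; with 64 KiB pages get_order(32768) == 0 and the
 * full-page floor makes it 65536.
 */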
1815 
1816 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1817 			       size_t len)
1818 {
1819 	struct sock *sk = sock->sk;
1820 	struct sock *other = NULL;
1821 	int err, size;
1822 	struct sk_buff *skb;
1823 	int sent = 0;
1824 	struct scm_cookie scm;
1825 	bool fds_sent = false;
1826 	int max_level;
1827 	int data_len;
1828 
1829 	wait_for_unix_gc();
1830 	err = scm_send(sock, msg, &scm, false);
1831 	if (err < 0)
1832 		return err;
1833 
1834 	err = -EOPNOTSUPP;
1835 	if (msg->msg_flags&MSG_OOB)
1836 		goto out_err;
1837 
1838 	if (msg->msg_namelen) {
1839 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1840 		goto out_err;
1841 	} else {
1842 		err = -ENOTCONN;
1843 		other = unix_peer(sk);
1844 		if (!other)
1845 			goto out_err;
1846 	}
1847 
1848 	if (sk->sk_shutdown & SEND_SHUTDOWN)
1849 		goto pipe_err;
1850 
1851 	while (sent < len) {
1852 		size = len - sent;
1853 
1854 		/* Keep two messages in the pipe so it schedules better */
1855 		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1856 
1857 		/* allow fallback to order-0 allocations */
1858 		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1859 
1860 		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1861 
1862 		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1863 
1864 		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1865 					   msg->msg_flags & MSG_DONTWAIT, &err,
1866 					   get_order(UNIX_SKB_FRAGS_SZ));
1867 		if (!skb)
1868 			goto out_err;
1869 
1870 		/* Only send the fds in the first buffer */
1871 		err = unix_scm_to_skb(&scm, skb, !fds_sent);
1872 		if (err < 0) {
1873 			kfree_skb(skb);
1874 			goto out_err;
1875 		}
1876 		max_level = err + 1;
1877 		fds_sent = true;
1878 
1879 		skb_put(skb, size - data_len);
1880 		skb->data_len = data_len;
1881 		skb->len = size;
1882 		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1883 		if (err) {
1884 			kfree_skb(skb);
1885 			goto out_err;
1886 		}
1887 
1888 		unix_state_lock(other);
1889 
1890 		if (sock_flag(other, SOCK_DEAD) ||
1891 		    (other->sk_shutdown & RCV_SHUTDOWN))
1892 			goto pipe_err_free;
1893 
1894 		maybe_add_creds(skb, sock, other);
1895 		skb_queue_tail(&other->sk_receive_queue, skb);
1896 		if (max_level > unix_sk(other)->recursion_level)
1897 			unix_sk(other)->recursion_level = max_level;
1898 		unix_state_unlock(other);
1899 		other->sk_data_ready(other);
1900 		sent += size;
1901 	}
1902 
1903 	scm_destroy(&scm);
1904 
1905 	return sent;
1906 
1907 pipe_err_free:
1908 	unix_state_unlock(other);
1909 	kfree_skb(skb);
1910 pipe_err:
1911 	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1912 		send_sig(SIGPIPE, current, 0);
1913 	err = -EPIPE;
1914 out_err:
1915 	scm_destroy(&scm);
1916 	return sent ? : err;
1917 }
1918 
1919 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1920 				    int offset, size_t size, int flags)
1921 {
1922 	int err;
1923 	bool send_sigpipe = false;
1924 	bool init_scm = true;
1925 	struct scm_cookie scm;
1926 	struct sock *other, *sk = socket->sk;
1927 	struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1928 
1929 	if (flags & MSG_OOB)
1930 		return -EOPNOTSUPP;
1931 
1932 	other = unix_peer(sk);
1933 	if (!other || sk->sk_state != TCP_ESTABLISHED)
1934 		return -ENOTCONN;
1935 
1936 	if (false) {
1937 alloc_skb:
1938 		unix_state_unlock(other);
1939 		mutex_unlock(&unix_sk(other)->readlock);
1940 		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1941 					      &err, 0);
1942 		if (!newskb)
1943 			goto err;
1944 	}
1945 
1946 	/* we must acquire readlock as we modify already present
1947 	 * skbs in the sk_receive_queue and mess with skb->len
1948 	 */
1949 	err = mutex_lock_interruptible(&unix_sk(other)->readlock);
1950 	if (err) {
1951 		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1952 		goto err;
1953 	}
1954 
1955 	if (sk->sk_shutdown & SEND_SHUTDOWN) {
1956 		err = -EPIPE;
1957 		send_sigpipe = true;
1958 		goto err_unlock;
1959 	}
1960 
1961 	unix_state_lock(other);
1962 
1963 	if (sock_flag(other, SOCK_DEAD) ||
1964 	    other->sk_shutdown & RCV_SHUTDOWN) {
1965 		err = -EPIPE;
1966 		send_sigpipe = true;
1967 		goto err_state_unlock;
1968 	}
1969 
1970 	if (init_scm) {
1971 		err = maybe_init_creds(&scm, socket, other);
1972 		if (err)
1973 			goto err_state_unlock;
1974 		init_scm = false;
1975 	}
1976 
1977 	skb = skb_peek_tail(&other->sk_receive_queue);
1978 	if (tail && tail == skb) {
1979 		skb = newskb;
1980 	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
1981 		if (newskb) {
1982 			skb = newskb;
1983 		} else {
1984 			tail = skb;
1985 			goto alloc_skb;
1986 		}
1987 	} else if (newskb) {
1988 		/* fast path: the tail skb matched, so the speculatively
1989 		 * allocated newskb is unused; consume_skb() frees it and
1990 		 * would do no harm even if newskb were NULL
1991 		 */
1992 		consume_skb(newskb);
1993 		newskb = NULL;
1994 	}
1995 
1996 	if (skb_append_pagefrags(skb, page, offset, size)) {
1997 		tail = skb;
1998 		goto alloc_skb;
1999 	}
2000 
2001 	skb->len += size;
2002 	skb->data_len += size;
2003 	skb->truesize += size;
2004 	atomic_add(size, &sk->sk_wmem_alloc);
2005 
2006 	if (newskb) {
2007 		err = unix_scm_to_skb(&scm, skb, false);
2008 		if (err)
2009 			goto err_state_unlock;
2010 		spin_lock(&other->sk_receive_queue.lock);
2011 		__skb_queue_tail(&other->sk_receive_queue, newskb);
2012 		spin_unlock(&other->sk_receive_queue.lock);
2013 	}
2014 
2015 	unix_state_unlock(other);
2016 	mutex_unlock(&unix_sk(other)->readlock);
2017 
2018 	other->sk_data_ready(other);
2019 	scm_destroy(&scm);
2020 	return size;
2021 
2022 err_state_unlock:
2023 	unix_state_unlock(other);
2024 err_unlock:
2025 	mutex_unlock(&unix_sk(other)->readlock);
2026 err:
2027 	kfree_skb(newskb);
2028 	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2029 		send_sig(SIGPIPE, current, 0);
2030 	if (!init_scm)
2031 		scm_destroy(&scm);
2032 	return err;
2033 }
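
/*
 * Annotation (not part of the original source): this ->sendpage
 * handler is reached from sendfile(2) and splice(2); it appends page
 * fragments to the last skb already queued by the same sender where
 * possible, falling back to the alloc_skb trampoline above.  A hedged
 * userspace sketch (send_file_over_unix is a made-up helper name):
 */
#if 0 /* illustrative userspace example */
#include <sys/sendfile.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

/* Copy a whole file into a connected AF_UNIX stream socket. */
static int send_file_over_unix(int sock, const char *path)
{
	struct stat st;
	off_t off = 0;
	int fd = open(path, O_RDONLY);

	if (fd < 0)
		return -1;
	if (fstat(fd, &st) < 0) {
		close(fd);
		return -1;
	}
	while (off < st.st_size)
		if (sendfile(sock, fd, &off, st.st_size - off) < 0)
			break;
	close(fd);
	return off == st.st_size ? 0 : -1;
}
#endif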
2034 
2035 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2036 				  size_t len)
2037 {
2038 	int err;
2039 	struct sock *sk = sock->sk;
2040 
2041 	err = sock_error(sk);
2042 	if (err)
2043 		return err;
2044 
2045 	if (sk->sk_state != TCP_ESTABLISHED)
2046 		return -ENOTCONN;
2047 
2048 	if (msg->msg_namelen)
2049 		msg->msg_namelen = 0;
2050 
2051 	return unix_dgram_sendmsg(sock, msg, len);
2052 }
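
/*
 * Annotation (not part of the original source): SOCK_SEQPACKET on
 * AF_UNIX behaves like a connected datagram socket, so record
 * boundaries are preserved and any msg_name supplied by the sender is
 * ignored.  A minimal userspace sketch:
 */
#if 0 /* illustrative userspace example */
#include <sys/socket.h>
#include <assert.h>

int main(void)
{
	int fds[2];
	char buf[64];

	assert(socketpair(AF_UNIX, SOCK_SEQPACKET, 0, fds) == 0);
	assert(send(fds[0], "one", 3, 0) == 3);
	assert(send(fds[0], "two", 3, 0) == 3);
	/* each recv() returns exactly one record, unlike SOCK_STREAM */
	assert(recv(fds[1], buf, sizeof(buf), 0) == 3);
	assert(recv(fds[1], buf, sizeof(buf), 0) == 3);
	return 0;
}
#endif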
2053 
2054 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2055 				  size_t size, int flags)
2056 {
2057 	struct sock *sk = sock->sk;
2058 
2059 	if (sk->sk_state != TCP_ESTABLISHED)
2060 		return -ENOTCONN;
2061 
2062 	return unix_dgram_recvmsg(sock, msg, size, flags);
2063 }
2064 
2065 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2066 {
2067 	struct unix_sock *u = unix_sk(sk);
2068 
2069 	if (u->addr) {
2070 		msg->msg_namelen = u->addr->len;
2071 		memcpy(msg->msg_name, u->addr->name, u->addr->len);
2072 	}
2073 }
2074 
2075 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2076 			      size_t size, int flags)
2077 {
2078 	struct scm_cookie scm;
2079 	struct sock *sk = sock->sk;
2080 	struct unix_sock *u = unix_sk(sk);
2081 	int noblock = flags & MSG_DONTWAIT;
2082 	struct sk_buff *skb;
2083 	int err;
2084 	int peeked, skip;
2085 
2086 	err = -EOPNOTSUPP;
2087 	if (flags & MSG_OOB)
2088 		goto out;
2089 
2090 	err = mutex_lock_interruptible(&u->readlock);
2091 	if (unlikely(err)) {
2092 		/* recvmsg() in non-blocking mode is supposed to return -EAGAIN;
2093 		 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
2094 		 */
2095 		err = noblock ? -EAGAIN : -ERESTARTSYS;
2096 		goto out;
2097 	}
2098 
2099 	skip = sk_peek_offset(sk, flags);
2100 
2101 	skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
2102 	if (!skb) {
2103 		unix_state_lock(sk);
2104 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2105 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2106 		    (sk->sk_shutdown & RCV_SHUTDOWN))
2107 			err = 0;
2108 		unix_state_unlock(sk);
2109 		goto out_unlock;
2110 	}
2111 
2112 	wake_up_interruptible_sync_poll(&u->peer_wait,
2113 					POLLOUT | POLLWRNORM | POLLWRBAND);
2114 
2115 	if (msg->msg_name)
2116 		unix_copy_addr(msg, skb->sk);
2117 
2118 	if (size > skb->len - skip)
2119 		size = skb->len - skip;
2120 	else if (size < skb->len - skip)
2121 		msg->msg_flags |= MSG_TRUNC;
2122 
2123 	err = skb_copy_datagram_msg(skb, skip, msg, size);
2124 	if (err)
2125 		goto out_free;
2126 
2127 	if (sock_flag(sk, SOCK_RCVTSTAMP))
2128 		__sock_recv_timestamp(msg, sk, skb);
2129 
2130 	memset(&scm, 0, sizeof(scm));
2131 
2132 	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2133 	unix_set_secdata(&scm, skb);
2134 
2135 	if (!(flags & MSG_PEEK)) {
2136 		if (UNIXCB(skb).fp)
2137 			unix_detach_fds(&scm, skb);
2138 
2139 		sk_peek_offset_bwd(sk, skb->len);
2140 	} else {
2141 		/* It is questionable: on PEEK we could:
2142 		   - not return fds - good, but too simple 8)
2143 		   - return fds, and not return them on read (old strategy,
2144 		     apparently wrong)
2145 		   - clone fds (chosen here; it is the most universal
2146 		     solution)
2147 
2148 		   POSIX 1003.1g does not actually define this clearly
2149 		   at all. POSIX 1003.1g doesn't define a lot of things
2150 		   clearly, however!
2151 
2152 		*/
2153 
2154 		sk_peek_offset_fwd(sk, size);
2155 
2156 		if (UNIXCB(skb).fp)
2157 			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2158 	}
2159 	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2160 
2161 	scm_recv(sock, msg, &scm, flags);
2162 
2163 out_free:
2164 	skb_free_datagram(sk, skb);
2165 out_unlock:
2166 	mutex_unlock(&u->readlock);
2167 out:
2168 	return err;
2169 }
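
/*
 * Annotation (not part of the original source): with MSG_PEEK the fds
 * attached to a queued message are cloned (scm_fp_dup above), so a
 * peek hands out working descriptors while the message, fds included,
 * remains queued for the next read.  A hedged userspace sketch of
 * receiving one SCM_RIGHTS descriptor (recv_fd is a made-up helper):
 */
#if 0 /* illustrative userspace example */
#include <sys/socket.h>
#include <string.h>

static int recv_fd(int sock, int flags)
{
	union {				/* aligned cmsg buffer */
		char buf[CMSG_SPACE(sizeof(int))];
		struct cmsghdr align;
	} u;
	char data;
	struct iovec iov = { .iov_base = &data, .iov_len = 1 };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = u.buf, .msg_controllen = sizeof(u.buf),
	};
	struct cmsghdr *c;
	int fd = -1;

	if (recvmsg(sock, &msg, flags) < 0)
		return -1;
	for (c = CMSG_FIRSTHDR(&msg); c; c = CMSG_NXTHDR(&msg, c))
		if (c->cmsg_level == SOL_SOCKET && c->cmsg_type == SCM_RIGHTS)
			memcpy(&fd, CMSG_DATA(c), sizeof(fd));
	return fd;			/* valid even when flags == MSG_PEEK */
}
#endif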
2170 
2171 /*
2172  *	Sleep until more data has arrived. But check for races.
2173  */
2174 static long unix_stream_data_wait(struct sock *sk, long timeo,
2175 				  struct sk_buff *last, unsigned int last_len)
2176 {
2177 	struct sk_buff *tail;
2178 	DEFINE_WAIT(wait);
2179 
2180 	unix_state_lock(sk);
2181 
2182 	for (;;) {
2183 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2184 
2185 		tail = skb_peek_tail(&sk->sk_receive_queue);
2186 		if (tail != last ||
2187 		    (tail && tail->len != last_len) ||
2188 		    sk->sk_err ||
2189 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2190 		    signal_pending(current) ||
2191 		    !timeo)
2192 			break;
2193 
2194 		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2195 		unix_state_unlock(sk);
2196 		timeo = freezable_schedule_timeout(timeo);
2197 		unix_state_lock(sk);
2198 
2199 		if (sock_flag(sk, SOCK_DEAD))
2200 			break;
2201 
2202 		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2203 	}
2204 
2205 	finish_wait(sk_sleep(sk), &wait);
2206 	unix_state_unlock(sk);
2207 	return timeo;
2208 }
2209 
2210 static unsigned int unix_skb_len(const struct sk_buff *skb)
2211 {
2212 	return skb->len - UNIXCB(skb).consumed;
2213 }
2214 
2215 struct unix_stream_read_state {
2216 	int (*recv_actor)(struct sk_buff *, int, int,
2217 			  struct unix_stream_read_state *);
2218 	struct socket *socket;
2219 	struct msghdr *msg;
2220 	struct pipe_inode_info *pipe;
2221 	size_t size;
2222 	int flags;
2223 	unsigned int splice_flags;
2224 };
2225 
2226 static int unix_stream_read_generic(struct unix_stream_read_state *state)
2227 {
2228 	struct scm_cookie scm;
2229 	struct socket *sock = state->socket;
2230 	struct sock *sk = sock->sk;
2231 	struct unix_sock *u = unix_sk(sk);
2232 	int copied = 0;
2233 	int flags = state->flags;
2234 	int noblock = flags & MSG_DONTWAIT;
2235 	bool check_creds = false;
2236 	int target;
2237 	int err = 0;
2238 	long timeo;
2239 	int skip;
2240 	size_t size = state->size;
2241 	unsigned int last_len;
2242 
2243 	err = -EINVAL;
2244 	if (sk->sk_state != TCP_ESTABLISHED)
2245 		goto out;
2246 
2247 	err = -EOPNOTSUPP;
2248 	if (flags & MSG_OOB)
2249 		goto out;
2250 
2251 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2252 	timeo = sock_rcvtimeo(sk, noblock);
2253 
2254 	memset(&scm, 0, sizeof(scm));
2255 
2256 	/* Lock the socket to prevent queue disordering
2257 	 * while we sleep copying data to the message
2258 	 */
2259 	mutex_lock(&u->readlock);
2260 
2261 	if (flags & MSG_PEEK)
2262 		skip = sk_peek_offset(sk, flags);
2263 	else
2264 		skip = 0;
2265 
2266 	do {
2267 		int chunk;
2268 		bool drop_skb;
2269 		struct sk_buff *skb, *last;
2270 
2271 		unix_state_lock(sk);
2272 		if (sock_flag(sk, SOCK_DEAD)) {
2273 			err = -ECONNRESET;
2274 			goto unlock;
2275 		}
2276 		last = skb = skb_peek(&sk->sk_receive_queue);
2277 		last_len = last ? last->len : 0;
2278 again:
2279 		if (skb == NULL) {
2280 			unix_sk(sk)->recursion_level = 0;
2281 			if (copied >= target)
2282 				goto unlock;
2283 
2284 			/*
2285 			 *	POSIX 1003.1g mandates this order.
2286 			 */
2287 
2288 			err = sock_error(sk);
2289 			if (err)
2290 				goto unlock;
2291 			if (sk->sk_shutdown & RCV_SHUTDOWN)
2292 				goto unlock;
2293 
2294 			unix_state_unlock(sk);
2295 			err = -EAGAIN;
2296 			if (!timeo)
2297 				break;
2298 			mutex_unlock(&u->readlock);
2299 
2300 			timeo = unix_stream_data_wait(sk, timeo, last,
2301 						      last_len);
2302 
2303 			if (signal_pending(current)) {
2304 				err = sock_intr_errno(timeo);
2305 				goto out;
2306 			}
2307 
2308 			mutex_lock(&u->readlock);
2309 			continue;
2310 unlock:
2311 			unix_state_unlock(sk);
2312 			break;
2313 		}
2314 
2315 		while (skip >= unix_skb_len(skb)) {
2316 			skip -= unix_skb_len(skb);
2317 			last = skb;
2318 			last_len = skb->len;
2319 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2320 			if (!skb)
2321 				goto again;
2322 		}
2323 
2324 		unix_state_unlock(sk);
2325 
2326 		if (check_creds) {
2327 			/* Never glue messages from different writers */
2328 			if (!unix_skb_scm_eq(skb, &scm))
2329 				break;
2330 		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2331 			/* Copy credentials */
2332 			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2333 			unix_set_secdata(&scm, skb);
2334 			check_creds = true;
2335 		}
2336 
2337 		/* Copy address just once */
2338 		if (state->msg && state->msg->msg_name) {
2339 			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2340 					 state->msg->msg_name);
2341 			unix_copy_addr(state->msg, skb->sk);
2342 			sunaddr = NULL;
2343 		}
2344 
2345 		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2346 		skb_get(skb);
2347 		chunk = state->recv_actor(skb, skip, chunk, state);
2348 		drop_skb = !unix_skb_len(skb);
2349 		/* skb is only safe to use if !drop_skb */
2350 		consume_skb(skb);
2351 		if (chunk < 0) {
2352 			if (copied == 0)
2353 				copied = -EFAULT;
2354 			break;
2355 		}
2356 		copied += chunk;
2357 		size -= chunk;
2358 
2359 		if (drop_skb) {
2360 			/* the skb was touched by a concurrent reader;
2361 			 * we must not expect anything more from this
2362 			 * skb and treat it as invalid - we can be
2363 			 * sure it was dropped from the socket queue
2364 			 *
2365 			 * let's report a short read
2366 			 */
2367 			err = 0;
2368 			break;
2369 		}
2370 
2371 		/* Mark read part of skb as used */
2372 		if (!(flags & MSG_PEEK)) {
2373 			UNIXCB(skb).consumed += chunk;
2374 
2375 			sk_peek_offset_bwd(sk, chunk);
2376 
2377 			if (UNIXCB(skb).fp)
2378 				unix_detach_fds(&scm, skb);
2379 
2380 			if (unix_skb_len(skb))
2381 				break;
2382 
2383 			skb_unlink(skb, &sk->sk_receive_queue);
2384 			consume_skb(skb);
2385 
2386 			if (scm.fp)
2387 				break;
2388 		} else {
2389 			/* It is questionable, see note in unix_dgram_recvmsg.
2390 			 */
2391 			if (UNIXCB(skb).fp)
2392 				scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2393 
2394 			sk_peek_offset_fwd(sk, chunk);
2395 
2396 			if (UNIXCB(skb).fp)
2397 				break;
2398 
2399 			skip = 0;
2400 			last = skb;
2401 			last_len = skb->len;
2402 			unix_state_lock(sk);
2403 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2404 			if (skb)
2405 				goto again;
2406 			unix_state_unlock(sk);
2407 			break;
2408 		}
2409 	} while (size);
2410 
2411 	mutex_unlock(&u->readlock);
2412 	if (state->msg)
2413 		scm_recv(sock, state->msg, &scm, flags);
2414 	else
2415 		scm_destroy(&scm);
2416 out:
2417 	return copied ? : err;
2418 }
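
/*
 * Annotation (not part of the original source): the skip /
 * sk_peek_offset logic above implements SO_PEEK_OFF; once a peek
 * offset is enabled, successive MSG_PEEK reads walk forward through
 * the queue instead of rereading the same bytes.  A minimal userspace
 * sketch:
 */
#if 0 /* illustrative userspace example */
#include <sys/socket.h>
#include <assert.h>

int main(void)
{
	int fds[2], off = 0;
	char a[4] = "", b[4] = "";

	assert(socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == 0);
	assert(send(fds[0], "abcdef", 6, 0) == 6);

	/* enable a peek offset; each MSG_PEEK now advances it */
	setsockopt(fds[1], SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
	recv(fds[1], a, 3, MSG_PEEK);	/* "abc" */
	recv(fds[1], b, 3, MSG_PEEK);	/* "def", not "abc" again */
	assert(a[0] == 'a' && b[0] == 'd');
	return 0;
}
#endif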
2419 
2420 static int unix_stream_read_actor(struct sk_buff *skb,
2421 				  int skip, int chunk,
2422 				  struct unix_stream_read_state *state)
2423 {
2424 	int ret;
2425 
2426 	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2427 				    state->msg, chunk);
2428 	return ret ?: chunk;
2429 }
2430 
2431 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2432 			       size_t size, int flags)
2433 {
2434 	struct unix_stream_read_state state = {
2435 		.recv_actor = unix_stream_read_actor,
2436 		.socket = sock,
2437 		.msg = msg,
2438 		.size = size,
2439 		.flags = flags
2440 	};
2441 
2442 	return unix_stream_read_generic(&state);
2443 }
2444 
2445 static ssize_t skb_unix_socket_splice(struct sock *sk,
2446 				      struct pipe_inode_info *pipe,
2447 				      struct splice_pipe_desc *spd)
2448 {
2449 	int ret;
2450 	struct unix_sock *u = unix_sk(sk);
2451 
2452 	mutex_unlock(&u->readlock);
2453 	ret = splice_to_pipe(pipe, spd);
2454 	mutex_lock(&u->readlock);
2455 
2456 	return ret;
2457 }
2458 
2459 static int unix_stream_splice_actor(struct sk_buff *skb,
2460 				    int skip, int chunk,
2461 				    struct unix_stream_read_state *state)
2462 {
2463 	return skb_splice_bits(skb, state->socket->sk,
2464 			       UNIXCB(skb).consumed + skip,
2465 			       state->pipe, chunk, state->splice_flags,
2466 			       skb_unix_socket_splice);
2467 }
2468 
2469 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2470 				       struct pipe_inode_info *pipe,
2471 				       size_t size, unsigned int flags)
2472 {
2473 	struct unix_stream_read_state state = {
2474 		.recv_actor = unix_stream_splice_actor,
2475 		.socket = sock,
2476 		.pipe = pipe,
2477 		.size = size,
2478 		.splice_flags = flags,
2479 	};
2480 
2481 	if (unlikely(*ppos))
2482 		return -ESPIPE;
2483 
2484 	if (sock->file->f_flags & O_NONBLOCK ||
2485 	    flags & SPLICE_F_NONBLOCK)
2486 		state.flags = MSG_DONTWAIT;
2487 
2488 	return unix_stream_read_generic(&state);
2489 }
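
/*
 * Annotation (not part of the original source): splice(2) from an
 * AF_UNIX stream socket lands here; skb_unix_socket_splice() above
 * drops the readlock around splice_to_pipe() so the reader does not
 * hold it while blocking on a full pipe.  A hedged userspace sketch
 * (sock_to_pipe is a made-up helper name):
 */
#if 0 /* illustrative userspace example */
#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

/* Move up to len bytes from a unix stream socket into a pipe without
 * a userspace copy; SPLICE_F_NONBLOCK maps to MSG_DONTWAIT above. */
static ssize_t sock_to_pipe(int sock, int pipe_wr, size_t len)
{
	return splice(sock, NULL, pipe_wr, NULL, len, SPLICE_F_NONBLOCK);
}
#endif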
2490 
2491 static int unix_shutdown(struct socket *sock, int mode)
2492 {
2493 	struct sock *sk = sock->sk;
2494 	struct sock *other;
2495 
2496 	if (mode < SHUT_RD || mode > SHUT_RDWR)
2497 		return -EINVAL;
2498 	/* This maps:
2499 	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2500 	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2501 	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2502 	 */
2503 	++mode;
2504 
2505 	unix_state_lock(sk);
2506 	sk->sk_shutdown |= mode;
2507 	other = unix_peer(sk);
2508 	if (other)
2509 		sock_hold(other);
2510 	unix_state_unlock(sk);
2511 	sk->sk_state_change(sk);
2512 
2513 	if (other &&
2514 		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2515 
2516 		int peer_mode = 0;
2517 
2518 		if (mode & RCV_SHUTDOWN)
2519 			peer_mode |= SEND_SHUTDOWN;
2520 		if (mode & SEND_SHUTDOWN)
2521 			peer_mode |= RCV_SHUTDOWN;
2522 		unix_state_lock(other);
2523 		other->sk_shutdown |= peer_mode;
2524 		unix_state_unlock(other);
2525 		other->sk_state_change(other);
2526 		if (peer_mode == SHUTDOWN_MASK)
2527 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2528 		else if (peer_mode & RCV_SHUTDOWN)
2529 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2530 	}
2531 	if (other)
2532 		sock_put(other);
2533 
2534 	return 0;
2535 }
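
/*
 * Annotation (not part of the original source): for connected stream
 * and seqpacket sockets the shutdown mask is mirrored onto the peer,
 * so SHUT_WR on one end is observed as an immediate EOF on the other.
 * A minimal userspace sketch:
 */
#if 0 /* illustrative userspace example */
#include <sys/socket.h>
#include <assert.h>
#include <unistd.h>

int main(void)
{
	int fds[2];
	char c;

	assert(socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == 0);
	shutdown(fds[0], SHUT_WR);		/* peer gets RCV_SHUTDOWN */
	assert(read(fds[1], &c, 1) == 0);	/* EOF, no blocking */
	return 0;
}
#endif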
2536 
2537 long unix_inq_len(struct sock *sk)
2538 {
2539 	struct sk_buff *skb;
2540 	long amount = 0;
2541 
2542 	if (sk->sk_state == TCP_LISTEN)
2543 		return -EINVAL;
2544 
2545 	spin_lock(&sk->sk_receive_queue.lock);
2546 	if (sk->sk_type == SOCK_STREAM ||
2547 	    sk->sk_type == SOCK_SEQPACKET) {
2548 		skb_queue_walk(&sk->sk_receive_queue, skb)
2549 			amount += unix_skb_len(skb);
2550 	} else {
2551 		skb = skb_peek(&sk->sk_receive_queue);
2552 		if (skb)
2553 			amount = skb->len;
2554 	}
2555 	spin_unlock(&sk->sk_receive_queue.lock);
2556 
2557 	return amount;
2558 }
2559 EXPORT_SYMBOL_GPL(unix_inq_len);
2560 
2561 long unix_outq_len(struct sock *sk)
2562 {
2563 	return sk_wmem_alloc_get(sk);
2564 }
2565 EXPORT_SYMBOL_GPL(unix_outq_len);
2566 
2567 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2568 {
2569 	struct sock *sk = sock->sk;
2570 	long amount = 0;
2571 	int err;
2572 
2573 	switch (cmd) {
2574 	case SIOCOUTQ:
2575 		amount = unix_outq_len(sk);
2576 		err = put_user(amount, (int __user *)arg);
2577 		break;
2578 	case SIOCINQ:
2579 		amount = unix_inq_len(sk);
2580 		if (amount < 0)
2581 			err = amount;
2582 		else
2583 			err = put_user(amount, (int __user *)arg);
2584 		break;
2585 	default:
2586 		err = -ENOIOCTLCMD;
2587 		break;
2588 	}
2589 	return err;
2590 }
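
/*
 * Annotation (not part of the original source): SIOCINQ and SIOCOUTQ
 * surface unix_inq_len() and unix_outq_len() to userspace.  A minimal
 * sketch (report_queues is a made-up helper name):
 */
#if 0 /* illustrative userspace example */
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/sockios.h>
#include <stdio.h>

static void report_queues(int sock)
{
	int inq = 0, outq = 0;

	ioctl(sock, SIOCINQ, &inq);	/* unread bytes queued for us */
	ioctl(sock, SIOCOUTQ, &outq);	/* our sent bytes not yet freed */
	printf("inq=%d outq=%d\n", inq, outq);
}
#endif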
2591 
2592 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2593 {
2594 	struct sock *sk = sock->sk;
2595 	unsigned int mask;
2596 
2597 	sock_poll_wait(file, sk_sleep(sk), wait);
2598 	mask = 0;
2599 
2600 	/* exceptional events? */
2601 	if (sk->sk_err)
2602 		mask |= POLLERR;
2603 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2604 		mask |= POLLHUP;
2605 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2606 		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2607 
2608 	/* readable? */
2609 	if (!skb_queue_empty(&sk->sk_receive_queue))
2610 		mask |= POLLIN | POLLRDNORM;
2611 
2612 	/* Connection-based sockets need to check for termination and startup */
2613 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2614 	    sk->sk_state == TCP_CLOSE)
2615 		mask |= POLLHUP;
2616 
2617 	/*
2618 	 * We set writable also when the other side has shut down the
2619 	 * connection. This prevents stuck sockets.
2620 	 */
2621 	if (unix_writable(sk))
2622 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2623 
2624 	return mask;
2625 }
2626 
2627 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2628 				    poll_table *wait)
2629 {
2630 	struct sock *sk = sock->sk, *other;
2631 	unsigned int mask, writable;
2632 
2633 	sock_poll_wait(file, sk_sleep(sk), wait);
2634 	mask = 0;
2635 
2636 	/* exceptional events? */
2637 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2638 		mask |= POLLERR |
2639 			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
2640 
2641 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2642 		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2643 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2644 		mask |= POLLHUP;
2645 
2646 	/* readable? */
2647 	if (!skb_queue_empty(&sk->sk_receive_queue))
2648 		mask |= POLLIN | POLLRDNORM;
2649 
2650 	/* Connection-based sockets need to check for termination and startup */
2651 	if (sk->sk_type == SOCK_SEQPACKET) {
2652 		if (sk->sk_state == TCP_CLOSE)
2653 			mask |= POLLHUP;
2654 		/* connection hasn't started yet? */
2655 		if (sk->sk_state == TCP_SYN_SENT)
2656 			return mask;
2657 	}
2658 
2659 	/* No write status requested, avoid expensive OUT tests. */
2660 	if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2661 		return mask;
2662 
2663 	writable = unix_writable(sk);
2664 	if (writable) {
2665 		unix_state_lock(sk);
2666 
2667 		other = unix_peer(sk);
2668 		if (other && unix_peer(other) != sk &&
2669 		    unix_recvq_full(other) &&
2670 		    unix_dgram_peer_wake_me(sk, other))
2671 			writable = 0;
2672 
2673 		unix_state_unlock(sk);
2674 	}
2675 
2676 	if (writable)
2677 		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2678 	else
2679 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2680 
2681 	return mask;
2682 }
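
/*
 * Annotation (not part of the original source): unlike unix_poll(),
 * the datagram variant also consults the peer: a connected sender
 * whose peer receive queue is full registers on the peer's wait queue
 * and reports the socket as not writable.  A minimal userspace sketch
 * (wait_writable is a made-up helper name):
 */
#if 0 /* illustrative userspace example */
#include <poll.h>

/* Block until a connected AF_UNIX datagram socket becomes writable,
 * i.e. until the peer's receive queue drains below its limit. */
static int wait_writable(int sock, int timeout_ms)
{
	struct pollfd pfd = { .fd = sock, .events = POLLOUT };

	return poll(&pfd, 1, timeout_ms);	/* > 0 when POLLOUT is set */
}
#endif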
2683 
2684 #ifdef CONFIG_PROC_FS
2685 
2686 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2687 
2688 #define get_bucket(x) ((x) >> BUCKET_SPACE)
2689 #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2690 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
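
/*
 * Annotation (not part of the original source): *pos packs a bucket
 * index into the high bits and a 1-based offset within that bucket
 * into the low BUCKET_SPACE bits.  Assuming UNIX_HASH_BITS is 8 on a
 * 64-bit build, BUCKET_SPACE is 64 - (8 + 1) - 1 = 54, so bucket 3 at
 * offset 2 would be encoded as (3UL << 54) | 2.
 */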
2691 
2692 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2693 {
2694 	unsigned long offset = get_offset(*pos);
2695 	unsigned long bucket = get_bucket(*pos);
2696 	struct sock *sk;
2697 	unsigned long count = 0;
2698 
2699 	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2700 		if (sock_net(sk) != seq_file_net(seq))
2701 			continue;
2702 		if (++count == offset)
2703 			break;
2704 	}
2705 
2706 	return sk;
2707 }
2708 
2709 static struct sock *unix_next_socket(struct seq_file *seq,
2710 				     struct sock *sk,
2711 				     loff_t *pos)
2712 {
2713 	unsigned long bucket;
2714 
2715 	while (sk > (struct sock *)SEQ_START_TOKEN) {
2716 		sk = sk_next(sk);
2717 		if (!sk)
2718 			goto next_bucket;
2719 		if (sock_net(sk) == seq_file_net(seq))
2720 			return sk;
2721 	}
2722 
2723 	do {
2724 		sk = unix_from_bucket(seq, pos);
2725 		if (sk)
2726 			return sk;
2727 
2728 next_bucket:
2729 		bucket = get_bucket(*pos) + 1;
2730 		*pos = set_bucket_offset(bucket, 1);
2731 	} while (bucket < ARRAY_SIZE(unix_socket_table));
2732 
2733 	return NULL;
2734 }
2735 
2736 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2737 	__acquires(unix_table_lock)
2738 {
2739 	spin_lock(&unix_table_lock);
2740 
2741 	if (!*pos)
2742 		return SEQ_START_TOKEN;
2743 
2744 	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2745 		return NULL;
2746 
2747 	return unix_next_socket(seq, NULL, pos);
2748 }
2749 
2750 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2751 {
2752 	++*pos;
2753 	return unix_next_socket(seq, v, pos);
2754 }
2755 
2756 static void unix_seq_stop(struct seq_file *seq, void *v)
2757 	__releases(unix_table_lock)
2758 {
2759 	spin_unlock(&unix_table_lock);
2760 }
2761 
2762 static int unix_seq_show(struct seq_file *seq, void *v)
2763 {
2764 
2765 	if (v == SEQ_START_TOKEN)
2766 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2767 			 "Inode Path\n");
2768 	else {
2769 		struct sock *s = v;
2770 		struct unix_sock *u = unix_sk(s);
2771 		unix_state_lock(s);
2772 
2773 		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2774 			s,
2775 			atomic_read(&s->sk_refcnt),
2776 			0,
2777 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2778 			s->sk_type,
2779 			s->sk_socket ?
2780 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2781 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2782 			sock_i_ino(s));
2783 
2784 		if (u->addr) {
2785 			int i, len;
2786 			seq_putc(seq, ' ');
2787 
2788 			i = 0;
2789 			len = u->addr->len - sizeof(short);
2790 			if (!UNIX_ABSTRACT(s))
2791 				len--;
2792 			else {
2793 				seq_putc(seq, '@');
2794 				i++;
2795 			}
2796 			for ( ; i < len; i++)
2797 				seq_putc(seq, u->addr->name->sun_path[i]);
2798 		}
2799 		unix_state_unlock(s);
2800 		seq_putc(seq, '\n');
2801 	}
2802 
2803 	return 0;
2804 }
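
/*
 * Annotation (not part of the original source): a line emitted by the
 * function above looks like this (values illustrative):
 *
 *   ffff8800b8c3e000: 00000002 00000000 00010000 0001 01 16123 /run/foo.sock
 *
 * where Flags 00010000 is __SO_ACCEPTCON (listening), Type 0001 is
 * SOCK_STREAM and St is the SS_* state (01 == SS_UNCONNECTED); an
 * abstract address would be printed with a leading '@'.
 */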
2805 
2806 static const struct seq_operations unix_seq_ops = {
2807 	.start  = unix_seq_start,
2808 	.next   = unix_seq_next,
2809 	.stop   = unix_seq_stop,
2810 	.show   = unix_seq_show,
2811 };
2812 
2813 static int unix_seq_open(struct inode *inode, struct file *file)
2814 {
2815 	return seq_open_net(inode, file, &unix_seq_ops,
2816 			    sizeof(struct seq_net_private));
2817 }
2818 
2819 static const struct file_operations unix_seq_fops = {
2820 	.owner		= THIS_MODULE,
2821 	.open		= unix_seq_open,
2822 	.read		= seq_read,
2823 	.llseek		= seq_lseek,
2824 	.release	= seq_release_net,
2825 };
2826 
2827 #endif
2828 
2829 static const struct net_proto_family unix_family_ops = {
2830 	.family = PF_UNIX,
2831 	.create = unix_create,
2832 	.owner	= THIS_MODULE,
2833 };
2834 
2835 
2836 static int __net_init unix_net_init(struct net *net)
2837 {
2838 	int error = -ENOMEM;
2839 
2840 	net->unx.sysctl_max_dgram_qlen = 10;
2841 	if (unix_sysctl_register(net))
2842 		goto out;
2843 
2844 #ifdef CONFIG_PROC_FS
2845 	if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
2846 		unix_sysctl_unregister(net);
2847 		goto out;
2848 	}
2849 #endif
2850 	error = 0;
2851 out:
2852 	return error;
2853 }
2854 
2855 static void __net_exit unix_net_exit(struct net *net)
2856 {
2857 	unix_sysctl_unregister(net);
2858 	remove_proc_entry("unix", net->proc_net);
2859 }
2860 
2861 static struct pernet_operations unix_net_ops = {
2862 	.init = unix_net_init,
2863 	.exit = unix_net_exit,
2864 };
2865 
2866 static int __init af_unix_init(void)
2867 {
2868 	int rc = -1;
2869 
2870 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2871 
2872 	rc = proto_register(&unix_proto, 1);
2873 	if (rc != 0) {
2874 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2875 		goto out;
2876 	}
2877 
2878 	sock_register(&unix_family_ops);
2879 	register_pernet_subsys(&unix_net_ops);
2880 out:
2881 	return rc;
2882 }
2883 
2884 static void __exit af_unix_exit(void)
2885 {
2886 	sock_unregister(PF_UNIX);
2887 	proto_unregister(&unix_proto);
2888 	unregister_pernet_subsys(&unix_net_ops);
2889 }
2890 
2891 /* Earlier than device_initcall() so that other drivers invoking
2892    request_module() don't end up in a loop when modprobe tries
2893    to use a UNIX socket. But later than subsys_initcall() because
2894    we depend on stuff initialised there */
2895 fs_initcall(af_unix_init);
2896 module_exit(af_unix_exit);
2897 
2898 MODULE_LICENSE("GPL");
2899 MODULE_ALIAS_NETPROTO(PF_UNIX);
2900