xref: /openbmc/linux/net/unix/af_unix.c (revision 301a1613)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * NET4:	Implementation of BSD Unix domain sockets.
4  *
5  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
6  *
7  * Fixes:
8  *		Linus Torvalds	:	Assorted bug cures.
9  *		Niibe Yutaka	:	async I/O support.
10  *		Carsten Paeth	:	PF_UNIX check, address fixes.
11  *		Alan Cox	:	Limit size of allocated blocks.
12  *		Alan Cox	:	Fixed the stupid socketpair bug.
13  *		Alan Cox	:	BSD compatibility fine tuning.
14  *		Alan Cox	:	Fixed a bug in connect when interrupted.
15  *		Alan Cox	:	Sorted out a proper draft version of
16  *					file descriptor passing hacked up from
17  *					Mike Shaver's work.
18  *		Marty Leisner	:	Fixes to fd passing
19  *		Nick Nevin	:	recvmsg bugfix.
20  *		Alan Cox	:	Started proper garbage collector
21  *		Heiko Eißfeldt	:	Missing verify_area check
22  *		Alan Cox	:	Started POSIXisms
23  *		Andreas Schwab	:	Replace inode by dentry for proper
24  *					reference counting
25  *		Kirk Petersen	:	Made this a module
26  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
27  *					Lots of bug fixes.
28  *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
29  *					by the above two patches.
30  *	     Andrea Arcangeli	:	If possible we block in connect(2)
31  *					if the max backlog of the listening
32  *					socket has been reached. This won't
33  *					break old apps and it avoids hashing
34  *					a huge number of socks (for unix_gc()
35  *					performance reasons).
36  *					Security fix that limits the max
37  *					number of socks to 2*max_files and
38  *					the number of skbs queueable in the
39  *					dgram receiver.
40  *		Artur Skawina   :	Hash function optimizations
41  *	     Alexey Kuznetsov   :	Full scale SMP. Lots of bugs are introduced 8)
42  *	      Malcolm Beattie   :	Set peercred for socketpair
43  *	     Michal Ostrowski   :       Module initialization cleanup.
44  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
45  *	     				the core infrastructure is doing that
46  *	     				for all net proto families now (2.5.69+)
47  *
48  * Known differences from reference BSD that was tested:
49  *
50  *	[TO FIX]
51  *	ECONNREFUSED is not returned from one end of a connected() socket to the
52  *		other the moment one end closes.
53  *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
54  *		and a fake inode identifier (nor the BSD first-socket-fstat-twice bug).
55  *	[NOT TO FIX]
56  *	accept() returns a path name even if the connecting socket has closed
57  *		in the meantime (BSD loses the path and gives up).
58  *	accept() returns 0 length path for an unbound connector. BSD returns 16
59  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
61  *	BSD af_unix apparently has a connect that forgets to block properly.
62  *		(need to check this with the POSIX spec in detail)
63  *
64  * Differences from 2.0.0-11-... (ANK)
65  *	Bug fixes and improvements.
66  *		- client shutdown killed server socket.
67  *		- removed all useless cli/sti pairs.
68  *
69  *	Semantic changes/extensions.
70  *		- generic control message passing.
71  *		- SCM_CREDENTIALS control message.
72  *		- "Abstract" (not FS based) socket bindings.
73  *		  Abstract names are sequences of bytes (not zero terminated)
74  *		  starting with a zero byte, so that this name space does not
75  *		  intersect with BSD names.
76  */
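
/*
 * Illustrative sketch (userspace, not part of this file; fd1 and fd2 stand
 * for two unbound AF_UNIX sockets): the two address forms described above.
 * A filesystem socket uses a NUL-terminated path in sun_path; an abstract
 * socket sets sun_path[0] to zero and the name is the byte sequence that
 * follows, delimited by the address length:
 *
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *
 *	strcpy(a.sun_path, "/tmp/example");	// filesystem name
 *	bind(fd1, (struct sockaddr *)&a, sizeof(a));
 *
 *	a.sun_path[0] = 0;			// abstract name
 *	memcpy(a.sun_path + 1, "example", 7);
 *	bind(fd2, (struct sockaddr *)&a,
 *	     offsetof(struct sockaddr_un, sun_path) + 1 + 7);
 */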
77 
78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79 
80 #include <linux/module.h>
81 #include <linux/kernel.h>
82 #include <linux/signal.h>
83 #include <linux/sched/signal.h>
84 #include <linux/errno.h>
85 #include <linux/string.h>
86 #include <linux/stat.h>
87 #include <linux/dcache.h>
88 #include <linux/namei.h>
89 #include <linux/socket.h>
90 #include <linux/un.h>
91 #include <linux/fcntl.h>
92 #include <linux/termios.h>
93 #include <linux/sockios.h>
94 #include <linux/net.h>
95 #include <linux/in.h>
96 #include <linux/fs.h>
97 #include <linux/slab.h>
98 #include <linux/uaccess.h>
99 #include <linux/skbuff.h>
100 #include <linux/netdevice.h>
101 #include <net/net_namespace.h>
102 #include <net/sock.h>
103 #include <net/tcp_states.h>
104 #include <net/af_unix.h>
105 #include <linux/proc_fs.h>
106 #include <linux/seq_file.h>
107 #include <net/scm.h>
108 #include <linux/init.h>
109 #include <linux/poll.h>
110 #include <linux/rtnetlink.h>
111 #include <linux/mount.h>
112 #include <net/checksum.h>
113 #include <linux/security.h>
114 #include <linux/freezer.h>
115 #include <linux/file.h>
116 
117 #include "scm.h"
118 
119 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
120 EXPORT_SYMBOL_GPL(unix_socket_table);
121 DEFINE_SPINLOCK(unix_table_lock);
122 EXPORT_SYMBOL_GPL(unix_table_lock);
123 static atomic_long_t unix_nr_socks;
124 
125 
126 static struct hlist_head *unix_sockets_unbound(void *addr)
127 {
128 	unsigned long hash = (unsigned long)addr;
129 
130 	hash ^= hash >> 16;
131 	hash ^= hash >> 8;
132 	hash %= UNIX_HASH_SIZE;
133 	return &unix_socket_table[UNIX_HASH_SIZE + hash];
134 }
135 
136 #define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
137 
138 #ifdef CONFIG_SECURITY_NETWORK
139 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
140 {
141 	UNIXCB(skb).secid = scm->secid;
142 }
143 
144 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
145 {
146 	scm->secid = UNIXCB(skb).secid;
147 }
148 
149 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
150 {
151 	return (scm->secid == UNIXCB(skb).secid);
152 }
153 #else
154 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
155 { }
156 
157 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
158 { }
159 
160 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
161 {
162 	return true;
163 }
164 #endif /* CONFIG_SECURITY_NETWORK */
165 
166 /*
167  *  SMP locking strategy:
168  *    the hash table is protected by the unix_table_lock spinlock;
169  *    each socket's state is protected by a separate spinlock.
170  */
171 
172 static inline unsigned int unix_hash_fold(__wsum n)
173 {
174 	unsigned int hash = (__force unsigned int)csum_fold(n);
175 
176 	hash ^= hash>>8;
177 	return hash&(UNIX_HASH_SIZE-1);
178 }
179 
180 #define unix_peer(sk) (unix_sk(sk)->peer)
181 
182 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
183 {
184 	return unix_peer(osk) == sk;
185 }
186 
187 static inline int unix_may_send(struct sock *sk, struct sock *osk)
188 {
189 	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
190 }
191 
192 static inline int unix_recvq_full(const struct sock *sk)
193 {
194 	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
195 }
196 
197 static inline int unix_recvq_full_lockless(const struct sock *sk)
198 {
199 	return skb_queue_len_lockless(&sk->sk_receive_queue) >
200 		READ_ONCE(sk->sk_max_ack_backlog);
201 }
202 
203 struct sock *unix_peer_get(struct sock *s)
204 {
205 	struct sock *peer;
206 
207 	unix_state_lock(s);
208 	peer = unix_peer(s);
209 	if (peer)
210 		sock_hold(peer);
211 	unix_state_unlock(s);
212 	return peer;
213 }
214 EXPORT_SYMBOL_GPL(unix_peer_get);
215 
216 static inline void unix_release_addr(struct unix_address *addr)
217 {
218 	if (refcount_dec_and_test(&addr->refcnt))
219 		kfree(addr);
220 }
221 
222 /*
223  *	Check unix socket name:
224  *		- should not be zero length.
225  *		- if it does not start with a zero byte, it should be NUL terminated (an FS object)
226  *		- if it starts with a zero byte, it is an abstract name.
227  */
228 
229 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
230 {
231 	*hashp = 0;
232 
233 	if (len <= sizeof(short) || len > sizeof(*sunaddr))
234 		return -EINVAL;
235 	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
236 		return -EINVAL;
237 	if (sunaddr->sun_path[0]) {
238 		/*
239 		 * This may look like an off-by-one error but it is a bit more
240 		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
241 		 * sun_path[108] doesn't as such exist.  However in kernel space
242 		 * we are guaranteed that it is a valid memory location in our
243 		 * kernel address buffer.
244 		 */
245 		((char *)sunaddr)[len] = 0;
246 		len = strlen(sunaddr->sun_path)+1+sizeof(short);
247 		return len;
248 	}
249 
250 	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
251 	return len;
252 }
253 
254 static void __unix_remove_socket(struct sock *sk)
255 {
256 	sk_del_node_init(sk);
257 }
258 
259 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
260 {
261 	WARN_ON(!sk_unhashed(sk));
262 	sk_add_node(sk, list);
263 }
264 
265 static inline void unix_remove_socket(struct sock *sk)
266 {
267 	spin_lock(&unix_table_lock);
268 	__unix_remove_socket(sk);
269 	spin_unlock(&unix_table_lock);
270 }
271 
272 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
273 {
274 	spin_lock(&unix_table_lock);
275 	__unix_insert_socket(list, sk);
276 	spin_unlock(&unix_table_lock);
277 }
278 
279 static struct sock *__unix_find_socket_byname(struct net *net,
280 					      struct sockaddr_un *sunname,
281 					      int len, int type, unsigned int hash)
282 {
283 	struct sock *s;
284 
285 	sk_for_each(s, &unix_socket_table[hash ^ type]) {
286 		struct unix_sock *u = unix_sk(s);
287 
288 		if (!net_eq(sock_net(s), net))
289 			continue;
290 
291 		if (u->addr->len == len &&
292 		    !memcmp(u->addr->name, sunname, len))
293 			return s;
294 	}
295 	return NULL;
296 }
297 
298 static inline struct sock *unix_find_socket_byname(struct net *net,
299 						   struct sockaddr_un *sunname,
300 						   int len, int type,
301 						   unsigned int hash)
302 {
303 	struct sock *s;
304 
305 	spin_lock(&unix_table_lock);
306 	s = __unix_find_socket_byname(net, sunname, len, type, hash);
307 	if (s)
308 		sock_hold(s);
309 	spin_unlock(&unix_table_lock);
310 	return s;
311 }
312 
313 static struct sock *unix_find_socket_byinode(struct inode *i)
314 {
315 	struct sock *s;
316 
317 	spin_lock(&unix_table_lock);
318 	sk_for_each(s,
319 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
320 		struct dentry *dentry = unix_sk(s)->path.dentry;
321 
322 		if (dentry && d_backing_inode(dentry) == i) {
323 			sock_hold(s);
324 			goto found;
325 		}
326 	}
327 	s = NULL;
328 found:
329 	spin_unlock(&unix_table_lock);
330 	return s;
331 }
332 
333 /* Support code for asymmetrically connected dgram sockets
334  *
335  * If a datagram socket is connected to a socket not itself connected
336  * to the first socket (e.g., /dev/log), clients may only enqueue more
337  * messages if the present receive queue of the server socket is not
338  * "too large". This means there's a second writeability condition
339  * poll and sendmsg need to test. The dgram recv code will do a wake
340  * up on the peer_wait wait queue of a socket upon reception of a
341  * datagram which needs to be propagated to sleeping would-be writers
342  * since these might not have sent anything so far. This can't be
343  * accomplished via poll_wait because the lifetime of the server
344  * socket might be less than that of its clients if these break their
345  * association with it or if the server socket is closed while clients
346  * are still connected to it and there's no way to inform "a polling
347  * implementation" that it should let go of a certain wait queue.
348  *
349  * In order to propagate a wake up, a wait_queue_entry_t of the client
350  * socket is enqueued on the peer_wait queue of the server socket
351  * whose wake function does a wake_up on the ordinary client socket
352  * wait queue. This connection is established whenever a write (or
353  * poll for write) hits the flow control condition and is broken when
354  * the association to the server socket is dissolved or after a wake
355  * up was relayed.
356  */
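
/*
 * Userspace view of the mechanism above (a sketch; srv and srv_len stand
 * for the server socket's bound address): a nonblocking client connected
 * to a busy /dev/log-style server fills the server's receive queue, then
 * sleeps in poll() until the server reads a datagram and the wakeup is
 * relayed through peer_wait:
 *
 *	int fd = socket(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0);
 *	char buf[64] = "hello";
 *
 *	connect(fd, (struct sockaddr *)&srv, srv_len);
 *	while (send(fd, buf, sizeof(buf), 0) >= 0)
 *		;		// eventually fails with EAGAIN: queue full
 *
 *	struct pollfd p = { .fd = fd, .events = POLLOUT };
 *	poll(&p, 1, -1);	// woken when the server dequeues a datagram
 */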
357 
358 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
359 				      void *key)
360 {
361 	struct unix_sock *u;
362 	wait_queue_head_t *u_sleep;
363 
364 	u = container_of(q, struct unix_sock, peer_wake);
365 
366 	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
367 			    q);
368 	u->peer_wake.private = NULL;
369 
370 	/* relaying can only happen while the wq still exists */
371 	u_sleep = sk_sleep(&u->sk);
372 	if (u_sleep)
373 		wake_up_interruptible_poll(u_sleep, key_to_poll(key));
374 
375 	return 0;
376 }
377 
378 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
379 {
380 	struct unix_sock *u, *u_other;
381 	int rc;
382 
383 	u = unix_sk(sk);
384 	u_other = unix_sk(other);
385 	rc = 0;
386 	spin_lock(&u_other->peer_wait.lock);
387 
388 	if (!u->peer_wake.private) {
389 		u->peer_wake.private = other;
390 		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
391 
392 		rc = 1;
393 	}
394 
395 	spin_unlock(&u_other->peer_wait.lock);
396 	return rc;
397 }
398 
399 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
400 					    struct sock *other)
401 {
402 	struct unix_sock *u, *u_other;
403 
404 	u = unix_sk(sk);
405 	u_other = unix_sk(other);
406 	spin_lock(&u_other->peer_wait.lock);
407 
408 	if (u->peer_wake.private == other) {
409 		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
410 		u->peer_wake.private = NULL;
411 	}
412 
413 	spin_unlock(&u_other->peer_wait.lock);
414 }
415 
416 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
417 						   struct sock *other)
418 {
419 	unix_dgram_peer_wake_disconnect(sk, other);
420 	wake_up_interruptible_poll(sk_sleep(sk),
421 				   EPOLLOUT |
422 				   EPOLLWRNORM |
423 				   EPOLLWRBAND);
424 }
425 
426 /* preconditions:
427  *	- unix_peer(sk) == other
428  *	- association is stable
429  */
430 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
431 {
432 	int connected;
433 
434 	connected = unix_dgram_peer_wake_connect(sk, other);
435 
436 	/* If other is SOCK_DEAD, we want to make sure we signal
437 	 * POLLOUT, such that a subsequent write() can get a
438 	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
439 	 * to other and it is full, we will hang waiting for POLLOUT.
440 	 */
441 	if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
442 		return 1;
443 
444 	if (connected)
445 		unix_dgram_peer_wake_disconnect(sk, other);
446 
447 	return 0;
448 }
449 
450 static int unix_writable(const struct sock *sk)
451 {
452 	return sk->sk_state != TCP_LISTEN &&
453 	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
454 }
455 
456 static void unix_write_space(struct sock *sk)
457 {
458 	struct socket_wq *wq;
459 
460 	rcu_read_lock();
461 	if (unix_writable(sk)) {
462 		wq = rcu_dereference(sk->sk_wq);
463 		if (skwq_has_sleeper(wq))
464 			wake_up_interruptible_sync_poll(&wq->wait,
465 				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
466 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
467 	}
468 	rcu_read_unlock();
469 }
470 
471 /* When a dgram socket disconnects (or changes its peer), we clear its receive
472  * queue of packets that arrived from the previous peer. First, this allows
473  * flow control based only on wmem_alloc; second, an sk connected to a peer
474  * may receive messages only from that peer. */
475 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
476 {
477 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
478 		skb_queue_purge(&sk->sk_receive_queue);
479 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
480 
481 		/* If one link of a bidirectional dgram pipe is disconnected,
482 		 * we signal an error. Messages are lost. Do not do this
483 		 * when the peer was not connected to us.
484 		 */
485 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
486 			other->sk_err = ECONNRESET;
487 			other->sk_error_report(other);
488 		}
489 	}
490 }
491 
492 static void unix_sock_destructor(struct sock *sk)
493 {
494 	struct unix_sock *u = unix_sk(sk);
495 
496 	skb_queue_purge(&sk->sk_receive_queue);
497 
498 	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
499 	WARN_ON(!sk_unhashed(sk));
500 	WARN_ON(sk->sk_socket);
501 	if (!sock_flag(sk, SOCK_DEAD)) {
502 		pr_info("Attempt to release alive unix socket: %p\n", sk);
503 		return;
504 	}
505 
506 	if (u->addr)
507 		unix_release_addr(u->addr);
508 
509 	atomic_long_dec(&unix_nr_socks);
510 	local_bh_disable();
511 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
512 	local_bh_enable();
513 #ifdef UNIX_REFCNT_DEBUG
514 	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
515 		atomic_long_read(&unix_nr_socks));
516 #endif
517 }
518 
519 static void unix_release_sock(struct sock *sk, int embrion)
520 {
521 	struct unix_sock *u = unix_sk(sk);
522 	struct path path;
523 	struct sock *skpair;
524 	struct sk_buff *skb;
525 	int state;
526 
527 	unix_remove_socket(sk);
528 
529 	/* Clear state */
530 	unix_state_lock(sk);
531 	sock_orphan(sk);
532 	sk->sk_shutdown = SHUTDOWN_MASK;
533 	path	     = u->path;
534 	u->path.dentry = NULL;
535 	u->path.mnt = NULL;
536 	state = sk->sk_state;
537 	sk->sk_state = TCP_CLOSE;
538 	unix_state_unlock(sk);
539 
540 	wake_up_interruptible_all(&u->peer_wait);
541 
542 	skpair = unix_peer(sk);
543 
544 	if (skpair != NULL) {
545 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
546 			unix_state_lock(skpair);
547 			/* No more writes */
548 			skpair->sk_shutdown = SHUTDOWN_MASK;
549 			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
550 				skpair->sk_err = ECONNRESET;
551 			unix_state_unlock(skpair);
552 			skpair->sk_state_change(skpair);
553 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
554 		}
555 
556 		unix_dgram_peer_wake_disconnect(sk, skpair);
557 		sock_put(skpair); /* It may now die */
558 		unix_peer(sk) = NULL;
559 	}
560 
561 	/* Try to flush out this socket. Throw out buffers at least */
562 
563 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
564 		if (state == TCP_LISTEN)
565 			unix_release_sock(skb->sk, 1);
566 		/* passed fds are erased in the kfree_skb hook	      */
567 		UNIXCB(skb).consumed = skb->len;
568 		kfree_skb(skb);
569 	}
570 
571 	if (path.dentry)
572 		path_put(&path);
573 
574 	sock_put(sk);
575 
576 	/* ---- Socket is dead now and most probably destroyed ---- */
577 
578 	/*
579 	 * Fixme: BSD difference: In BSD all sockets connected to us get
580 	 *	  ECONNRESET and we die on the spot. In Linux we behave
581 	 *	  like files and pipes do and wait for the last
582 	 *	  dereference.
583 	 *
584 	 * Can't we simply set sock->err?
585 	 *
586 	 *	  What does the above comment talk about? --ANK(980817)
587 	 */
588 
589 	if (unix_tot_inflight)
590 		unix_gc();		/* Garbage collect fds */
591 }
592 
593 static void init_peercred(struct sock *sk)
594 {
595 	put_pid(sk->sk_peer_pid);
596 	if (sk->sk_peer_cred)
597 		put_cred(sk->sk_peer_cred);
598 	sk->sk_peer_pid  = get_pid(task_tgid(current));
599 	sk->sk_peer_cred = get_current_cred();
600 }
601 
602 static void copy_peercred(struct sock *sk, struct sock *peersk)
603 {
604 	put_pid(sk->sk_peer_pid);
605 	if (sk->sk_peer_cred)
606 		put_cred(sk->sk_peer_cred);
607 	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
608 	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
609 }
610 
611 static int unix_listen(struct socket *sock, int backlog)
612 {
613 	int err;
614 	struct sock *sk = sock->sk;
615 	struct unix_sock *u = unix_sk(sk);
616 
617 	err = -EOPNOTSUPP;
618 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
619 		goto out;	/* Only stream/seqpacket sockets accept */
620 	err = -EINVAL;
621 	if (!u->addr)
622 		goto out;	/* No listens on an unbound socket */
623 	unix_state_lock(sk);
624 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
625 		goto out_unlock;
626 	if (backlog > sk->sk_max_ack_backlog)
627 		wake_up_interruptible_all(&u->peer_wait);
628 	sk->sk_max_ack_backlog	= backlog;
629 	sk->sk_state		= TCP_LISTEN;
630 	/* set credentials so connect can copy them */
631 	init_peercred(sk);
632 	err = 0;
633 
634 out_unlock:
635 	unix_state_unlock(sk);
636 out:
637 	return err;
638 }
639 
640 static int unix_release(struct socket *);
641 static int unix_bind(struct socket *, struct sockaddr *, int);
642 static int unix_stream_connect(struct socket *, struct sockaddr *,
643 			       int addr_len, int flags);
644 static int unix_socketpair(struct socket *, struct socket *);
645 static int unix_accept(struct socket *, struct socket *, int, bool);
646 static int unix_getname(struct socket *, struct sockaddr *, int);
647 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
648 static __poll_t unix_dgram_poll(struct file *, struct socket *,
649 				    poll_table *);
650 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
651 #ifdef CONFIG_COMPAT
652 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
653 #endif
654 static int unix_shutdown(struct socket *, int);
655 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
656 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
657 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
658 				    size_t size, int flags);
659 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
660 				       struct pipe_inode_info *, size_t size,
661 				       unsigned int flags);
662 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
663 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
664 static int unix_dgram_connect(struct socket *, struct sockaddr *,
665 			      int, int);
666 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
667 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
668 				  int);
669 
670 static int unix_set_peek_off(struct sock *sk, int val)
671 {
672 	struct unix_sock *u = unix_sk(sk);
673 
674 	if (mutex_lock_interruptible(&u->iolock))
675 		return -EINTR;
676 
677 	sk->sk_peek_off = val;
678 	mutex_unlock(&u->iolock);
679 
680 	return 0;
681 }
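
/*
 * Userspace sketch of what unix_set_peek_off() enables (illustrative; fd
 * is a connected AF_UNIX socket with queued data): once SO_PEEK_OFF is
 * set, successive MSG_PEEK reads advance through the queued data instead
 * of re-reading it from the start:
 *
 *	char buf[16];
 *	int off = 0;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
 *	recv(fd, buf, 16, MSG_PEEK);	// peeks bytes 0..15
 *	recv(fd, buf, 16, MSG_PEEK);	// peeks bytes 16..31
 */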
682 
683 #ifdef CONFIG_PROC_FS
684 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
685 {
686 	struct sock *sk = sock->sk;
687 	struct unix_sock *u;
688 
689 	if (sk) {
690 		u = unix_sk(sock->sk);
691 		seq_printf(m, "scm_fds: %u\n",
692 			   atomic_read(&u->scm_stat.nr_fds));
693 	}
694 }
695 #else
696 #define unix_show_fdinfo NULL
697 #endif
698 
699 static const struct proto_ops unix_stream_ops = {
700 	.family =	PF_UNIX,
701 	.owner =	THIS_MODULE,
702 	.release =	unix_release,
703 	.bind =		unix_bind,
704 	.connect =	unix_stream_connect,
705 	.socketpair =	unix_socketpair,
706 	.accept =	unix_accept,
707 	.getname =	unix_getname,
708 	.poll =		unix_poll,
709 	.ioctl =	unix_ioctl,
710 #ifdef CONFIG_COMPAT
711 	.compat_ioctl =	unix_compat_ioctl,
712 #endif
713 	.listen =	unix_listen,
714 	.shutdown =	unix_shutdown,
715 	.sendmsg =	unix_stream_sendmsg,
716 	.recvmsg =	unix_stream_recvmsg,
717 	.mmap =		sock_no_mmap,
718 	.sendpage =	unix_stream_sendpage,
719 	.splice_read =	unix_stream_splice_read,
720 	.set_peek_off =	unix_set_peek_off,
721 	.show_fdinfo =	unix_show_fdinfo,
722 };
723 
724 static const struct proto_ops unix_dgram_ops = {
725 	.family =	PF_UNIX,
726 	.owner =	THIS_MODULE,
727 	.release =	unix_release,
728 	.bind =		unix_bind,
729 	.connect =	unix_dgram_connect,
730 	.socketpair =	unix_socketpair,
731 	.accept =	sock_no_accept,
732 	.getname =	unix_getname,
733 	.poll =		unix_dgram_poll,
734 	.ioctl =	unix_ioctl,
735 #ifdef CONFIG_COMPAT
736 	.compat_ioctl =	unix_compat_ioctl,
737 #endif
738 	.listen =	sock_no_listen,
739 	.shutdown =	unix_shutdown,
740 	.sendmsg =	unix_dgram_sendmsg,
741 	.recvmsg =	unix_dgram_recvmsg,
742 	.mmap =		sock_no_mmap,
743 	.sendpage =	sock_no_sendpage,
744 	.set_peek_off =	unix_set_peek_off,
745 	.show_fdinfo =	unix_show_fdinfo,
746 };
747 
748 static const struct proto_ops unix_seqpacket_ops = {
749 	.family =	PF_UNIX,
750 	.owner =	THIS_MODULE,
751 	.release =	unix_release,
752 	.bind =		unix_bind,
753 	.connect =	unix_stream_connect,
754 	.socketpair =	unix_socketpair,
755 	.accept =	unix_accept,
756 	.getname =	unix_getname,
757 	.poll =		unix_dgram_poll,
758 	.ioctl =	unix_ioctl,
759 #ifdef CONFIG_COMPAT
760 	.compat_ioctl =	unix_compat_ioctl,
761 #endif
762 	.listen =	unix_listen,
763 	.shutdown =	unix_shutdown,
764 	.sendmsg =	unix_seqpacket_sendmsg,
765 	.recvmsg =	unix_seqpacket_recvmsg,
766 	.mmap =		sock_no_mmap,
767 	.sendpage =	sock_no_sendpage,
768 	.set_peek_off =	unix_set_peek_off,
769 	.show_fdinfo =	unix_show_fdinfo,
770 };
771 
772 static struct proto unix_proto = {
773 	.name			= "UNIX",
774 	.owner			= THIS_MODULE,
775 	.obj_size		= sizeof(struct unix_sock),
776 };
777 
778 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
779 {
780 	struct sock *sk = NULL;
781 	struct unix_sock *u;
782 
783 	atomic_long_inc(&unix_nr_socks);
784 	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
785 		goto out;
786 
787 	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
788 	if (!sk)
789 		goto out;
790 
791 	sock_init_data(sock, sk);
792 
793 	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
794 	sk->sk_write_space	= unix_write_space;
795 	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
796 	sk->sk_destruct		= unix_sock_destructor;
797 	u	  = unix_sk(sk);
798 	u->path.dentry = NULL;
799 	u->path.mnt = NULL;
800 	spin_lock_init(&u->lock);
801 	atomic_long_set(&u->inflight, 0);
802 	INIT_LIST_HEAD(&u->link);
803 	mutex_init(&u->iolock); /* single task reading lock */
804 	mutex_init(&u->bindlock); /* single task binding lock */
805 	init_waitqueue_head(&u->peer_wait);
806 	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
807 	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
808 	unix_insert_socket(unix_sockets_unbound(sk), sk);
809 out:
810 	if (sk == NULL)
811 		atomic_long_dec(&unix_nr_socks);
812 	else {
813 		local_bh_disable();
814 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
815 		local_bh_enable();
816 	}
817 	return sk;
818 }
819 
820 static int unix_create(struct net *net, struct socket *sock, int protocol,
821 		       int kern)
822 {
823 	if (protocol && protocol != PF_UNIX)
824 		return -EPROTONOSUPPORT;
825 
826 	sock->state = SS_UNCONNECTED;
827 
828 	switch (sock->type) {
829 	case SOCK_STREAM:
830 		sock->ops = &unix_stream_ops;
831 		break;
832 		/*
833 		 *	Believe it or not, BSD has AF_UNIX, SOCK_RAW, though
834 		 *	nothing uses it.
835 		 */
836 	case SOCK_RAW:
837 		sock->type = SOCK_DGRAM;
838 		fallthrough;
839 	case SOCK_DGRAM:
840 		sock->ops = &unix_dgram_ops;
841 		break;
842 	case SOCK_SEQPACKET:
843 		sock->ops = &unix_seqpacket_ops;
844 		break;
845 	default:
846 		return -ESOCKTNOSUPPORT;
847 	}
848 
849 	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
850 }
851 
852 static int unix_release(struct socket *sock)
853 {
854 	struct sock *sk = sock->sk;
855 
856 	if (!sk)
857 		return 0;
858 
859 	unix_release_sock(sk, 0);
860 	sock->sk = NULL;
861 
862 	return 0;
863 }
864 
865 static int unix_autobind(struct socket *sock)
866 {
867 	struct sock *sk = sock->sk;
868 	struct net *net = sock_net(sk);
869 	struct unix_sock *u = unix_sk(sk);
870 	static u32 ordernum = 1;
871 	struct unix_address *addr;
872 	int err;
873 	unsigned int retries = 0;
874 
875 	err = mutex_lock_interruptible(&u->bindlock);
876 	if (err)
877 		return err;
878 
879 	if (u->addr)
880 		goto out;
881 
882 	err = -ENOMEM;
883 	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
884 	if (!addr)
885 		goto out;
886 
887 	addr->name->sun_family = AF_UNIX;
888 	refcount_set(&addr->refcnt, 1);
889 
890 retry:
891 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
892 	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
893 
894 	spin_lock(&unix_table_lock);
895 	ordernum = (ordernum+1)&0xFFFFF;
896 
897 	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
898 				      addr->hash)) {
899 		spin_unlock(&unix_table_lock);
900 		/*
901 		 * __unix_find_socket_byname() may take a long time if many names
902 		 * are already in use.
903 		 */
904 		cond_resched();
905 		/* Give up if all names seem to be in use. */
906 		if (retries++ == 0xFFFFF) {
907 			err = -ENOSPC;
908 			kfree(addr);
909 			goto out;
910 		}
911 		goto retry;
912 	}
913 	addr->hash ^= sk->sk_type;
914 
915 	__unix_remove_socket(sk);
916 	smp_store_release(&u->addr, addr);
917 	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
918 	spin_unlock(&unix_table_lock);
919 	err = 0;
920 
921 out:	mutex_unlock(&u->bindlock);
922 	return err;
923 }
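
/*
 * Illustrative only: autobind is also what userspace gets by binding with
 * an address length of just sizeof(sa_family_t) (see unix_bind() below).
 * The kernel picks an abstract name of five hex digits:
 *
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *	socklen_t alen = sizeof(a);
 *
 *	bind(fd, (struct sockaddr *)&a, sizeof(sa_family_t));
 *	getsockname(fd, (struct sockaddr *)&a, &alen);
 *	// a.sun_path[0] == 0, followed by five hex digits, e.g. "00fa1"
 */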
924 
925 static struct sock *unix_find_other(struct net *net,
926 				    struct sockaddr_un *sunname, int len,
927 				    int type, unsigned int hash, int *error)
928 {
929 	struct sock *u;
930 	struct path path;
931 	int err = 0;
932 
933 	if (sunname->sun_path[0]) {
934 		struct inode *inode;
935 		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
936 		if (err)
937 			goto fail;
938 		inode = d_backing_inode(path.dentry);
939 		err = inode_permission(inode, MAY_WRITE);
940 		if (err)
941 			goto put_fail;
942 
943 		err = -ECONNREFUSED;
944 		if (!S_ISSOCK(inode->i_mode))
945 			goto put_fail;
946 		u = unix_find_socket_byinode(inode);
947 		if (!u)
948 			goto put_fail;
949 
950 		if (u->sk_type == type)
951 			touch_atime(&path);
952 
953 		path_put(&path);
954 
955 		err = -EPROTOTYPE;
956 		if (u->sk_type != type) {
957 			sock_put(u);
958 			goto fail;
959 		}
960 	} else {
961 		err = -ECONNREFUSED;
962 		u = unix_find_socket_byname(net, sunname, len, type, hash);
963 		if (u) {
964 			struct dentry *dentry;
965 			dentry = unix_sk(u)->path.dentry;
966 			if (dentry)
967 				touch_atime(&unix_sk(u)->path);
968 		} else
969 			goto fail;
970 	}
971 	return u;
972 
973 put_fail:
974 	path_put(&path);
975 fail:
976 	*error = err;
977 	return NULL;
978 }
979 
980 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
981 {
982 	struct dentry *dentry;
983 	struct path path;
984 	int err = 0;
985 	/*
986 	 * Get the parent directory, calculate the hash for the last
987 	 * component.
988 	 */
989 	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
990 	err = PTR_ERR(dentry);
991 	if (IS_ERR(dentry))
992 		return err;
993 
994 	/*
995 	 * All right, let's create it.
996 	 */
997 	err = security_path_mknod(&path, dentry, mode, 0);
998 	if (!err) {
999 		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
1000 		if (!err) {
1001 			res->mnt = mntget(path.mnt);
1002 			res->dentry = dget(dentry);
1003 		}
1004 	}
1005 	done_path_create(&path, dentry);
1006 	return err;
1007 }
1008 
1009 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1010 {
1011 	struct sock *sk = sock->sk;
1012 	struct net *net = sock_net(sk);
1013 	struct unix_sock *u = unix_sk(sk);
1014 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1015 	char *sun_path = sunaddr->sun_path;
1016 	int err;
1017 	unsigned int hash;
1018 	struct unix_address *addr;
1019 	struct hlist_head *list;
1020 	struct path path = { };
1021 
1022 	err = -EINVAL;
1023 	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
1024 	    sunaddr->sun_family != AF_UNIX)
1025 		goto out;
1026 
1027 	if (addr_len == sizeof(short)) {
1028 		err = unix_autobind(sock);
1029 		goto out;
1030 	}
1031 
1032 	err = unix_mkname(sunaddr, addr_len, &hash);
1033 	if (err < 0)
1034 		goto out;
1035 	addr_len = err;
1036 
1037 	if (sun_path[0]) {
1038 		umode_t mode = S_IFSOCK |
1039 		       (SOCK_INODE(sock)->i_mode & ~current_umask());
1040 		err = unix_mknod(sun_path, mode, &path);
1041 		if (err) {
1042 			if (err == -EEXIST)
1043 				err = -EADDRINUSE;
1044 			goto out;
1045 		}
1046 	}
1047 
1048 	err = mutex_lock_interruptible(&u->bindlock);
1049 	if (err)
1050 		goto out_put;
1051 
1052 	err = -EINVAL;
1053 	if (u->addr)
1054 		goto out_up;
1055 
1056 	err = -ENOMEM;
1057 	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1058 	if (!addr)
1059 		goto out_up;
1060 
1061 	memcpy(addr->name, sunaddr, addr_len);
1062 	addr->len = addr_len;
1063 	addr->hash = hash ^ sk->sk_type;
1064 	refcount_set(&addr->refcnt, 1);
1065 
1066 	if (sun_path[0]) {
1067 		addr->hash = UNIX_HASH_SIZE;
1068 		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1069 		spin_lock(&unix_table_lock);
1070 		u->path = path;
1071 		list = &unix_socket_table[hash];
1072 	} else {
1073 		spin_lock(&unix_table_lock);
1074 		err = -EADDRINUSE;
1075 		if (__unix_find_socket_byname(net, sunaddr, addr_len,
1076 					      sk->sk_type, hash)) {
1077 			unix_release_addr(addr);
1078 			goto out_unlock;
1079 		}
1080 
1081 		list = &unix_socket_table[addr->hash];
1082 	}
1083 
1084 	err = 0;
1085 	__unix_remove_socket(sk);
1086 	smp_store_release(&u->addr, addr);
1087 	__unix_insert_socket(list, sk);
1088 
1089 out_unlock:
1090 	spin_unlock(&unix_table_lock);
1091 out_up:
1092 	mutex_unlock(&u->bindlock);
1093 out_put:
1094 	if (err)
1095 		path_put(&path);
1096 out:
1097 	return err;
1098 }
1099 
1100 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1101 {
1102 	if (unlikely(sk1 == sk2) || !sk2) {
1103 		unix_state_lock(sk1);
1104 		return;
1105 	}
1106 	if (sk1 < sk2) {
1107 		unix_state_lock(sk1);
1108 		unix_state_lock_nested(sk2);
1109 	} else {
1110 		unix_state_lock(sk2);
1111 		unix_state_lock_nested(sk1);
1112 	}
1113 }
1114 
1115 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1116 {
1117 	if (unlikely(sk1 == sk2) || !sk2) {
1118 		unix_state_unlock(sk1);
1119 		return;
1120 	}
1121 	unix_state_unlock(sk1);
1122 	unix_state_unlock(sk2);
1123 }
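
/*
 * Taking the two state locks in pointer order makes all concurrent
 * double-lockers agree on a single global order, which rules out ABBA
 * deadlock: if one CPU runs unix_state_double_lock(a, b) while another
 * runs unix_state_double_lock(b, a), both take the lower-addressed
 * socket's lock first.
 */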
1124 
1125 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1126 			      int alen, int flags)
1127 {
1128 	struct sock *sk = sock->sk;
1129 	struct net *net = sock_net(sk);
1130 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1131 	struct sock *other;
1132 	unsigned int hash;
1133 	int err;
1134 
1135 	err = -EINVAL;
1136 	if (alen < offsetofend(struct sockaddr, sa_family))
1137 		goto out;
1138 
1139 	if (addr->sa_family != AF_UNSPEC) {
1140 		err = unix_mkname(sunaddr, alen, &hash);
1141 		if (err < 0)
1142 			goto out;
1143 		alen = err;
1144 
1145 		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1146 		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1147 			goto out;
1148 
1149 restart:
1150 		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1151 		if (!other)
1152 			goto out;
1153 
1154 		unix_state_double_lock(sk, other);
1155 
1156 		/* Apparently VFS overslept socket death. Retry. */
1157 		if (sock_flag(other, SOCK_DEAD)) {
1158 			unix_state_double_unlock(sk, other);
1159 			sock_put(other);
1160 			goto restart;
1161 		}
1162 
1163 		err = -EPERM;
1164 		if (!unix_may_send(sk, other))
1165 			goto out_unlock;
1166 
1167 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1168 		if (err)
1169 			goto out_unlock;
1170 
1171 	} else {
1172 		/*
1173 		 *	1003.1g breaking connected state with AF_UNSPEC
1174 		 */
1175 		other = NULL;
1176 		unix_state_double_lock(sk, other);
1177 	}
1178 
1179 	/*
1180 	 * If it was connected, reconnect.
1181 	 */
1182 	if (unix_peer(sk)) {
1183 		struct sock *old_peer = unix_peer(sk);
1184 		unix_peer(sk) = other;
1185 		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1186 
1187 		unix_state_double_unlock(sk, other);
1188 
1189 		if (other != old_peer)
1190 			unix_dgram_disconnected(sk, old_peer);
1191 		sock_put(old_peer);
1192 	} else {
1193 		unix_peer(sk) = other;
1194 		unix_state_double_unlock(sk, other);
1195 	}
1196 	return 0;
1197 
1198 out_unlock:
1199 	unix_state_double_unlock(sk, other);
1200 	sock_put(other);
1201 out:
1202 	return err;
1203 }
1204 
1205 static long unix_wait_for_peer(struct sock *other, long timeo)
1206 	__releases(&unix_sk(other)->lock)
1207 {
1208 	struct unix_sock *u = unix_sk(other);
1209 	int sched;
1210 	DEFINE_WAIT(wait);
1211 
1212 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1213 
1214 	sched = !sock_flag(other, SOCK_DEAD) &&
1215 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1216 		unix_recvq_full(other);
1217 
1218 	unix_state_unlock(other);
1219 
1220 	if (sched)
1221 		timeo = schedule_timeout(timeo);
1222 
1223 	finish_wait(&u->peer_wait, &wait);
1224 	return timeo;
1225 }
1226 
1227 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1228 			       int addr_len, int flags)
1229 {
1230 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1231 	struct sock *sk = sock->sk;
1232 	struct net *net = sock_net(sk);
1233 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1234 	struct sock *newsk = NULL;
1235 	struct sock *other = NULL;
1236 	struct sk_buff *skb = NULL;
1237 	unsigned int hash;
1238 	int st;
1239 	int err;
1240 	long timeo;
1241 
1242 	err = unix_mkname(sunaddr, addr_len, &hash);
1243 	if (err < 0)
1244 		goto out;
1245 	addr_len = err;
1246 
1247 	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1248 	    (err = unix_autobind(sock)) != 0)
1249 		goto out;
1250 
1251 	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1252 
1253 	/* First of all, allocate resources.
1254 	   If we allocated after the state was locked,
1255 	   we would have to recheck everything again in any case.
1256 	 */
1257 
1258 	err = -ENOMEM;
1259 
1260 	/* create new sock for complete connection */
1261 	newsk = unix_create1(sock_net(sk), NULL, 0);
1262 	if (newsk == NULL)
1263 		goto out;
1264 
1265 	/* Allocate skb for sending to listening sock */
1266 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1267 	if (skb == NULL)
1268 		goto out;
1269 
1270 restart:
1271 	/*  Find listening sock. */
1272 	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1273 	if (!other)
1274 		goto out;
1275 
1276 	/* Latch state of peer */
1277 	unix_state_lock(other);
1278 
1279 	/* Apparently VFS overslept socket death. Retry. */
1280 	if (sock_flag(other, SOCK_DEAD)) {
1281 		unix_state_unlock(other);
1282 		sock_put(other);
1283 		goto restart;
1284 	}
1285 
1286 	err = -ECONNREFUSED;
1287 	if (other->sk_state != TCP_LISTEN)
1288 		goto out_unlock;
1289 	if (other->sk_shutdown & RCV_SHUTDOWN)
1290 		goto out_unlock;
1291 
1292 	if (unix_recvq_full(other)) {
1293 		err = -EAGAIN;
1294 		if (!timeo)
1295 			goto out_unlock;
1296 
1297 		timeo = unix_wait_for_peer(other, timeo);
1298 
1299 		err = sock_intr_errno(timeo);
1300 		if (signal_pending(current))
1301 			goto out;
1302 		sock_put(other);
1303 		goto restart;
1304 	}
1305 
1306 	/* Latch our state.
1307 
1308 	   This is a tricky place. We need to grab our state lock and cannot
1309 	   drop the lock on the peer. It is dangerous because a deadlock is
1310 	   possible. The connect-to-self case and a simultaneous
1311 	   attempt to connect are eliminated by checking the socket
1312 	   state. other is TCP_LISTEN; if sk is TCP_LISTEN we
1313 	   check this before attempting to grab the lock.
1314 
1315 	   Well, and we have to recheck the state after the socket is locked.
1316 	 */
1317 	st = sk->sk_state;
1318 
1319 	switch (st) {
1320 	case TCP_CLOSE:
1321 		/* This is ok... continue with connect */
1322 		break;
1323 	case TCP_ESTABLISHED:
1324 		/* Socket is already connected */
1325 		err = -EISCONN;
1326 		goto out_unlock;
1327 	default:
1328 		err = -EINVAL;
1329 		goto out_unlock;
1330 	}
1331 
1332 	unix_state_lock_nested(sk);
1333 
1334 	if (sk->sk_state != st) {
1335 		unix_state_unlock(sk);
1336 		unix_state_unlock(other);
1337 		sock_put(other);
1338 		goto restart;
1339 	}
1340 
1341 	err = security_unix_stream_connect(sk, other, newsk);
1342 	if (err) {
1343 		unix_state_unlock(sk);
1344 		goto out_unlock;
1345 	}
1346 
1347 	/* The way is open! Quickly set all the necessary fields... */
1348 
1349 	sock_hold(sk);
1350 	unix_peer(newsk)	= sk;
1351 	newsk->sk_state		= TCP_ESTABLISHED;
1352 	newsk->sk_type		= sk->sk_type;
1353 	init_peercred(newsk);
1354 	newu = unix_sk(newsk);
1355 	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1356 	otheru = unix_sk(other);
1357 
1358 	/* copy address information from listening to new sock
1359 	 *
1360 	 * The contents of *(otheru->addr) and otheru->path
1361 	 * are seen fully set up here, since we have found
1362 	 * otheru in hash under unix_table_lock.  Insertion
1363 	 * into the hash chain we'd found it in had been done
1364 	 * in an earlier critical area protected by unix_table_lock,
1365 	 * the same one where we'd set *(otheru->addr) contents,
1366 	 * as well as otheru->path and otheru->addr itself.
1367 	 *
1368 	 * Using smp_store_release() here to set newu->addr
1369 	 * is enough to make those stores, as well as stores
1370 	 * to newu->path visible to anyone who gets newu->addr
1371 	 * by smp_load_acquire().  IOW, the same guarantees
1372 	 * as for unix_sock instances bound in unix_bind() or
1373 	 * in unix_autobind().
1374 	 */
1375 	if (otheru->path.dentry) {
1376 		path_get(&otheru->path);
1377 		newu->path = otheru->path;
1378 	}
1379 	refcount_inc(&otheru->addr->refcnt);
1380 	smp_store_release(&newu->addr, otheru->addr);
1381 
1382 	/* Set credentials */
1383 	copy_peercred(sk, other);
1384 
1385 	sock->state	= SS_CONNECTED;
1386 	sk->sk_state	= TCP_ESTABLISHED;
1387 	sock_hold(newsk);
1388 
1389 	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
1390 	unix_peer(sk)	= newsk;
1391 
1392 	unix_state_unlock(sk);
1393 
1394 	/* take ten and send info to the listening sock */
1395 	spin_lock(&other->sk_receive_queue.lock);
1396 	__skb_queue_tail(&other->sk_receive_queue, skb);
1397 	spin_unlock(&other->sk_receive_queue.lock);
1398 	unix_state_unlock(other);
1399 	other->sk_data_ready(other);
1400 	sock_put(other);
1401 	return 0;
1402 
1403 out_unlock:
1404 	if (other)
1405 		unix_state_unlock(other);
1406 
1407 out:
1408 	kfree_skb(skb);
1409 	if (newsk)
1410 		unix_release_sock(newsk, 0);
1411 	if (other)
1412 		sock_put(other);
1413 	return err;
1414 }
1415 
1416 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1417 {
1418 	struct sock *ska = socka->sk, *skb = sockb->sk;
1419 
1420 	/* Join our sockets back to back */
1421 	sock_hold(ska);
1422 	sock_hold(skb);
1423 	unix_peer(ska) = skb;
1424 	unix_peer(skb) = ska;
1425 	init_peercred(ska);
1426 	init_peercred(skb);
1427 
1428 	if (ska->sk_type != SOCK_DGRAM) {
1429 		ska->sk_state = TCP_ESTABLISHED;
1430 		skb->sk_state = TCP_ESTABLISHED;
1431 		socka->state  = SS_CONNECTED;
1432 		sockb->state  = SS_CONNECTED;
1433 	}
1434 	return 0;
1435 }
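
/*
 * Userspace counterpart (illustrative): socketpair() yields two already
 * connected endpoints, which is why no address lookup or handshake is
 * needed above:
 *
 *	int fds[2];
 *	char buf[4];
 *
 *	if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == 0) {
 *		write(fds[0], "ping", 4);
 *		read(fds[1], buf, 4);	// receives "ping"
 *	}
 */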
1436 
1437 static void unix_sock_inherit_flags(const struct socket *old,
1438 				    struct socket *new)
1439 {
1440 	if (test_bit(SOCK_PASSCRED, &old->flags))
1441 		set_bit(SOCK_PASSCRED, &new->flags);
1442 	if (test_bit(SOCK_PASSSEC, &old->flags))
1443 		set_bit(SOCK_PASSSEC, &new->flags);
1444 }
1445 
1446 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1447 		       bool kern)
1448 {
1449 	struct sock *sk = sock->sk;
1450 	struct sock *tsk;
1451 	struct sk_buff *skb;
1452 	int err;
1453 
1454 	err = -EOPNOTSUPP;
1455 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1456 		goto out;
1457 
1458 	err = -EINVAL;
1459 	if (sk->sk_state != TCP_LISTEN)
1460 		goto out;
1461 
1462 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1463 	 * so that no locks are necessary.
1464 	 */
1465 
1466 	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1467 	if (!skb) {
1468 		/* This means receive shutdown. */
1469 		if (err == 0)
1470 			err = -EINVAL;
1471 		goto out;
1472 	}
1473 
1474 	tsk = skb->sk;
1475 	skb_free_datagram(sk, skb);
1476 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1477 
1478 	/* attach accepted sock to socket */
1479 	unix_state_lock(tsk);
1480 	newsock->state = SS_CONNECTED;
1481 	unix_sock_inherit_flags(sock, newsock);
1482 	sock_graft(tsk, newsock);
1483 	unix_state_unlock(tsk);
1484 	return 0;
1485 
1486 out:
1487 	return err;
1488 }
1489 
1490 
1491 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1492 {
1493 	struct sock *sk = sock->sk;
1494 	struct unix_address *addr;
1495 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1496 	int err = 0;
1497 
1498 	if (peer) {
1499 		sk = unix_peer_get(sk);
1500 
1501 		err = -ENOTCONN;
1502 		if (!sk)
1503 			goto out;
1504 		err = 0;
1505 	} else {
1506 		sock_hold(sk);
1507 	}
1508 
1509 	addr = smp_load_acquire(&unix_sk(sk)->addr);
1510 	if (!addr) {
1511 		sunaddr->sun_family = AF_UNIX;
1512 		sunaddr->sun_path[0] = 0;
1513 		err = sizeof(short);
1514 	} else {
1515 		err = addr->len;
1516 		memcpy(sunaddr, addr->name, addr->len);
1517 	}
1518 	sock_put(sk);
1519 out:
1520 	return err;
1521 }
1522 
1523 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1524 {
1525 	int err = 0;
1526 
1527 	UNIXCB(skb).pid  = get_pid(scm->pid);
1528 	UNIXCB(skb).uid = scm->creds.uid;
1529 	UNIXCB(skb).gid = scm->creds.gid;
1530 	UNIXCB(skb).fp = NULL;
1531 	unix_get_secdata(scm, skb);
1532 	if (scm->fp && send_fds)
1533 		err = unix_attach_fds(scm, skb);
1534 
1535 	skb->destructor = unix_destruct_scm;
1536 	return err;
1537 }
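
/*
 * Userspace sketch of the fd passing that unix_attach_fds() implements on
 * the kernel side (illustrative; error handling omitted, sock and
 * fd_to_pass are placeholders):
 *
 *	char one = 'x', cbuf[CMSG_SPACE(sizeof(int))];
 *	struct iovec iov = { .iov_base = &one, .iov_len = 1 };
 *	struct msghdr mh = { .msg_iov = &iov, .msg_iovlen = 1,
 *			     .msg_control = cbuf,
 *			     .msg_controllen = sizeof(cbuf) };
 *	struct cmsghdr *c = CMSG_FIRSTHDR(&mh);
 *
 *	c->cmsg_level = SOL_SOCKET;
 *	c->cmsg_type = SCM_RIGHTS;
 *	c->cmsg_len = CMSG_LEN(sizeof(int));
 *	memcpy(CMSG_DATA(c), &fd_to_pass, sizeof(int));
 *	sendmsg(sock, &mh, 0);
 */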
1538 
1539 static bool unix_passcred_enabled(const struct socket *sock,
1540 				  const struct sock *other)
1541 {
1542 	return test_bit(SOCK_PASSCRED, &sock->flags) ||
1543 	       !other->sk_socket ||
1544 	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1545 }
1546 
1547 /*
1548  * Some apps rely on write() giving SCM_CREDENTIALS.
1549  * We include credentials if the source or destination socket
1550  * asserted SOCK_PASSCRED.
1551  */
1552 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1553 			    const struct sock *other)
1554 {
1555 	if (UNIXCB(skb).pid)
1556 		return;
1557 	if (unix_passcred_enabled(sock, other)) {
1558 		UNIXCB(skb).pid  = get_pid(task_tgid(current));
1559 		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1560 	}
1561 }
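
/*
 * Receiving-side sketch (illustrative; fd is a placeholder): with
 * SO_PASSCRED enabled, the credentials added above arrive as an
 * SCM_CREDENTIALS control message:
 *
 *	char one, cbuf[CMSG_SPACE(sizeof(struct ucred))];
 *	struct iovec iov = { .iov_base = &one, .iov_len = 1 };
 *	struct msghdr mh = { .msg_iov = &iov, .msg_iovlen = 1,
 *			     .msg_control = cbuf,
 *			     .msg_controllen = sizeof(cbuf) };
 *	int on = 1;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
 *	recvmsg(fd, &mh, 0);
 *	struct cmsghdr *c = CMSG_FIRSTHDR(&mh);
 *	if (c && c->cmsg_level == SOL_SOCKET &&
 *	    c->cmsg_type == SCM_CREDENTIALS) {
 *		struct ucred *uc = (struct ucred *)CMSG_DATA(c);
 *		// sender's uc->pid, uc->uid, uc->gid
 *	}
 */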
1562 
1563 static int maybe_init_creds(struct scm_cookie *scm,
1564 			    struct socket *socket,
1565 			    const struct sock *other)
1566 {
1567 	int err;
1568 	struct msghdr msg = { .msg_controllen = 0 };
1569 
1570 	err = scm_send(socket, &msg, scm, false);
1571 	if (err)
1572 		return err;
1573 
1574 	if (unix_passcred_enabled(socket, other)) {
1575 		scm->pid = get_pid(task_tgid(current));
1576 		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1577 	}
1578 	return err;
1579 }
1580 
1581 static bool unix_skb_scm_eq(struct sk_buff *skb,
1582 			    struct scm_cookie *scm)
1583 {
1584 	const struct unix_skb_parms *u = &UNIXCB(skb);
1585 
1586 	return u->pid == scm->pid &&
1587 	       uid_eq(u->uid, scm->creds.uid) &&
1588 	       gid_eq(u->gid, scm->creds.gid) &&
1589 	       unix_secdata_eq(scm, skb);
1590 }
1591 
1592 static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1593 {
1594 	struct scm_fp_list *fp = UNIXCB(skb).fp;
1595 	struct unix_sock *u = unix_sk(sk);
1596 
1597 	if (unlikely(fp && fp->count))
1598 		atomic_add(fp->count, &u->scm_stat.nr_fds);
1599 }
1600 
1601 static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1602 {
1603 	struct scm_fp_list *fp = UNIXCB(skb).fp;
1604 	struct unix_sock *u = unix_sk(sk);
1605 
1606 	if (unlikely(fp && fp->count))
1607 		atomic_sub(fp->count, &u->scm_stat.nr_fds);
1608 }
1609 
1610 /*
1611  *	Send AF_UNIX data.
1612  */
1613 
1614 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1615 			      size_t len)
1616 {
1617 	struct sock *sk = sock->sk;
1618 	struct net *net = sock_net(sk);
1619 	struct unix_sock *u = unix_sk(sk);
1620 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1621 	struct sock *other = NULL;
1622 	int namelen = 0; /* fake init to silence a GCC warning */
1623 	int err;
1624 	unsigned int hash;
1625 	struct sk_buff *skb;
1626 	long timeo;
1627 	struct scm_cookie scm;
1628 	int data_len = 0;
1629 	int sk_locked;
1630 
1631 	wait_for_unix_gc();
1632 	err = scm_send(sock, msg, &scm, false);
1633 	if (err < 0)
1634 		return err;
1635 
1636 	err = -EOPNOTSUPP;
1637 	if (msg->msg_flags&MSG_OOB)
1638 		goto out;
1639 
1640 	if (msg->msg_namelen) {
1641 		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1642 		if (err < 0)
1643 			goto out;
1644 		namelen = err;
1645 	} else {
1646 		sunaddr = NULL;
1647 		err = -ENOTCONN;
1648 		other = unix_peer_get(sk);
1649 		if (!other)
1650 			goto out;
1651 	}
1652 
1653 	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1654 	    && (err = unix_autobind(sock)) != 0)
1655 		goto out;
1656 
1657 	err = -EMSGSIZE;
1658 	if (len > sk->sk_sndbuf - 32)
1659 		goto out;
1660 
1661 	if (len > SKB_MAX_ALLOC) {
1662 		data_len = min_t(size_t,
1663 				 len - SKB_MAX_ALLOC,
1664 				 MAX_SKB_FRAGS * PAGE_SIZE);
1665 		data_len = PAGE_ALIGN(data_len);
1666 
1667 		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1668 	}
1669 
1670 	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1671 				   msg->msg_flags & MSG_DONTWAIT, &err,
1672 				   PAGE_ALLOC_COSTLY_ORDER);
1673 	if (skb == NULL)
1674 		goto out;
1675 
1676 	err = unix_scm_to_skb(&scm, skb, true);
1677 	if (err < 0)
1678 		goto out_free;
1679 
1680 	skb_put(skb, len - data_len);
1681 	skb->data_len = data_len;
1682 	skb->len = len;
1683 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1684 	if (err)
1685 		goto out_free;
1686 
1687 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1688 
1689 restart:
1690 	if (!other) {
1691 		err = -ECONNRESET;
1692 		if (sunaddr == NULL)
1693 			goto out_free;
1694 
1695 		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1696 					hash, &err);
1697 		if (other == NULL)
1698 			goto out_free;
1699 	}
1700 
1701 	if (sk_filter(other, skb) < 0) {
1702 		/* Toss the packet but do not return any error to the sender */
1703 		err = len;
1704 		goto out_free;
1705 	}
1706 
1707 	sk_locked = 0;
1708 	unix_state_lock(other);
1709 restart_locked:
1710 	err = -EPERM;
1711 	if (!unix_may_send(sk, other))
1712 		goto out_unlock;
1713 
1714 	if (unlikely(sock_flag(other, SOCK_DEAD))) {
1715 		/*
1716 		 *	Check with 1003.1g - what should
1717 		 *	datagram error
1718 		 */
1719 		unix_state_unlock(other);
1720 		sock_put(other);
1721 
1722 		if (!sk_locked)
1723 			unix_state_lock(sk);
1724 
1725 		err = 0;
1726 		if (unix_peer(sk) == other) {
1727 			unix_peer(sk) = NULL;
1728 			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1729 
1730 			unix_state_unlock(sk);
1731 
1732 			unix_dgram_disconnected(sk, other);
1733 			sock_put(other);
1734 			err = -ECONNREFUSED;
1735 		} else {
1736 			unix_state_unlock(sk);
1737 		}
1738 
1739 		other = NULL;
1740 		if (err)
1741 			goto out_free;
1742 		goto restart;
1743 	}
1744 
1745 	err = -EPIPE;
1746 	if (other->sk_shutdown & RCV_SHUTDOWN)
1747 		goto out_unlock;
1748 
1749 	if (sk->sk_type != SOCK_SEQPACKET) {
1750 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1751 		if (err)
1752 			goto out_unlock;
1753 	}
1754 
1755 	/* other == sk && unix_peer(other) != sk if
1756 	 * - unix_peer(sk) == NULL, destination address bound to sk
1757 	 * - unix_peer(sk) == sk at the time of the get but disconnected before the lock
1758 	 */
1759 	if (other != sk &&
1760 	    unlikely(unix_peer(other) != sk &&
1761 	    unix_recvq_full_lockless(other))) {
1762 		if (timeo) {
1763 			timeo = unix_wait_for_peer(other, timeo);
1764 
1765 			err = sock_intr_errno(timeo);
1766 			if (signal_pending(current))
1767 				goto out_free;
1768 
1769 			goto restart;
1770 		}
1771 
1772 		if (!sk_locked) {
1773 			unix_state_unlock(other);
1774 			unix_state_double_lock(sk, other);
1775 		}
1776 
1777 		if (unix_peer(sk) != other ||
1778 		    unix_dgram_peer_wake_me(sk, other)) {
1779 			err = -EAGAIN;
1780 			sk_locked = 1;
1781 			goto out_unlock;
1782 		}
1783 
1784 		if (!sk_locked) {
1785 			sk_locked = 1;
1786 			goto restart_locked;
1787 		}
1788 	}
1789 
1790 	if (unlikely(sk_locked))
1791 		unix_state_unlock(sk);
1792 
1793 	if (sock_flag(other, SOCK_RCVTSTAMP))
1794 		__net_timestamp(skb);
1795 	maybe_add_creds(skb, sock, other);
1796 	scm_stat_add(other, skb);
1797 	skb_queue_tail(&other->sk_receive_queue, skb);
1798 	unix_state_unlock(other);
1799 	other->sk_data_ready(other);
1800 	sock_put(other);
1801 	scm_destroy(&scm);
1802 	return len;
1803 
1804 out_unlock:
1805 	if (sk_locked)
1806 		unix_state_unlock(sk);
1807 	unix_state_unlock(other);
1808 out_free:
1809 	kfree_skb(skb);
1810 out:
1811 	if (other)
1812 		sock_put(other);
1813 	scm_destroy(&scm);
1814 	return err;
1815 }
1816 
1817 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1818  * bytes, and a minimum of a full page.
1819  */
1820 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
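
/*
 * Worked example: with 4 KiB pages, get_order(32768) == 3, so
 * UNIX_SKB_FRAGS_SZ == 4096 << 3 == 32768 bytes; with 64 KiB pages,
 * get_order(32768) == 0 and the limit is one full 65536-byte page,
 * the "minimum of a full page" mentioned above.
 */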
1821 
1822 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1823 			       size_t len)
1824 {
1825 	struct sock *sk = sock->sk;
1826 	struct sock *other = NULL;
1827 	int err, size;
1828 	struct sk_buff *skb;
1829 	int sent = 0;
1830 	struct scm_cookie scm;
1831 	bool fds_sent = false;
1832 	int data_len;
1833 
1834 	wait_for_unix_gc();
1835 	err = scm_send(sock, msg, &scm, false);
1836 	if (err < 0)
1837 		return err;
1838 
1839 	err = -EOPNOTSUPP;
1840 	if (msg->msg_flags&MSG_OOB)
1841 		goto out_err;
1842 
1843 	if (msg->msg_namelen) {
1844 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1845 		goto out_err;
1846 	} else {
1847 		err = -ENOTCONN;
1848 		other = unix_peer(sk);
1849 		if (!other)
1850 			goto out_err;
1851 	}
1852 
1853 	if (sk->sk_shutdown & SEND_SHUTDOWN)
1854 		goto pipe_err;
1855 
1856 	while (sent < len) {
1857 		size = len - sent;
1858 
1859 		/* Keep two messages in the pipe so it schedules better */
1860 		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1861 
1862 		/* allow fallback to order-0 allocations */
1863 		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1864 
1865 		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1866 
1867 		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1868 
1869 		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1870 					   msg->msg_flags & MSG_DONTWAIT, &err,
1871 					   get_order(UNIX_SKB_FRAGS_SZ));
1872 		if (!skb)
1873 			goto out_err;
1874 
1875 		/* Only send the fds in the first buffer */
1876 		err = unix_scm_to_skb(&scm, skb, !fds_sent);
1877 		if (err < 0) {
1878 			kfree_skb(skb);
1879 			goto out_err;
1880 		}
1881 		fds_sent = true;
1882 
1883 		skb_put(skb, size - data_len);
1884 		skb->data_len = data_len;
1885 		skb->len = size;
1886 		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1887 		if (err) {
1888 			kfree_skb(skb);
1889 			goto out_err;
1890 		}
1891 
1892 		unix_state_lock(other);
1893 
1894 		if (sock_flag(other, SOCK_DEAD) ||
1895 		    (other->sk_shutdown & RCV_SHUTDOWN))
1896 			goto pipe_err_free;
1897 
1898 		maybe_add_creds(skb, sock, other);
1899 		scm_stat_add(other, skb);
1900 		skb_queue_tail(&other->sk_receive_queue, skb);
1901 		unix_state_unlock(other);
1902 		other->sk_data_ready(other);
1903 		sent += size;
1904 	}
1905 
1906 	scm_destroy(&scm);
1907 
1908 	return sent;
1909 
1910 pipe_err_free:
1911 	unix_state_unlock(other);
1912 	kfree_skb(skb);
1913 pipe_err:
1914 	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1915 		send_sig(SIGPIPE, current, 0);
1916 	err = -EPIPE;
1917 out_err:
1918 	scm_destroy(&scm);
1919 	return sent ? : err;
1920 }
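
/*
 * Sizing example for the loop above (assuming the common 212992-byte
 * default sk_sndbuf): each skb is first capped at
 * (212992 >> 1) - 64 = 106432 bytes so that two messages fit in the send
 * buffer, then at SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ so the paged part
 * stays within UNIX_SKB_FRAGS_SZ.
 */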
1921 
1922 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1923 				    int offset, size_t size, int flags)
1924 {
1925 	int err;
1926 	bool send_sigpipe = false;
1927 	bool init_scm = true;
1928 	struct scm_cookie scm;
1929 	struct sock *other, *sk = socket->sk;
1930 	struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1931 
1932 	if (flags & MSG_OOB)
1933 		return -EOPNOTSUPP;
1934 
1935 	other = unix_peer(sk);
1936 	if (!other || sk->sk_state != TCP_ESTABLISHED)
1937 		return -ENOTCONN;
1938 
1939 	if (false) {
1940 alloc_skb:
1941 		unix_state_unlock(other);
1942 		mutex_unlock(&unix_sk(other)->iolock);
1943 		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1944 					      &err, 0);
1945 		if (!newskb)
1946 			goto err;
1947 	}
1948 
1949 	/* we must acquire iolock as we modify already present
1950 	 * skbs in the sk_receive_queue and mess with skb->len
1951 	 */
1952 	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1953 	if (err) {
1954 		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1955 		goto err;
1956 	}
1957 
1958 	if (sk->sk_shutdown & SEND_SHUTDOWN) {
1959 		err = -EPIPE;
1960 		send_sigpipe = true;
1961 		goto err_unlock;
1962 	}
1963 
1964 	unix_state_lock(other);
1965 
1966 	if (sock_flag(other, SOCK_DEAD) ||
1967 	    other->sk_shutdown & RCV_SHUTDOWN) {
1968 		err = -EPIPE;
1969 		send_sigpipe = true;
1970 		goto err_state_unlock;
1971 	}
1972 
1973 	if (init_scm) {
1974 		err = maybe_init_creds(&scm, socket, other);
1975 		if (err)
1976 			goto err_state_unlock;
1977 		init_scm = false;
1978 	}
1979 
1980 	skb = skb_peek_tail(&other->sk_receive_queue);
1981 	if (tail && tail == skb) {
1982 		skb = newskb;
1983 	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
1984 		if (newskb) {
1985 			skb = newskb;
1986 		} else {
1987 			tail = skb;
1988 			goto alloc_skb;
1989 		}
1990 	} else if (newskb) {
1991 		/* This is the fast path; the newskb check above is not
1992 		 * strictly necessary, since calling consume_skb() with
1993 		 * newskb == NULL would do no harm.
1994 		 */
1995 		consume_skb(newskb);
1996 		newskb = NULL;
1997 	}
1998 
1999 	if (skb_append_pagefrags(skb, page, offset, size)) {
2000 		tail = skb;
2001 		goto alloc_skb;
2002 	}
2003 
2004 	skb->len += size;
2005 	skb->data_len += size;
2006 	skb->truesize += size;
2007 	refcount_add(size, &sk->sk_wmem_alloc);
2008 
2009 	if (newskb) {
2010 		err = unix_scm_to_skb(&scm, skb, false);
2011 		if (err)
2012 			goto err_state_unlock;
2013 		spin_lock(&other->sk_receive_queue.lock);
2014 		__skb_queue_tail(&other->sk_receive_queue, newskb);
2015 		spin_unlock(&other->sk_receive_queue.lock);
2016 	}
2017 
2018 	unix_state_unlock(other);
2019 	mutex_unlock(&unix_sk(other)->iolock);
2020 
2021 	other->sk_data_ready(other);
2022 	scm_destroy(&scm);
2023 	return size;
2024 
2025 err_state_unlock:
2026 	unix_state_unlock(other);
2027 err_unlock:
2028 	mutex_unlock(&unix_sk(other)->iolock);
2029 err:
2030 	kfree_skb(newskb);
2031 	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2032 		send_sig(SIGPIPE, current, 0);
2033 	if (!init_scm)
2034 		scm_destroy(&scm);
2035 	return err;
2036 }
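
/* Userspace sketch (illustrative): unix_stream_sendpage() above is
 * what ends up servicing zero-copy writes such as sendfile(2) into a
 * connected AF_UNIX stream socket.  Assuming in_fd is a regular file
 * and sock a connected SOCK_STREAM unix socket:
 *
 *	#include <sys/sendfile.h>
 *
 *	static ssize_t file_to_unix_sock(int sock, int in_fd, size_t count)
 *	{
 *		off_t off = 0;
 *		size_t total = 0;
 *		ssize_t n;
 *
 *		while (total < count) {
 *			n = sendfile(sock, in_fd, &off, count - total);
 *			if (n < 0)
 *				return -1;
 *			if (n == 0)
 *				break;
 *			total += n;
 *		}
 *		return total;
 *	}
 */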
2037 
2038 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2039 				  size_t len)
2040 {
2041 	int err;
2042 	struct sock *sk = sock->sk;
2043 
2044 	err = sock_error(sk);
2045 	if (err)
2046 		return err;
2047 
2048 	if (sk->sk_state != TCP_ESTABLISHED)
2049 		return -ENOTCONN;
2050 
2051 	if (msg->msg_namelen)
2052 		msg->msg_namelen = 0;
2053 
2054 	return unix_dgram_sendmsg(sock, msg, len);
2055 }
2056 
2057 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2058 				  size_t size, int flags)
2059 {
2060 	struct sock *sk = sock->sk;
2061 
2062 	if (sk->sk_state != TCP_ESTABLISHED)
2063 		return -ENOTCONN;
2064 
2065 	return unix_dgram_recvmsg(sock, msg, size, flags);
2066 }
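
/* Userspace sketch (illustrative): SOCK_SEQPACKET preserves message
 * boundaries on a connected socket, which is why the two wrappers
 * above can simply defer to the datagram paths once connected.
 *
 *	#include <sys/socket.h>
 *
 *	int fds[2];
 *	char buf[64];
 *
 *	if (socketpair(AF_UNIX, SOCK_SEQPACKET, 0, fds) == 0) {
 *		send(fds[0], "one", 3, 0);
 *		send(fds[0], "two", 3, 0);
 *		recv(fds[1], buf, sizeof(buf), 0);
 *	}
 *
 * The recv() returns 3 ("one"); it never coalesces to "onetwo".
 */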
2067 
2068 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2069 {
2070 	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2071 
2072 	if (addr) {
2073 		msg->msg_namelen = addr->len;
2074 		memcpy(msg->msg_name, addr->name, addr->len);
2075 	}
2076 }
2077 
2078 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2079 			      size_t size, int flags)
2080 {
2081 	struct scm_cookie scm;
2082 	struct sock *sk = sock->sk;
2083 	struct unix_sock *u = unix_sk(sk);
2084 	struct sk_buff *skb, *last;
2085 	long timeo;
2086 	int skip;
2087 	int err;
2088 
2089 	err = -EOPNOTSUPP;
2090 	if (flags & MSG_OOB)
2091 		goto out;
2092 
2093 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2094 
2095 	do {
2096 		mutex_lock(&u->iolock);
2097 
2098 		skip = sk_peek_offset(sk, flags);
2099 		skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2100 					      &skip, &err, &last);
2101 		if (skb) {
2102 			if (!(flags & MSG_PEEK))
2103 				scm_stat_del(sk, skb);
2104 			break;
2105 		}
2106 
2107 		mutex_unlock(&u->iolock);
2108 
2109 		if (err != -EAGAIN)
2110 			break;
2111 	} while (timeo &&
2112 		 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2113 					      &err, &timeo, last));
2114 
2115 	if (!skb) { /* implies iolock unlocked */
2116 		unix_state_lock(sk);
2117 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2118 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2119 		    (sk->sk_shutdown & RCV_SHUTDOWN))
2120 			err = 0;
2121 		unix_state_unlock(sk);
2122 		goto out;
2123 	}
2124 
2125 	if (wq_has_sleeper(&u->peer_wait))
2126 		wake_up_interruptible_sync_poll(&u->peer_wait,
2127 						EPOLLOUT | EPOLLWRNORM |
2128 						EPOLLWRBAND);
2129 
2130 	if (msg->msg_name)
2131 		unix_copy_addr(msg, skb->sk);
2132 
2133 	if (size > skb->len - skip)
2134 		size = skb->len - skip;
2135 	else if (size < skb->len - skip)
2136 		msg->msg_flags |= MSG_TRUNC;
2137 
2138 	err = skb_copy_datagram_msg(skb, skip, msg, size);
2139 	if (err)
2140 		goto out_free;
2141 
2142 	if (sock_flag(sk, SOCK_RCVTSTAMP))
2143 		__sock_recv_timestamp(msg, sk, skb);
2144 
2145 	memset(&scm, 0, sizeof(scm));
2146 
2147 	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2148 	unix_set_secdata(&scm, skb);
2149 
2150 	if (!(flags & MSG_PEEK)) {
2151 		if (UNIXCB(skb).fp)
2152 			unix_detach_fds(&scm, skb);
2153 
2154 		sk_peek_offset_bwd(sk, skb->len);
2155 	} else {
2156 		/* It is questionable: on PEEK we could:
2157 		   - not return fds at all - good, but too simple 8)
2158 		   - return fds, and not return them again on a real
2159 		     read (the old strategy, apparently wrong)
2160 		   - clone fds (chosen for now as the most universal
2161 		     solution)
2162 
2163 		   POSIX 1003.1g does not actually define this clearly
2164 		   at all - but then POSIX 1003.1g doesn't define a lot
2165 		   of things clearly!
2166 
2167 		*/
2168 
2169 		sk_peek_offset_fwd(sk, size);
2170 
2171 		if (UNIXCB(skb).fp)
2172 			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2173 	}
2174 	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2175 
2176 	scm_recv(sock, msg, &scm, flags);
2177 
2178 out_free:
2179 	skb_free_datagram(sk, skb);
2180 	mutex_unlock(&u->iolock);
2181 out:
2182 	return err;
2183 }
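
/* Userspace sketch (illustrative) of the MSG_PEEK choice documented
 * above, assuming sock is a connected AF_UNIX datagram/seqpacket fd:
 * peeking a message that carries SCM_RIGHTS yields cloned fds, and a
 * subsequent real read delivers the same message (with fresh
 * duplicates) again.
 *
 *	#include <sys/socket.h>
 *
 *	char buf[128], cbuf[CMSG_SPACE(sizeof(int))];
 *	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
 *	struct msghdr msg = {
 *		.msg_iov = &iov, .msg_iovlen = 1,
 *		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
 *	};
 *
 *	recvmsg(sock, &msg, MSG_PEEK);		// fds cloned, data stays queued
 *	msg.msg_controllen = sizeof(cbuf);	// reset for the second call
 *	recvmsg(sock, &msg, 0);			// same data and fds, now dequeued
 */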
2184 
2185 /*
2186  *	Sleep until more data has arrived. But check for races.
2187  */
2188 static long unix_stream_data_wait(struct sock *sk, long timeo,
2189 				  struct sk_buff *last, unsigned int last_len,
2190 				  bool freezable)
2191 {
2192 	struct sk_buff *tail;
2193 	DEFINE_WAIT(wait);
2194 
2195 	unix_state_lock(sk);
2196 
2197 	for (;;) {
2198 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2199 
2200 		tail = skb_peek_tail(&sk->sk_receive_queue);
2201 		if (tail != last ||
2202 		    (tail && tail->len != last_len) ||
2203 		    sk->sk_err ||
2204 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2205 		    signal_pending(current) ||
2206 		    !timeo)
2207 			break;
2208 
2209 		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2210 		unix_state_unlock(sk);
2211 		if (freezable)
2212 			timeo = freezable_schedule_timeout(timeo);
2213 		else
2214 			timeo = schedule_timeout(timeo);
2215 		unix_state_lock(sk);
2216 
2217 		if (sock_flag(sk, SOCK_DEAD))
2218 			break;
2219 
2220 		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2221 	}
2222 
2223 	finish_wait(sk_sleep(sk), &wait);
2224 	unix_state_unlock(sk);
2225 	return timeo;
2226 }
2227 
2228 static unsigned int unix_skb_len(const struct sk_buff *skb)
2229 {
2230 	return skb->len - UNIXCB(skb).consumed;
2231 }
2232 
2233 struct unix_stream_read_state {
2234 	int (*recv_actor)(struct sk_buff *, int, int,
2235 			  struct unix_stream_read_state *);
2236 	struct socket *socket;
2237 	struct msghdr *msg;
2238 	struct pipe_inode_info *pipe;
2239 	size_t size;
2240 	int flags;
2241 	unsigned int splice_flags;
2242 };
2243 
2244 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2245 				    bool freezable)
2246 {
2247 	struct scm_cookie scm;
2248 	struct socket *sock = state->socket;
2249 	struct sock *sk = sock->sk;
2250 	struct unix_sock *u = unix_sk(sk);
2251 	int copied = 0;
2252 	int flags = state->flags;
2253 	int noblock = flags & MSG_DONTWAIT;
2254 	bool check_creds = false;
2255 	int target;
2256 	int err = 0;
2257 	long timeo;
2258 	int skip;
2259 	size_t size = state->size;
2260 	unsigned int last_len;
2261 
2262 	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2263 		err = -EINVAL;
2264 		goto out;
2265 	}
2266 
2267 	if (unlikely(flags & MSG_OOB)) {
2268 		err = -EOPNOTSUPP;
2269 		goto out;
2270 	}
2271 
2272 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2273 	timeo = sock_rcvtimeo(sk, noblock);
2274 
2275 	memset(&scm, 0, sizeof(scm));
2276 
2277 	/* Lock the socket to prevent queue disordering
2278 	 * while we sleep in memcpy_to_msg.
2279 	 */
2280 	mutex_lock(&u->iolock);
2281 
2282 	skip = max(sk_peek_offset(sk, flags), 0);
2283 
2284 	do {
2285 		int chunk;
2286 		bool drop_skb;
2287 		struct sk_buff *skb, *last;
2288 
2289 redo:
2290 		unix_state_lock(sk);
2291 		if (sock_flag(sk, SOCK_DEAD)) {
2292 			err = -ECONNRESET;
2293 			goto unlock;
2294 		}
2295 		last = skb = skb_peek(&sk->sk_receive_queue);
2296 		last_len = last ? last->len : 0;
2297 again:
2298 		if (skb == NULL) {
2299 			if (copied >= target)
2300 				goto unlock;
2301 
2302 			/*
2303 			 *	POSIX 1003.1g mandates this order.
2304 			 */
2305 
2306 			err = sock_error(sk);
2307 			if (err)
2308 				goto unlock;
2309 			if (sk->sk_shutdown & RCV_SHUTDOWN)
2310 				goto unlock;
2311 
2312 			unix_state_unlock(sk);
2313 			if (!timeo) {
2314 				err = -EAGAIN;
2315 				break;
2316 			}
2317 
2318 			mutex_unlock(&u->iolock);
2319 
2320 			timeo = unix_stream_data_wait(sk, timeo, last,
2321 						      last_len, freezable);
2322 
2323 			if (signal_pending(current)) {
2324 				err = sock_intr_errno(timeo);
2325 				scm_destroy(&scm);
2326 				goto out;
2327 			}
2328 
2329 			mutex_lock(&u->iolock);
2330 			goto redo;
2331 unlock:
2332 			unix_state_unlock(sk);
2333 			break;
2334 		}
2335 
2336 		while (skip >= unix_skb_len(skb)) {
2337 			skip -= unix_skb_len(skb);
2338 			last = skb;
2339 			last_len = skb->len;
2340 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2341 			if (!skb)
2342 				goto again;
2343 		}
2344 
2345 		unix_state_unlock(sk);
2346 
2347 		if (check_creds) {
2348 			/* Never glue messages from different writers */
2349 			if (!unix_skb_scm_eq(skb, &scm))
2350 				break;
2351 		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2352 			/* Copy credentials */
2353 			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2354 			unix_set_secdata(&scm, skb);
2355 			check_creds = true;
2356 		}
2357 
2358 		/* Copy address just once */
2359 		if (state->msg && state->msg->msg_name) {
2360 			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2361 					 state->msg->msg_name);
2362 			unix_copy_addr(state->msg, skb->sk);
2363 			sunaddr = NULL;
2364 		}
2365 
2366 		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2367 		skb_get(skb);
2368 		chunk = state->recv_actor(skb, skip, chunk, state);
2369 		drop_skb = !unix_skb_len(skb);
2370 		/* skb is only safe to use if !drop_skb */
2371 		consume_skb(skb);
2372 		if (chunk < 0) {
2373 			if (copied == 0)
2374 				copied = -EFAULT;
2375 			break;
2376 		}
2377 		copied += chunk;
2378 		size -= chunk;
2379 
2380 		if (drop_skb) {
2381 			/* the skb was touched by a concurrent reader;
2382 			 * we should not expect anything from this skb
2383 			 * anymore and must assume it is invalid - we can be
2384 			 * sure it was dropped from the socket queue
2385 			 *
2386 			 * let's report a short read
2387 			 */
2388 			err = 0;
2389 			break;
2390 		}
2391 
2392 		/* Mark read part of skb as used */
2393 		if (!(flags & MSG_PEEK)) {
2394 			UNIXCB(skb).consumed += chunk;
2395 
2396 			sk_peek_offset_bwd(sk, chunk);
2397 
2398 			if (UNIXCB(skb).fp) {
2399 				scm_stat_del(sk, skb);
2400 				unix_detach_fds(&scm, skb);
2401 			}
2402 
2403 			if (unix_skb_len(skb))
2404 				break;
2405 
2406 			skb_unlink(skb, &sk->sk_receive_queue);
2407 			consume_skb(skb);
2408 
2409 			if (scm.fp)
2410 				break;
2411 		} else {
2412 			/* It is questionable, see note in unix_dgram_recvmsg.
2413 			 */
2414 			if (UNIXCB(skb).fp)
2415 				scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2416 
2417 			sk_peek_offset_fwd(sk, chunk);
2418 
2419 			if (UNIXCB(skb).fp)
2420 				break;
2421 
2422 			skip = 0;
2423 			last = skb;
2424 			last_len = skb->len;
2425 			unix_state_lock(sk);
2426 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2427 			if (skb)
2428 				goto again;
2429 			unix_state_unlock(sk);
2430 			break;
2431 		}
2432 	} while (size);
2433 
2434 	mutex_unlock(&u->iolock);
2435 	if (state->msg)
2436 		scm_recv(sock, state->msg, &scm, flags);
2437 	else
2438 		scm_destroy(&scm);
2439 out:
2440 	return copied ? : err;
2441 }
2442 
2443 static int unix_stream_read_actor(struct sk_buff *skb,
2444 				  int skip, int chunk,
2445 				  struct unix_stream_read_state *state)
2446 {
2447 	int ret;
2448 
2449 	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2450 				    state->msg, chunk);
2451 	return ret ?: chunk;
2452 }
2453 
2454 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2455 			       size_t size, int flags)
2456 {
2457 	struct unix_stream_read_state state = {
2458 		.recv_actor = unix_stream_read_actor,
2459 		.socket = sock,
2460 		.msg = msg,
2461 		.size = size,
2462 		.flags = flags
2463 	};
2464 
2465 	return unix_stream_read_generic(&state, true);
2466 }
2467 
2468 static int unix_stream_splice_actor(struct sk_buff *skb,
2469 				    int skip, int chunk,
2470 				    struct unix_stream_read_state *state)
2471 {
2472 	return skb_splice_bits(skb, state->socket->sk,
2473 			       UNIXCB(skb).consumed + skip,
2474 			       state->pipe, chunk, state->splice_flags);
2475 }
2476 
2477 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2478 				       struct pipe_inode_info *pipe,
2479 				       size_t size, unsigned int flags)
2480 {
2481 	struct unix_stream_read_state state = {
2482 		.recv_actor = unix_stream_splice_actor,
2483 		.socket = sock,
2484 		.pipe = pipe,
2485 		.size = size,
2486 		.splice_flags = flags,
2487 	};
2488 
2489 	if (unlikely(*ppos))
2490 		return -ESPIPE;
2491 
2492 	if (sock->file->f_flags & O_NONBLOCK ||
2493 	    flags & SPLICE_F_NONBLOCK)
2494 		state.flags = MSG_DONTWAIT;
2495 
2496 	return unix_stream_read_generic(&state, false);
2497 }
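
/* Userspace sketch (illustrative): the splice path above is reached
 * through splice(2) with an AF_UNIX stream socket as the input side,
 * moving queued socket bytes into a pipe without a pass through
 * userspace.  Note how SPLICE_F_NONBLOCK maps to MSG_DONTWAIT above.
 * Assuming sock is a connected SOCK_STREAM unix socket:
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int pipefd[2];
 *	ssize_t n;
 *
 *	if (pipe(pipefd) == 0)
 *		n = splice(sock, NULL, pipefd[1], NULL, 4096,
 *			   SPLICE_F_NONBLOCK);
 */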
2498 
2499 static int unix_shutdown(struct socket *sock, int mode)
2500 {
2501 	struct sock *sk = sock->sk;
2502 	struct sock *other;
2503 
2504 	if (mode < SHUT_RD || mode > SHUT_RDWR)
2505 		return -EINVAL;
2506 	/* This maps:
2507 	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2508 	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2509 	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2510 	 */
2511 	++mode;
2512 
2513 	unix_state_lock(sk);
2514 	sk->sk_shutdown |= mode;
2515 	other = unix_peer(sk);
2516 	if (other)
2517 		sock_hold(other);
2518 	unix_state_unlock(sk);
2519 	sk->sk_state_change(sk);
2520 
2521 	if (other &&
2522 		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2523 
2524 		int peer_mode = 0;
2525 
2526 		if (mode & RCV_SHUTDOWN)
2527 			peer_mode |= SEND_SHUTDOWN;
2528 		if (mode & SEND_SHUTDOWN)
2529 			peer_mode |= RCV_SHUTDOWN;
2530 		unix_state_lock(other);
2531 		other->sk_shutdown |= peer_mode;
2532 		unix_state_unlock(other);
2533 		other->sk_state_change(other);
2534 		if (peer_mode == SHUTDOWN_MASK)
2535 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2536 		else if (peer_mode & RCV_SHUTDOWN)
2537 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2538 	}
2539 	if (other)
2540 		sock_put(other);
2541 
2542 	return 0;
2543 }
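
/* Userspace sketch (illustrative): because the shutdown bits are
 * mirrored onto the peer above, a half-close is visible immediately
 * on the other end of a connected pair:
 *
 *	#include <sys/socket.h>
 *	#include <unistd.h>
 *
 *	int fds[2];
 *	char c;
 *
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
 *	shutdown(fds[0], SHUT_WR);
 *	read(fds[1], &c, 1);	// returns 0 (EOF) instead of blocking
 */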
2544 
2545 long unix_inq_len(struct sock *sk)
2546 {
2547 	struct sk_buff *skb;
2548 	long amount = 0;
2549 
2550 	if (sk->sk_state == TCP_LISTEN)
2551 		return -EINVAL;
2552 
2553 	spin_lock(&sk->sk_receive_queue.lock);
2554 	if (sk->sk_type == SOCK_STREAM ||
2555 	    sk->sk_type == SOCK_SEQPACKET) {
2556 		skb_queue_walk(&sk->sk_receive_queue, skb)
2557 			amount += unix_skb_len(skb);
2558 	} else {
2559 		skb = skb_peek(&sk->sk_receive_queue);
2560 		if (skb)
2561 			amount = skb->len;
2562 	}
2563 	spin_unlock(&sk->sk_receive_queue.lock);
2564 
2565 	return amount;
2566 }
2567 EXPORT_SYMBOL_GPL(unix_inq_len);
2568 
2569 long unix_outq_len(struct sock *sk)
2570 {
2571 	return sk_wmem_alloc_get(sk);
2572 }
2573 EXPORT_SYMBOL_GPL(unix_outq_len);
2574 
2575 static int unix_open_file(struct sock *sk)
2576 {
2577 	struct path path;
2578 	struct file *f;
2579 	int fd;
2580 
2581 	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2582 		return -EPERM;
2583 
2584 	if (!smp_load_acquire(&unix_sk(sk)->addr))
2585 		return -ENOENT;
2586 
2587 	path = unix_sk(sk)->path;
2588 	if (!path.dentry)
2589 		return -ENOENT;
2590 
2591 	path_get(&path);
2592 
2593 	fd = get_unused_fd_flags(O_CLOEXEC);
2594 	if (fd < 0)
2595 		goto out;
2596 
2597 	f = dentry_open(&path, O_PATH, current_cred());
2598 	if (IS_ERR(f)) {
2599 		put_unused_fd(fd);
2600 		fd = PTR_ERR(f);
2601 		goto out;
2602 	}
2603 
2604 	fd_install(fd, f);
2605 out:
2606 	path_put(&path);
2607 
2608 	return fd;
2609 }
2610 
2611 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2612 {
2613 	struct sock *sk = sock->sk;
2614 	long amount = 0;
2615 	int err;
2616 
2617 	switch (cmd) {
2618 	case SIOCOUTQ:
2619 		amount = unix_outq_len(sk);
2620 		err = put_user(amount, (int __user *)arg);
2621 		break;
2622 	case SIOCINQ:
2623 		amount = unix_inq_len(sk);
2624 		if (amount < 0)
2625 			err = amount;
2626 		else
2627 			err = put_user(amount, (int __user *)arg);
2628 		break;
2629 	case SIOCUNIXFILE:
2630 		err = unix_open_file(sk);
2631 		break;
2632 	default:
2633 		err = -ENOIOCTLCMD;
2634 		break;
2635 	}
2636 	return err;
2637 }
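
/* Userspace sketch (illustrative): exercising the ioctls handled
 * above.  SIOCINQ/SIOCOUTQ report unread and unsent byte counts;
 * SIOCUNIXFILE (which requires CAP_NET_ADMIN) returns an O_PATH fd
 * for the socket's bound filesystem inode.
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/sockios.h>
 *	#include <linux/un.h>
 *
 *	int unread = 0, unsent = 0, pfd;
 *
 *	ioctl(sock, SIOCINQ, &unread);
 *	ioctl(sock, SIOCOUTQ, &unsent);
 *	pfd = ioctl(sock, SIOCUNIXFILE, 0);	// O_PATH fd, or -1 with errno set
 */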
2638 
2639 #ifdef CONFIG_COMPAT
2640 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2641 {
2642 	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
2643 }
2644 #endif
2645 
2646 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2647 {
2648 	struct sock *sk = sock->sk;
2649 	__poll_t mask;
2650 
2651 	sock_poll_wait(file, sock, wait);
2652 	mask = 0;
2653 
2654 	/* exceptional events? */
2655 	if (sk->sk_err)
2656 		mask |= EPOLLERR;
2657 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2658 		mask |= EPOLLHUP;
2659 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2660 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2661 
2662 	/* readable? */
2663 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2664 		mask |= EPOLLIN | EPOLLRDNORM;
2665 
2666 	/* Connection-based sockets need to check for termination and startup */
2667 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2668 	    sk->sk_state == TCP_CLOSE)
2669 		mask |= EPOLLHUP;
2670 
2671 	/*
2672 	 * We also flag the socket writable when the other side has
2673 	 * shut down the connection. This prevents stuck sockets.
2674 	 */
2675 	if (unix_writable(sk))
2676 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2677 
2678 	return mask;
2679 }
2680 
2681 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
2682 				    poll_table *wait)
2683 {
2684 	struct sock *sk = sock->sk, *other;
2685 	unsigned int writable;
2686 	__poll_t mask;
2687 
2688 	sock_poll_wait(file, sock, wait);
2689 	mask = 0;
2690 
2691 	/* exceptional events? */
2692 	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
2693 		mask |= EPOLLERR |
2694 			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
2695 
2696 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2697 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2698 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2699 		mask |= EPOLLHUP;
2700 
2701 	/* readable? */
2702 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2703 		mask |= EPOLLIN | EPOLLRDNORM;
2704 
2705 	/* Connection-based sockets need to check for termination and startup */
2706 	if (sk->sk_type == SOCK_SEQPACKET) {
2707 		if (sk->sk_state == TCP_CLOSE)
2708 			mask |= EPOLLHUP;
2709 		/* connection hasn't started yet? */
2710 		if (sk->sk_state == TCP_SYN_SENT)
2711 			return mask;
2712 	}
2713 
2714 	/* No write status requested, avoid expensive OUT tests. */
2715 	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
2716 		return mask;
2717 
2718 	writable = unix_writable(sk);
2719 	if (writable) {
2720 		unix_state_lock(sk);
2721 
2722 		other = unix_peer(sk);
2723 		if (other && unix_peer(other) != sk &&
2724 		    unix_recvq_full(other) &&
2725 		    unix_dgram_peer_wake_me(sk, other))
2726 			writable = 0;
2727 
2728 		unix_state_unlock(sk);
2729 	}
2730 
2731 	if (writable)
2732 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2733 	else
2734 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2735 
2736 	return mask;
2737 }
2738 
2739 #ifdef CONFIG_PROC_FS
2740 
2741 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2742 
2743 #define get_bucket(x) ((x) >> BUCKET_SPACE)
2744 #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2745 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
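
/* Worked example (illustrative): on 64-bit, assuming UNIX_HASH_BITS
 * is 8, BUCKET_SPACE is 64 - 9 - 1 = 54, so a seq position packs the
 * bucket index into the top bits and a 1-based in-bucket offset into
 * the low 54 bits: pos = (bucket << 54) | offset.
 */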
2746 
2747 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2748 {
2749 	unsigned long offset = get_offset(*pos);
2750 	unsigned long bucket = get_bucket(*pos);
2751 	struct sock *sk;
2752 	unsigned long count = 0;
2753 
2754 	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2755 		if (sock_net(sk) != seq_file_net(seq))
2756 			continue;
2757 		if (++count == offset)
2758 			break;
2759 	}
2760 
2761 	return sk;
2762 }
2763 
2764 static struct sock *unix_next_socket(struct seq_file *seq,
2765 				     struct sock *sk,
2766 				     loff_t *pos)
2767 {
2768 	unsigned long bucket;
2769 
2770 	while (sk > (struct sock *)SEQ_START_TOKEN) {
2771 		sk = sk_next(sk);
2772 		if (!sk)
2773 			goto next_bucket;
2774 		if (sock_net(sk) == seq_file_net(seq))
2775 			return sk;
2776 	}
2777 
2778 	do {
2779 		sk = unix_from_bucket(seq, pos);
2780 		if (sk)
2781 			return sk;
2782 
2783 next_bucket:
2784 		bucket = get_bucket(*pos) + 1;
2785 		*pos = set_bucket_offset(bucket, 1);
2786 	} while (bucket < ARRAY_SIZE(unix_socket_table));
2787 
2788 	return NULL;
2789 }
2790 
2791 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2792 	__acquires(unix_table_lock)
2793 {
2794 	spin_lock(&unix_table_lock);
2795 
2796 	if (!*pos)
2797 		return SEQ_START_TOKEN;
2798 
2799 	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2800 		return NULL;
2801 
2802 	return unix_next_socket(seq, NULL, pos);
2803 }
2804 
2805 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2806 {
2807 	++*pos;
2808 	return unix_next_socket(seq, v, pos);
2809 }
2810 
2811 static void unix_seq_stop(struct seq_file *seq, void *v)
2812 	__releases(unix_table_lock)
2813 {
2814 	spin_unlock(&unix_table_lock);
2815 }
2816 
2817 static int unix_seq_show(struct seq_file *seq, void *v)
2818 {
2819 
2820 	if (v == SEQ_START_TOKEN)
2821 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2822 			 "Inode Path\n");
2823 	else {
2824 		struct sock *s = v;
2825 		struct unix_sock *u = unix_sk(s);
2826 		unix_state_lock(s);
2827 
2828 		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2829 			s,
2830 			refcount_read(&s->sk_refcnt),
2831 			0,
2832 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2833 			s->sk_type,
2834 			s->sk_socket ?
2835 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2836 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2837 			sock_i_ino(s));
2838 
2839 		if (u->addr) {	/* under unix_table_lock here */
2840 			int i, len;
2841 			seq_putc(seq, ' ');
2842 
2843 			i = 0;
2844 			len = u->addr->len - sizeof(short);
2845 			if (!UNIX_ABSTRACT(s))
2846 				len--;
2847 			else {
2848 				seq_putc(seq, '@');
2849 				i++;
2850 			}
2851 			for ( ; i < len; i++)
2852 				seq_putc(seq, u->addr->name->sun_path[i] ?:
2853 					 '@');
2854 		}
2855 		unix_state_unlock(s);
2856 		seq_putc(seq, '\n');
2857 	}
2858 
2859 	return 0;
2860 }
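
/* Example of a line produced above (illustrative values; %pK often
 * prints as zeros for unprivileged readers):
 *
 *	Num       RefCount Protocol Flags    Type St Inode Path
 *	0000000000000000: 00000002 00000000 00010000 0001 01 23456 /run/example.sock
 *
 * Flags 00010000 is __SO_ACCEPTCON (a listener), Type 0001 is
 * SOCK_STREAM and St 01 is SS_UNCONNECTED.
 */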
2861 
2862 static const struct seq_operations unix_seq_ops = {
2863 	.start  = unix_seq_start,
2864 	.next   = unix_seq_next,
2865 	.stop   = unix_seq_stop,
2866 	.show   = unix_seq_show,
2867 };
2868 #endif
2869 
2870 static const struct net_proto_family unix_family_ops = {
2871 	.family = PF_UNIX,
2872 	.create = unix_create,
2873 	.owner	= THIS_MODULE,
2874 };
2875 
2876 
2877 static int __net_init unix_net_init(struct net *net)
2878 {
2879 	int error = -ENOMEM;
2880 
2881 	net->unx.sysctl_max_dgram_qlen = 10;
2882 	if (unix_sysctl_register(net))
2883 		goto out;
2884 
2885 #ifdef CONFIG_PROC_FS
2886 	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2887 			sizeof(struct seq_net_private))) {
2888 		unix_sysctl_unregister(net);
2889 		goto out;
2890 	}
2891 #endif
2892 	error = 0;
2893 out:
2894 	return error;
2895 }
2896 
2897 static void __net_exit unix_net_exit(struct net *net)
2898 {
2899 	unix_sysctl_unregister(net);
2900 	remove_proc_entry("unix", net->proc_net);
2901 }
2902 
2903 static struct pernet_operations unix_net_ops = {
2904 	.init = unix_net_init,
2905 	.exit = unix_net_exit,
2906 };
2907 
2908 static int __init af_unix_init(void)
2909 {
2910 	int rc = -1;
2911 
2912 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
2913 
2914 	rc = proto_register(&unix_proto, 1);
2915 	if (rc != 0) {
2916 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2917 		goto out;
2918 	}
2919 
2920 	sock_register(&unix_family_ops);
2921 	register_pernet_subsys(&unix_net_ops);
2922 out:
2923 	return rc;
2924 }
2925 
2926 static void __exit af_unix_exit(void)
2927 {
2928 	sock_unregister(PF_UNIX);
2929 	proto_unregister(&unix_proto);
2930 	unregister_pernet_subsys(&unix_net_ops);
2931 }
2932 
2933 /* Earlier than device_initcall() so that other drivers invoking
2934  * request_module() don't end up in a loop when modprobe tries
2935  * to use a UNIX socket. But later than subsys_initcall() because
2936  * we depend on stuff initialised there.  */
2937 fs_initcall(af_unix_init);
2938 module_exit(af_unix_exit);
2939 
2940 MODULE_LICENSE("GPL");
2941 MODULE_ALIAS_NETPROTO(PF_UNIX);
2942