xref: /openbmc/linux/net/unix/af_unix.c (revision de8c12110a130337c8e7e7b8250de0580e644dee)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * NET4:	Implementation of BSD Unix domain sockets.
4  *
5  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
6  *
7  * Fixes:
8  *		Linus Torvalds	:	Assorted bug cures.
9  *		Niibe Yutaka	:	async I/O support.
10  *		Carsten Paeth	:	PF_UNIX check, address fixes.
11  *		Alan Cox	:	Limit size of allocated blocks.
12  *		Alan Cox	:	Fixed the stupid socketpair bug.
13  *		Alan Cox	:	BSD compatibility fine tuning.
14  *		Alan Cox	:	Fixed a bug in connect when interrupted.
15  *		Alan Cox	:	Sorted out a proper draft version of
16  *					file descriptor passing hacked up from
17  *					Mike Shaver's work.
18  *		Marty Leisner	:	Fixes to fd passing
19  *		Nick Nevin	:	recvmsg bugfix.
20  *		Alan Cox	:	Started proper garbage collector
21  *		Heiko EiBfeldt	:	Missing verify_area check
22  *		Alan Cox	:	Started POSIXisms
23  *		Andreas Schwab	:	Replace inode by dentry for proper
24  *					reference counting
25  *		Kirk Petersen	:	Made this a module
26  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
27  *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid a huge number
 *					of hashed socks (for unix_gc()
 *					performance reasons).
36  *					Security fix that limits the max
37  *					number of socks to 2*max_files and
38  *					the number of skb queueable in the
39  *					dgram receiver.
40  *		Artur Skawina   :	Hash function optimizations
41  *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
42  *	      Malcolm Beattie   :	Set peercred for socketpair
43  *	     Michal Ostrowski   :       Module initialization cleanup.
44  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
45  *	     				the core infrastructure is doing that
46  *	     				for all net proto families now (2.5.69+)
47  *
48  * Known differences from reference BSD that was tested:
49  *
50  *	[TO FIX]
51  *	ECONNREFUSED is not returned from one end of a connected() socket to the
52  *		other the moment one end closes.
53  *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
54  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
55  *	[NOT TO FIX]
56  *	accept() returns a path name even if the connecting socket has closed
57  *		in the meantime (BSD loses the path and gives up).
58  *	accept() returns 0 length path for an unbound connector. BSD returns 16
59  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
61  *	BSD af_unix apparently has connect forgetting to block properly.
62  *		(need to check this with the POSIX spec in detail)
63  *
64  * Differences from 2.0.0-11-... (ANK)
65  *	Bug fixes and improvements.
66  *		- client shutdown killed server socket.
67  *		- removed all useless cli/sti pairs.
68  *
69  *	Semantic changes/extensions.
70  *		- generic control message passing.
71  *		- SCM_CREDENTIALS control message.
72  *		- "Abstract" (not FS based) socket bindings.
73  *		  Abstract names are sequences of bytes (not zero terminated)
74  *		  started by 0, so that this name space does not intersect
75  *		  with BSD names.
76  */
77 
78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79 
80 #include <linux/module.h>
81 #include <linux/kernel.h>
82 #include <linux/signal.h>
83 #include <linux/sched/signal.h>
84 #include <linux/errno.h>
85 #include <linux/string.h>
86 #include <linux/stat.h>
87 #include <linux/dcache.h>
88 #include <linux/namei.h>
89 #include <linux/socket.h>
90 #include <linux/un.h>
91 #include <linux/fcntl.h>
92 #include <linux/termios.h>
93 #include <linux/sockios.h>
94 #include <linux/net.h>
95 #include <linux/in.h>
96 #include <linux/fs.h>
97 #include <linux/slab.h>
98 #include <linux/uaccess.h>
99 #include <linux/skbuff.h>
100 #include <linux/netdevice.h>
101 #include <net/net_namespace.h>
102 #include <net/sock.h>
103 #include <net/tcp_states.h>
104 #include <net/af_unix.h>
105 #include <linux/proc_fs.h>
106 #include <linux/seq_file.h>
107 #include <net/scm.h>
108 #include <linux/init.h>
109 #include <linux/poll.h>
110 #include <linux/rtnetlink.h>
111 #include <linux/mount.h>
112 #include <net/checksum.h>
113 #include <linux/security.h>
114 #include <linux/freezer.h>
115 #include <linux/file.h>
116 
117 #include "scm.h"
118 
/*
 * Global table of AF_UNIX sockets with 2 * UNIX_HASH_SIZE buckets.
 * unix_sockets_unbound() hands out buckets from the upper half
 * (index >= UNIX_HASH_SIZE); unix_bind() and unix_autobind() pick the
 * bucket for sockets that have an address.
 */
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
/* Spinlock protecting unix_socket_table. */
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
/*
 * Count of unix sockets: incremented in unix_create1(), decremented in
 * unix_sock_destructor().
 */
static atomic_long_t unix_nr_socks;
124 
125 
126 static struct hlist_head *unix_sockets_unbound(void *addr)
127 {
128 	unsigned long hash = (unsigned long)addr;
129 
130 	hash ^= hash >> 16;
131 	hash ^= hash >> 8;
132 	hash %= UNIX_HASH_SIZE;
133 	return &unix_socket_table[UNIX_HASH_SIZE + hash];
134 }
135 
/*
 * True when the hash stored in the socket's address is below UNIX_HASH_SIZE.
 * unix_bind() stores exactly UNIX_HASH_SIZE for filesystem names, so those
 * never match.  Dereferences ->addr unconditionally, so the socket must
 * already have an address.
 */
#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
137 
138 #ifdef CONFIG_SECURITY_NETWORK
139 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
140 {
141 	UNIXCB(skb).secid = scm->secid;
142 }
143 
144 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
145 {
146 	scm->secid = UNIXCB(skb).secid;
147 }
148 
149 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
150 {
151 	return (scm->secid == UNIXCB(skb).secid);
152 }
153 #else
154 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
155 { }
156 
157 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
158 { }
159 
160 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
161 {
162 	return true;
163 }
164 #endif /* CONFIG_SECURITY_NETWORK */
165 
166 /*
167  *  SMP locking strategy:
168  *    hash table is protected with spinlock unix_table_lock
169  *    each socket state is protected by separate spin lock.
170  */
171 
172 static inline unsigned int unix_hash_fold(__wsum n)
173 {
174 	unsigned int hash = (__force unsigned int)csum_fold(n);
175 
176 	hash ^= hash>>8;
177 	return hash&(UNIX_HASH_SIZE-1);
178 }
179 
/*
 * Peer pointer of a unix socket.  Expands to an lvalue, so it is used both
 * for reading and for assignment.
 */
#define unix_peer(sk) (unix_sk(sk)->peer)
181 
/* Non-zero when the peer recorded in @osk is @sk. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return sk == unix_peer(osk);
}
186 
187 static inline int unix_may_send(struct sock *sk, struct sock *osk)
188 {
189 	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
190 }
191 
192 static inline int unix_recvq_full(const struct sock *sk)
193 {
194 	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
195 }
196 
197 static inline int unix_recvq_full_lockless(const struct sock *sk)
198 {
199 	return skb_queue_len_lockless(&sk->sk_receive_queue) >
200 		READ_ONCE(sk->sk_max_ack_backlog);
201 }
202 
203 struct sock *unix_peer_get(struct sock *s)
204 {
205 	struct sock *peer;
206 
207 	unix_state_lock(s);
208 	peer = unix_peer(s);
209 	if (peer)
210 		sock_hold(peer);
211 	unix_state_unlock(s);
212 	return peer;
213 }
214 EXPORT_SYMBOL_GPL(unix_peer_get);
215 
216 static inline void unix_release_addr(struct unix_address *addr)
217 {
218 	if (refcount_dec_and_test(&addr->refcnt))
219 		kfree(addr);
220 }
221 
222 /*
223  *	Check unix socket name:
224  *		- should be not zero length.
225  *	        - if started by not zero, should be NULL terminated (FS object)
226  *		- if started by zero, it is abstract name.
227  */
228 
229 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
230 {
231 	*hashp = 0;
232 
233 	if (len <= sizeof(short) || len > sizeof(*sunaddr))
234 		return -EINVAL;
235 	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
236 		return -EINVAL;
237 	if (sunaddr->sun_path[0]) {
238 		/*
239 		 * This may look like an off by one error but it is a bit more
240 		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
241 		 * sun_path[108] doesn't as such exist.  However in kernel space
242 		 * we are guaranteed that it is a valid memory location in our
243 		 * kernel address buffer.
244 		 */
245 		((char *)sunaddr)[len] = 0;
246 		len = strlen(sunaddr->sun_path)+1+sizeof(short);
247 		return len;
248 	}
249 
250 	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
251 	return len;
252 }
253 
/*
 * Unlink @sk from its hash chain.  This helper takes no lock itself;
 * unix_remove_socket() is the variant that acquires unix_table_lock.
 */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
258 
/*
 * Add @sk to the hash chain @list, warning if @sk is still hashed
 * somewhere.  This helper takes no lock itself; unix_insert_socket() is the
 * variant that acquires unix_table_lock.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));

	sk_add_node(sk, list);
}
264 
265 static inline void unix_remove_socket(struct sock *sk)
266 {
267 	spin_lock(&unix_table_lock);
268 	__unix_remove_socket(sk);
269 	spin_unlock(&unix_table_lock);
270 }
271 
272 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
273 {
274 	spin_lock(&unix_table_lock);
275 	__unix_insert_socket(list, sk);
276 	spin_unlock(&unix_table_lock);
277 }
278 
279 static struct sock *__unix_find_socket_byname(struct net *net,
280 					      struct sockaddr_un *sunname,
281 					      int len, int type, unsigned int hash)
282 {
283 	struct sock *s;
284 
285 	sk_for_each(s, &unix_socket_table[hash ^ type]) {
286 		struct unix_sock *u = unix_sk(s);
287 
288 		if (!net_eq(sock_net(s), net))
289 			continue;
290 
291 		if (u->addr->len == len &&
292 		    !memcmp(u->addr->name, sunname, len))
293 			return s;
294 	}
295 	return NULL;
296 }
297 
298 static inline struct sock *unix_find_socket_byname(struct net *net,
299 						   struct sockaddr_un *sunname,
300 						   int len, int type,
301 						   unsigned int hash)
302 {
303 	struct sock *s;
304 
305 	spin_lock(&unix_table_lock);
306 	s = __unix_find_socket_byname(net, sunname, len, type, hash);
307 	if (s)
308 		sock_hold(s);
309 	spin_unlock(&unix_table_lock);
310 	return s;
311 }
312 
313 static struct sock *unix_find_socket_byinode(struct inode *i)
314 {
315 	struct sock *s;
316 
317 	spin_lock(&unix_table_lock);
318 	sk_for_each(s,
319 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
320 		struct dentry *dentry = unix_sk(s)->path.dentry;
321 
322 		if (dentry && d_backing_inode(dentry) == i) {
323 			sock_hold(s);
324 			goto found;
325 		}
326 	}
327 	s = NULL;
328 found:
329 	spin_unlock(&unix_table_lock);
330 	return s;
331 }
332 
333 /* Support code for asymmetrically connected dgram sockets
334  *
335  * If a datagram socket is connected to a socket not itself connected
336  * to the first socket (eg, /dev/log), clients may only enqueue more
337  * messages if the present receive queue of the server socket is not
338  * "too large". This means there's a second writeability condition
339  * poll and sendmsg need to test. The dgram recv code will do a wake
340  * up on the peer_wait wait queue of a socket upon reception of a
341  * datagram which needs to be propagated to sleeping would-be writers
342  * since these might not have sent anything so far. This can't be
343  * accomplished via poll_wait because the lifetime of the server
344  * socket might be less than that of its clients if these break their
345  * association with it or if the server socket is closed while clients
346  * are still connected to it and there's no way to inform "a polling
347  * implementation" that it should let go of a certain wait queue
348  *
349  * In order to propagate a wake up, a wait_queue_entry_t of the client
350  * socket is enqueued on the peer_wait queue of the server socket
351  * whose wake function does a wake_up on the ordinary client socket
352  * wait queue. This connection is established whenever a write (or
353  * poll for write) hit the flow control condition and broken when the
354  * association to the server socket is dissolved or after a wake up
355  * was relayed.
356  */
357 
358 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
359 				      void *key)
360 {
361 	struct unix_sock *u;
362 	wait_queue_head_t *u_sleep;
363 
364 	u = container_of(q, struct unix_sock, peer_wake);
365 
366 	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
367 			    q);
368 	u->peer_wake.private = NULL;
369 
370 	/* relaying can only happen while the wq still exists */
371 	u_sleep = sk_sleep(&u->sk);
372 	if (u_sleep)
373 		wake_up_interruptible_poll(u_sleep, key_to_poll(key));
374 
375 	return 0;
376 }
377 
378 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
379 {
380 	struct unix_sock *u, *u_other;
381 	int rc;
382 
383 	u = unix_sk(sk);
384 	u_other = unix_sk(other);
385 	rc = 0;
386 	spin_lock(&u_other->peer_wait.lock);
387 
388 	if (!u->peer_wake.private) {
389 		u->peer_wake.private = other;
390 		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
391 
392 		rc = 1;
393 	}
394 
395 	spin_unlock(&u_other->peer_wait.lock);
396 	return rc;
397 }
398 
399 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
400 					    struct sock *other)
401 {
402 	struct unix_sock *u, *u_other;
403 
404 	u = unix_sk(sk);
405 	u_other = unix_sk(other);
406 	spin_lock(&u_other->peer_wait.lock);
407 
408 	if (u->peer_wake.private == other) {
409 		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
410 		u->peer_wake.private = NULL;
411 	}
412 
413 	spin_unlock(&u_other->peer_wait.lock);
414 }
415 
416 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
417 						   struct sock *other)
418 {
419 	unix_dgram_peer_wake_disconnect(sk, other);
420 	wake_up_interruptible_poll(sk_sleep(sk),
421 				   EPOLLOUT |
422 				   EPOLLWRNORM |
423 				   EPOLLWRBAND);
424 }
425 
426 /* preconditions:
427  *	- unix_peer(sk) == other
428  *	- association is stable
429  */
430 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
431 {
432 	int connected;
433 
434 	connected = unix_dgram_peer_wake_connect(sk, other);
435 
436 	/* If other is SOCK_DEAD, we want to make sure we signal
437 	 * POLLOUT, such that a subsequent write() can get a
438 	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
439 	 * to other and its full, we will hang waiting for POLLOUT.
440 	 */
441 	if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
442 		return 1;
443 
444 	if (connected)
445 		unix_dgram_peer_wake_disconnect(sk, other);
446 
447 	return 0;
448 }
449 
450 static int unix_writable(const struct sock *sk)
451 {
452 	return sk->sk_state != TCP_LISTEN &&
453 	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
454 }
455 
456 static void unix_write_space(struct sock *sk)
457 {
458 	struct socket_wq *wq;
459 
460 	rcu_read_lock();
461 	if (unix_writable(sk)) {
462 		wq = rcu_dereference(sk->sk_wq);
463 		if (skwq_has_sleeper(wq))
464 			wake_up_interruptible_sync_poll(&wq->wait,
465 				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
466 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
467 	}
468 	rcu_read_unlock();
469 }
470 
471 /* When dgram socket disconnects (or changes its peer), we clear its receive
472  * queue of packets arrived from previous peer. First, it allows to do
473  * flow control based only on wmem_alloc; second, sk connected to peer
474  * may receive messages only from that peer. */
475 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
476 {
477 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
478 		skb_queue_purge(&sk->sk_receive_queue);
479 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
480 
481 		/* If one link of bidirectional dgram pipe is disconnected,
482 		 * we signal error. Messages are lost. Do not make this,
483 		 * when peer was not connected to us.
484 		 */
485 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
486 			other->sk_err = ECONNRESET;
487 			other->sk_error_report(other);
488 		}
489 	}
490 }
491 
492 static void unix_sock_destructor(struct sock *sk)
493 {
494 	struct unix_sock *u = unix_sk(sk);
495 
496 	skb_queue_purge(&sk->sk_receive_queue);
497 
498 	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
499 	WARN_ON(!sk_unhashed(sk));
500 	WARN_ON(sk->sk_socket);
501 	if (!sock_flag(sk, SOCK_DEAD)) {
502 		pr_info("Attempt to release alive unix socket: %p\n", sk);
503 		return;
504 	}
505 
506 	if (u->addr)
507 		unix_release_addr(u->addr);
508 
509 	atomic_long_dec(&unix_nr_socks);
510 	local_bh_disable();
511 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
512 	local_bh_enable();
513 #ifdef UNIX_REFCNT_DEBUG
514 	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
515 		atomic_long_read(&unix_nr_socks));
516 #endif
517 }
518 
/*
 * Tear down a unix socket.
 *
 * @sk:      socket being released
 * @embrion: non-zero when called recursively for a socket taken from the
 *           receive queue of a listening socket (see the flush loop below);
 *           unix_release() passes 0
 *
 * The socket is unhashed, orphaned, fully shut down and moved to TCP_CLOSE.
 * A stream/seqpacket peer is shut down as well, the receive queue is
 * flushed, the filesystem path reference is dropped and finally this
 * function's reference on @sk is put.
 */
static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct path path;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	/* Take over the path so it can be put after the lock is dropped. */
	path	     = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	/* Remember the old state: the flush loop needs to know about LISTEN. */
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			/*
			 * The peer sees ECONNRESET when we still had unread
			 * data or when we are an embryo being discarded.
			 */
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		/*
		 * For a socket that was listening, the socket attached to each
		 * queued skb is released too, with embrion set.
		 */
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook	      */
		UNIXCB(skb).consumed = skb->len;
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What the above comment does talk about? --ANK(980817)
	 */

	/* Only run the garbage collector while fds are in flight. */
	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */
}
592 
593 static void init_peercred(struct sock *sk)
594 {
595 	put_pid(sk->sk_peer_pid);
596 	if (sk->sk_peer_cred)
597 		put_cred(sk->sk_peer_cred);
598 	sk->sk_peer_pid  = get_pid(task_tgid(current));
599 	sk->sk_peer_cred = get_current_cred();
600 }
601 
602 static void copy_peercred(struct sock *sk, struct sock *peersk)
603 {
604 	put_pid(sk->sk_peer_pid);
605 	if (sk->sk_peer_cred)
606 		put_cred(sk->sk_peer_cred);
607 	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
608 	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
609 }
610 
611 static int unix_listen(struct socket *sock, int backlog)
612 {
613 	int err;
614 	struct sock *sk = sock->sk;
615 	struct unix_sock *u = unix_sk(sk);
616 
617 	err = -EOPNOTSUPP;
618 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
619 		goto out;	/* Only stream/seqpacket sockets accept */
620 	err = -EINVAL;
621 	if (!u->addr)
622 		goto out;	/* No listens on an unbound socket */
623 	unix_state_lock(sk);
624 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
625 		goto out_unlock;
626 	if (backlog > sk->sk_max_ack_backlog)
627 		wake_up_interruptible_all(&u->peer_wait);
628 	sk->sk_max_ack_backlog	= backlog;
629 	sk->sk_state		= TCP_LISTEN;
630 	/* set credentials so connect can copy them */
631 	init_peercred(sk);
632 	err = 0;
633 
634 out_unlock:
635 	unix_state_unlock(sk);
636 out:
637 	return err;
638 }
639 
/*
 * Forward declarations of the socket operations referenced by the
 * proto_ops tables defined further down in this file.
 */
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
				    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
#endif
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
				    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
				       struct pipe_inode_info *, size_t size,
				       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
				  int);
669 
670 static int unix_set_peek_off(struct sock *sk, int val)
671 {
672 	struct unix_sock *u = unix_sk(sk);
673 
674 	if (mutex_lock_interruptible(&u->iolock))
675 		return -EINTR;
676 
677 	sk->sk_peek_off = val;
678 	mutex_unlock(&u->iolock);
679 
680 	return 0;
681 }
682 
#ifdef CONFIG_PROC_FS
/*
 * fdinfo hook: print the socket's scm_stat.nr_fds counter as "scm_fds".
 * Nothing is printed when the struct socket has no sock attached.
 *
 * sock->sk is read once and the pointer that passed the NULL check is the
 * one that gets dereferenced.
 */
static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;

	if (!sk)
		return;

	u = unix_sk(sk);
	seq_printf(m, "scm_fds: %u\n",
		   atomic_read(&u->scm_stat.nr_fds));
}
#else
#define unix_show_fdinfo NULL
#endif
698 
699 static const struct proto_ops unix_stream_ops = {
700 	.family =	PF_UNIX,
701 	.owner =	THIS_MODULE,
702 	.release =	unix_release,
703 	.bind =		unix_bind,
704 	.connect =	unix_stream_connect,
705 	.socketpair =	unix_socketpair,
706 	.accept =	unix_accept,
707 	.getname =	unix_getname,
708 	.poll =		unix_poll,
709 	.ioctl =	unix_ioctl,
710 #ifdef CONFIG_COMPAT
711 	.compat_ioctl =	unix_compat_ioctl,
712 #endif
713 	.listen =	unix_listen,
714 	.shutdown =	unix_shutdown,
715 	.sendmsg =	unix_stream_sendmsg,
716 	.recvmsg =	unix_stream_recvmsg,
717 	.mmap =		sock_no_mmap,
718 	.sendpage =	unix_stream_sendpage,
719 	.splice_read =	unix_stream_splice_read,
720 	.set_peek_off =	unix_set_peek_off,
721 	.show_fdinfo =	unix_show_fdinfo,
722 };
723 
724 static const struct proto_ops unix_dgram_ops = {
725 	.family =	PF_UNIX,
726 	.owner =	THIS_MODULE,
727 	.release =	unix_release,
728 	.bind =		unix_bind,
729 	.connect =	unix_dgram_connect,
730 	.socketpair =	unix_socketpair,
731 	.accept =	sock_no_accept,
732 	.getname =	unix_getname,
733 	.poll =		unix_dgram_poll,
734 	.ioctl =	unix_ioctl,
735 #ifdef CONFIG_COMPAT
736 	.compat_ioctl =	unix_compat_ioctl,
737 #endif
738 	.listen =	sock_no_listen,
739 	.shutdown =	unix_shutdown,
740 	.sendmsg =	unix_dgram_sendmsg,
741 	.recvmsg =	unix_dgram_recvmsg,
742 	.mmap =		sock_no_mmap,
743 	.sendpage =	sock_no_sendpage,
744 	.set_peek_off =	unix_set_peek_off,
745 	.show_fdinfo =	unix_show_fdinfo,
746 };
747 
748 static const struct proto_ops unix_seqpacket_ops = {
749 	.family =	PF_UNIX,
750 	.owner =	THIS_MODULE,
751 	.release =	unix_release,
752 	.bind =		unix_bind,
753 	.connect =	unix_stream_connect,
754 	.socketpair =	unix_socketpair,
755 	.accept =	unix_accept,
756 	.getname =	unix_getname,
757 	.poll =		unix_dgram_poll,
758 	.ioctl =	unix_ioctl,
759 #ifdef CONFIG_COMPAT
760 	.compat_ioctl =	unix_compat_ioctl,
761 #endif
762 	.listen =	unix_listen,
763 	.shutdown =	unix_shutdown,
764 	.sendmsg =	unix_seqpacket_sendmsg,
765 	.recvmsg =	unix_seqpacket_recvmsg,
766 	.mmap =		sock_no_mmap,
767 	.sendpage =	sock_no_sendpage,
768 	.set_peek_off =	unix_set_peek_off,
769 	.show_fdinfo =	unix_show_fdinfo,
770 };
771 
772 static struct proto unix_proto = {
773 	.name			= "UNIX",
774 	.owner			= THIS_MODULE,
775 	.obj_size		= sizeof(struct unix_sock),
776 };
777 
778 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
779 {
780 	struct sock *sk = NULL;
781 	struct unix_sock *u;
782 
783 	atomic_long_inc(&unix_nr_socks);
784 	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
785 		goto out;
786 
787 	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
788 	if (!sk)
789 		goto out;
790 
791 	sock_init_data(sock, sk);
792 
793 	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
794 	sk->sk_write_space	= unix_write_space;
795 	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
796 	sk->sk_destruct		= unix_sock_destructor;
797 	u	  = unix_sk(sk);
798 	u->path.dentry = NULL;
799 	u->path.mnt = NULL;
800 	spin_lock_init(&u->lock);
801 	atomic_long_set(&u->inflight, 0);
802 	INIT_LIST_HEAD(&u->link);
803 	mutex_init(&u->iolock); /* single task reading lock */
804 	mutex_init(&u->bindlock); /* single task binding lock */
805 	init_waitqueue_head(&u->peer_wait);
806 	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
807 	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
808 	unix_insert_socket(unix_sockets_unbound(sk), sk);
809 out:
810 	if (sk == NULL)
811 		atomic_long_dec(&unix_nr_socks);
812 	else {
813 		local_bh_disable();
814 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
815 		local_bh_enable();
816 	}
817 	return sk;
818 }
819 
820 static int unix_create(struct net *net, struct socket *sock, int protocol,
821 		       int kern)
822 {
823 	if (protocol && protocol != PF_UNIX)
824 		return -EPROTONOSUPPORT;
825 
826 	sock->state = SS_UNCONNECTED;
827 
828 	switch (sock->type) {
829 	case SOCK_STREAM:
830 		sock->ops = &unix_stream_ops;
831 		break;
832 		/*
833 		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
834 		 *	nothing uses it.
835 		 */
836 	case SOCK_RAW:
837 		sock->type = SOCK_DGRAM;
838 		fallthrough;
839 	case SOCK_DGRAM:
840 		sock->ops = &unix_dgram_ops;
841 		break;
842 	case SOCK_SEQPACKET:
843 		sock->ops = &unix_seqpacket_ops;
844 		break;
845 	default:
846 		return -ESOCKTNOSUPPORT;
847 	}
848 
849 	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
850 }
851 
852 static int unix_release(struct socket *sock)
853 {
854 	struct sock *sk = sock->sk;
855 
856 	if (!sk)
857 		return 0;
858 
859 	unix_release_sock(sk, 0);
860 	sock->sk = NULL;
861 
862 	return 0;
863 }
864 
865 static int unix_autobind(struct socket *sock)
866 {
867 	struct sock *sk = sock->sk;
868 	struct net *net = sock_net(sk);
869 	struct unix_sock *u = unix_sk(sk);
870 	static u32 ordernum = 1;
871 	struct unix_address *addr;
872 	int err;
873 	unsigned int retries = 0;
874 
875 	err = mutex_lock_interruptible(&u->bindlock);
876 	if (err)
877 		return err;
878 
879 	if (u->addr)
880 		goto out;
881 
882 	err = -ENOMEM;
883 	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
884 	if (!addr)
885 		goto out;
886 
887 	addr->name->sun_family = AF_UNIX;
888 	refcount_set(&addr->refcnt, 1);
889 
890 retry:
891 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
892 	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
893 
894 	spin_lock(&unix_table_lock);
895 	ordernum = (ordernum+1)&0xFFFFF;
896 
897 	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
898 				      addr->hash)) {
899 		spin_unlock(&unix_table_lock);
900 		/*
901 		 * __unix_find_socket_byname() may take long time if many names
902 		 * are already in use.
903 		 */
904 		cond_resched();
905 		/* Give up if all names seems to be in use. */
906 		if (retries++ == 0xFFFFF) {
907 			err = -ENOSPC;
908 			kfree(addr);
909 			goto out;
910 		}
911 		goto retry;
912 	}
913 	addr->hash ^= sk->sk_type;
914 
915 	__unix_remove_socket(sk);
916 	smp_store_release(&u->addr, addr);
917 	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
918 	spin_unlock(&unix_table_lock);
919 	err = 0;
920 
921 out:	mutex_unlock(&u->bindlock);
922 	return err;
923 }
924 
/*
 * Look up the socket addressed by @sunname.
 *
 * A name starting with a non-zero byte is resolved as a filesystem path:
 * the caller needs write permission on it, it must be a socket inode and a
 * socket bound to that inode must exist.  Any other name is looked up in
 * the socket table by name.
 *
 * Returns the socket with a reference held.  On failure NULL is returned
 * and *error is set: the kern_path()/path_permission() error, -ECONNREFUSED
 * when no matching socket exists, or -EPROTOTYPE when the socket found via
 * the filesystem has a different type than @type.  *error is not written
 * on success.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *peer;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;

		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_backing_inode(path.dentry);
		err = path_permission(&path, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		peer = unix_find_socket_byinode(inode);
		if (!peer)
			goto put_fail;

		if (peer->sk_type != type) {
			path_put(&path);
			sock_put(peer);
			err = -EPROTOTYPE;
			goto fail;
		}

		/* Access time is only touched for a socket of the right type. */
		touch_atime(&path);
		path_put(&path);
	} else {
		peer = unix_find_socket_byname(net, sunname, len, type, hash);
		if (!peer) {
			err = -ECONNREFUSED;
			goto fail;
		}

		if (unix_sk(peer)->path.dentry)
			touch_atime(&unix_sk(peer)->path);
	}

	return peer;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
979 
980 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
981 {
982 	struct dentry *dentry;
983 	struct path path;
984 	int err = 0;
985 	/*
986 	 * Get the parent directory, calculate the hash for last
987 	 * component.
988 	 */
989 	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
990 	err = PTR_ERR(dentry);
991 	if (IS_ERR(dentry))
992 		return err;
993 
994 	/*
995 	 * All right, let's create it.
996 	 */
997 	err = security_path_mknod(&path, dentry, mode, 0);
998 	if (!err) {
999 		err = vfs_mknod(mnt_user_ns(path.mnt), d_inode(path.dentry),
1000 				dentry, mode, 0);
1001 		if (!err) {
1002 			res->mnt = mntget(path.mnt);
1003 			res->dentry = dget(dentry);
1004 		}
1005 	}
1006 	done_path_create(&path, dentry);
1007 	return err;
1008 }
1009 
/*
 * bind() for unix sockets.
 *
 * An address consisting of the family only (addr_len == sizeof(short))
 * requests autobind.  A name starting with a non-zero byte is a filesystem
 * name: its node is created with unix_mknod() and the socket is hashed by
 * inode number.  Any other name is an abstract name and is hashed by name.
 *
 * Returns 0 on success, -EINVAL for a malformed address or a socket that
 * already has an address, -EADDRINUSE when the name is taken, -ENOMEM on
 * allocation failure, or the error from unix_mknod() or
 * mutex_lock_interruptible().
 *
 * NOTE(review): the filesystem node is created before bindlock is taken and
 * before u->addr is checked.  When a later step fails, only path_put() is
 * called; the node created by unix_mknod() is not removed here.
 */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash;
	struct unix_address *addr;
	struct hlist_head *list;
	struct path path = { };

	err = -EINVAL;
	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
	    sunaddr->sun_family != AF_UNIX)
		goto out;

	/* Family only, no name at all: pick a name automatically. */
	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	/* Validates the address; on success err is the normalised length. */
	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (sun_path[0]) {
		/* Filesystem name: create the socket inode, honouring the umask. */
		umode_t mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = unix_mknod(sun_path, mode, &path);
		if (err) {
			if (err == -EEXIST)
				err = -EADDRINUSE;
			goto out;
		}
	}

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_put;

	/* A socket can only be bound once. */
	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	refcount_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		/*
		 * Filesystem name: the stored hash is the marker value
		 * UNIX_HASH_SIZE and the bucket comes from the inode number.
		 */
		addr->hash = UNIX_HASH_SIZE;
		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
		spin_lock(&unix_table_lock);
		u->path = path;
		list = &unix_socket_table[hash];
	} else {
		/* Abstract name: it must not be in use in this namespace. */
		spin_lock(&unix_table_lock);
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	}

	/* Move the socket from its current bucket to the one for the name. */
	err = 0;
	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->bindlock);
out_put:
	/* On success the path reference now belongs to u->path. */
	if (err)
		path_put(&path);
out:
	return err;
}
1100 
/*
 * Take the state locks of two sockets in a stable (address) order.
 * If @sk2 is NULL or the same socket as @sk1, only @sk1 is locked.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first, *second;

	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}

	/* Always lock the lower address first so two callers cannot deadlock. */
	if (sk1 < sk2) {
		first = sk1;
		second = sk2;
	} else {
		first = sk2;
		second = sk1;
	}

	unix_state_lock(first);
	unix_state_lock_nested(second);
}
1115 
/*
 * Release the locks taken by unix_state_double_lock(): @sk1 always, and
 * @sk2 only when it is a distinct, non-NULL socket.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);

	if (likely(sk1 != sk2) && sk2)
		unix_state_unlock(sk2);
}
1125 
/*
 * connect() handler that sets, replaces or clears the default peer of
 * @sock.
 *
 * @addr: destination address; family AF_UNSPEC dissolves the association
 * @alen: length of @addr
 * @flags: not used in this function
 *
 * Returns 0 on success or a negative errno (-EINVAL for a too short
 * address, -EPERM when unix_may_send() refuses, or an error from
 * unix_mkname(), unix_autobind(), unix_find_other() or the security hook).
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	/* The family field must be present before it can be inspected. */
	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		/* With SOCK_PASSCRED set an unbound socket is bound first. */
		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		/* On success we own a reference on other (dropped by sock_put()). */
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		/* With a NULL second socket this locks sk only. */
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		/* Disconnect handling and the final put happen outside the locks. */
		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		/* The reference obtained from unix_find_other() is kept in unix_peer(sk). */
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
1205 
1206 static long unix_wait_for_peer(struct sock *other, long timeo)
1207 	__releases(&unix_sk(other)->lock)
1208 {
1209 	struct unix_sock *u = unix_sk(other);
1210 	int sched;
1211 	DEFINE_WAIT(wait);
1212 
1213 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1214 
1215 	sched = !sock_flag(other, SOCK_DEAD) &&
1216 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1217 		unix_recvq_full(other);
1218 
1219 	unix_state_unlock(other);
1220 
1221 	if (sched)
1222 		timeo = schedule_timeout(timeo);
1223 
1224 	finish_wait(&u->peer_wait, &wait);
1225 	return timeo;
1226 }
1227 
/*
 * connect() for connection oriented AF_UNIX sockets.
 *
 * A new sock (newsk), which becomes the server side endpoint, and a one
 * byte skb are allocated up front.  Once the listener is found and all
 * checks pass, newsk is wired to @sock's sk as its peer and the skb
 * (allocated on behalf of newsk) is queued on the listener's receive queue
 * for unix_accept() to pick up.
 *
 * Returns 0 on success or a negative errno (-ECONNREFUSED, -EAGAIN,
 * -EISCONN, -EINVAL, -ENOMEM, a signal related error, or an error from the
 * helpers).
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	/* With SOCK_PASSCRED set an unbound socket is bound first. */
	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		/* Backlog full: fail if non-blocking, otherwise wait and retry. */
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		/* Drops the state lock of other. */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab our state lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	/* State changed between the unlocked read and taking the lock: retry. */
	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under unix_table_lock.  Insertion
	 * into the hash chain we'd found it in had been done
	 * in an earlier critical area protected by unix_table_lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire().  IOW, the same warranties
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	refcount_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	/* Undo the up-front allocations and drop the lookup reference. */
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1416 
1417 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1418 {
1419 	struct sock *ska = socka->sk, *skb = sockb->sk;
1420 
1421 	/* Join our sockets back to back */
1422 	sock_hold(ska);
1423 	sock_hold(skb);
1424 	unix_peer(ska) = skb;
1425 	unix_peer(skb) = ska;
1426 	init_peercred(ska);
1427 	init_peercred(skb);
1428 
1429 	if (ska->sk_type != SOCK_DGRAM) {
1430 		ska->sk_state = TCP_ESTABLISHED;
1431 		skb->sk_state = TCP_ESTABLISHED;
1432 		socka->state  = SS_CONNECTED;
1433 		sockb->state  = SS_CONNECTED;
1434 	}
1435 	return 0;
1436 }
1437 
1438 static void unix_sock_inherit_flags(const struct socket *old,
1439 				    struct socket *new)
1440 {
1441 	if (test_bit(SOCK_PASSCRED, &old->flags))
1442 		set_bit(SOCK_PASSCRED, &new->flags);
1443 	if (test_bit(SOCK_PASSSEC, &old->flags))
1444 		set_bit(SOCK_PASSSEC, &new->flags);
1445 }
1446 
1447 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1448 		       bool kern)
1449 {
1450 	struct sock *sk = sock->sk;
1451 	struct sock *tsk;
1452 	struct sk_buff *skb;
1453 	int err;
1454 
1455 	err = -EOPNOTSUPP;
1456 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1457 		goto out;
1458 
1459 	err = -EINVAL;
1460 	if (sk->sk_state != TCP_LISTEN)
1461 		goto out;
1462 
1463 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1464 	 * so that no locks are necessary.
1465 	 */
1466 
1467 	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1468 	if (!skb) {
1469 		/* This means receive shutdown. */
1470 		if (err == 0)
1471 			err = -EINVAL;
1472 		goto out;
1473 	}
1474 
1475 	tsk = skb->sk;
1476 	skb_free_datagram(sk, skb);
1477 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1478 
1479 	/* attach accepted sock to socket */
1480 	unix_state_lock(tsk);
1481 	newsock->state = SS_CONNECTED;
1482 	unix_sock_inherit_flags(sock, newsock);
1483 	sock_graft(tsk, newsock);
1484 	unix_state_unlock(tsk);
1485 	return 0;
1486 
1487 out:
1488 	return err;
1489 }
1490 
1491 
1492 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1493 {
1494 	struct sock *sk = sock->sk;
1495 	struct unix_address *addr;
1496 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1497 	int err = 0;
1498 
1499 	if (peer) {
1500 		sk = unix_peer_get(sk);
1501 
1502 		err = -ENOTCONN;
1503 		if (!sk)
1504 			goto out;
1505 		err = 0;
1506 	} else {
1507 		sock_hold(sk);
1508 	}
1509 
1510 	addr = smp_load_acquire(&unix_sk(sk)->addr);
1511 	if (!addr) {
1512 		sunaddr->sun_family = AF_UNIX;
1513 		sunaddr->sun_path[0] = 0;
1514 		err = sizeof(short);
1515 	} else {
1516 		err = addr->len;
1517 		memcpy(sunaddr, addr->name, addr->len);
1518 	}
1519 	sock_put(sk);
1520 out:
1521 	return err;
1522 }
1523 
1524 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1525 {
1526 	int err = 0;
1527 
1528 	UNIXCB(skb).pid  = get_pid(scm->pid);
1529 	UNIXCB(skb).uid = scm->creds.uid;
1530 	UNIXCB(skb).gid = scm->creds.gid;
1531 	UNIXCB(skb).fp = NULL;
1532 	unix_get_secdata(scm, skb);
1533 	if (scm->fp && send_fds)
1534 		err = unix_attach_fds(scm, skb);
1535 
1536 	skb->destructor = unix_destruct_scm;
1537 	return err;
1538 }
1539 
1540 static bool unix_passcred_enabled(const struct socket *sock,
1541 				  const struct sock *other)
1542 {
1543 	return test_bit(SOCK_PASSCRED, &sock->flags) ||
1544 	       !other->sk_socket ||
1545 	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1546 }
1547 
1548 /*
1549  * Some apps rely on write() giving SCM_CREDENTIALS
1550  * We include credentials if source or destination socket
1551  * asserted SOCK_PASSCRED.
1552  */
1553 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1554 			    const struct sock *other)
1555 {
1556 	if (UNIXCB(skb).pid)
1557 		return;
1558 	if (unix_passcred_enabled(sock, other)) {
1559 		UNIXCB(skb).pid  = get_pid(task_tgid(current));
1560 		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1561 	}
1562 }
1563 
1564 static int maybe_init_creds(struct scm_cookie *scm,
1565 			    struct socket *socket,
1566 			    const struct sock *other)
1567 {
1568 	int err;
1569 	struct msghdr msg = { .msg_controllen = 0 };
1570 
1571 	err = scm_send(socket, &msg, scm, false);
1572 	if (err)
1573 		return err;
1574 
1575 	if (unix_passcred_enabled(socket, other)) {
1576 		scm->pid = get_pid(task_tgid(current));
1577 		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1578 	}
1579 	return err;
1580 }
1581 
1582 static bool unix_skb_scm_eq(struct sk_buff *skb,
1583 			    struct scm_cookie *scm)
1584 {
1585 	const struct unix_skb_parms *u = &UNIXCB(skb);
1586 
1587 	return u->pid == scm->pid &&
1588 	       uid_eq(u->uid, scm->creds.uid) &&
1589 	       gid_eq(u->gid, scm->creds.gid) &&
1590 	       unix_secdata_eq(scm, skb);
1591 }
1592 
1593 static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1594 {
1595 	struct scm_fp_list *fp = UNIXCB(skb).fp;
1596 	struct unix_sock *u = unix_sk(sk);
1597 
1598 	if (unlikely(fp && fp->count))
1599 		atomic_add(fp->count, &u->scm_stat.nr_fds);
1600 }
1601 
1602 static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1603 {
1604 	struct scm_fp_list *fp = UNIXCB(skb).fp;
1605 	struct unix_sock *u = unix_sk(sk);
1606 
1607 	if (unlikely(fp && fp->count))
1608 		atomic_sub(fp->count, &u->scm_stat.nr_fds);
1609 }
1610 
/*
 *	Send AF_UNIX data.
 */

/*
 * sendmsg() for datagram sockets (also called by unix_seqpacket_sendmsg()).
 *
 * The whole message is copied into one skb first; only then is the
 * destination looked up (explicit address in msg_name, or the connected
 * peer), checked, and the skb appended to its receive queue.
 *
 * Returns @len on success (also when a socket filter silently drops the
 * packet) or a negative errno.
 */
static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
			      size_t len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned int hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie scm;
	int data_len = 0;
	int sk_locked;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		/* Explicit destination: validate it, look it up later. */
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		/* No address given: use the connected peer (takes a reference). */
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	/* With SOCK_PASSCRED set an unbound socket is bound first. */
	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	/* Whatever does not fit into the linear area goes into page fragments. */
	if (len > SKB_MAX_ALLOC) {
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);
		data_len = PAGE_ALIGN(data_len);

		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
	}

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err,
				   PAGE_ALLOC_COSTLY_ORDER);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(&scm, skb, true);
	if (err < 0)
		goto out_free;

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		/* Without an address there is nothing to look up (again). */
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	/* sk_locked records whether the state lock of sk is held as well. */
	sk_locked = 0;
	unix_state_lock(other);
restart_locked:
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 *	Check with 1003.1g - what should
		 *	datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		if (!sk_locked)
			unix_state_lock(sk);

		err = 0;
		if (unix_peer(sk) == other) {
			/* Our connected peer died: disconnect and report it. */
			unix_peer(sk) = NULL;
			unix_dgram_peer_wake_disconnect_wakeup(sk, other);

			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			/* Drops the reference that unix_peer(sk) was holding. */
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		/* Not our peer: retry, which looks the address up again. */
		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* other == sk && unix_peer(other) != sk if
	 * - unix_peer(sk) == NULL, destination address bound to sk
	 * - unix_peer(sk) == sk by time of get but disconnected before lock
	 */
	if (other != sk &&
	    unlikely(unix_peer(other) != sk &&
	    unix_recvq_full_lockless(other))) {
		if (timeo) {
			/* Blocking send: wait for room (drops other's lock), retry. */
			timeo = unix_wait_for_peer(other, timeo);

			err = sock_intr_errno(timeo);
			if (signal_pending(current))
				goto out_free;

			goto restart;
		}

		/* Non-blocking: retake the locks so that sk is locked too. */
		if (!sk_locked) {
			unix_state_unlock(other);
			unix_state_double_lock(sk, other);
		}

		if (unix_peer(sk) != other ||
		    unix_dgram_peer_wake_me(sk, other)) {
			err = -EAGAIN;
			sk_locked = 1;
			goto out_unlock;
		}

		/* Locks were dropped in between: redo the checks with both held. */
		if (!sk_locked) {
			sk_locked = 1;
			goto restart_locked;
		}
	}

	if (unlikely(sk_locked))
		unix_state_unlock(sk);

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	scm_stat_add(other, skb);
	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	scm_destroy(&scm);
	return len;

out_unlock:
	if (sk_locked)
		unix_state_unlock(sk);
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(&scm);
	return err;
}
1817 
/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, and a minimum of a full page.
 *
 * The value is PAGE_SIZE shifted by the page allocation order computed
 * for 32768 bytes.  unix_stream_sendmsg() uses it to cap the size of each
 * skb and to choose the allocation order for the paged part.
 */
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1822 
/*
 * sendmsg() for stream sockets.
 *
 * The message is cut into skbs that are appended one by one to the peer's
 * receive queue.  File descriptors from the control message are attached
 * to the first skb only.
 *
 * Returns the number of bytes queued if any were queued; otherwise a
 * negative errno.  SIGPIPE is raised on a broken pipe only when nothing was
 * sent and MSG_NOSIGNAL is not set.
 */
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
			       size_t len)
{
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie scm;
	bool fds_sent = false;
	int data_len;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		/* A destination address is never accepted here. */
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		/* Note: unlike unix_peer_get(), this takes no extra reference. */
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		size = len - sent;

		/* Keep two messages in the pipe so it schedules better */
		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);

		/* allow fallback to order-0 allocations */
		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);

		/* Bytes beyond the linear head go into page fragments. */
		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));

		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));

		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
					   msg->msg_flags & MSG_DONTWAIT, &err,
					   get_order(UNIX_SKB_FRAGS_SZ));
		if (!skb)
			goto out_err;

		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(&scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		fds_sent = true;

		skb_put(skb, size - data_len);
		skb->data_len = data_len;
		skb->len = size;
		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		/* The peer may have gone away or stopped receiving meanwhile. */
		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		scm_stat_add(other, skb);
		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other);
		sent += size;
	}

	scm_destroy(&scm);

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(&scm);
	/* A partial write wins over the error. */
	return sent ? : err;
}
1922 
1923 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1924 				    int offset, size_t size, int flags)
1925 {
1926 	int err;
1927 	bool send_sigpipe = false;
1928 	bool init_scm = true;
1929 	struct scm_cookie scm;
1930 	struct sock *other, *sk = socket->sk;
1931 	struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1932 
1933 	if (flags & MSG_OOB)
1934 		return -EOPNOTSUPP;
1935 
1936 	other = unix_peer(sk);
1937 	if (!other || sk->sk_state != TCP_ESTABLISHED)
1938 		return -ENOTCONN;
1939 
1940 	if (false) {
1941 alloc_skb:
1942 		unix_state_unlock(other);
1943 		mutex_unlock(&unix_sk(other)->iolock);
1944 		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1945 					      &err, 0);
1946 		if (!newskb)
1947 			goto err;
1948 	}
1949 
1950 	/* we must acquire iolock as we modify already present
1951 	 * skbs in the sk_receive_queue and mess with skb->len
1952 	 */
1953 	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1954 	if (err) {
1955 		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1956 		goto err;
1957 	}
1958 
1959 	if (sk->sk_shutdown & SEND_SHUTDOWN) {
1960 		err = -EPIPE;
1961 		send_sigpipe = true;
1962 		goto err_unlock;
1963 	}
1964 
1965 	unix_state_lock(other);
1966 
1967 	if (sock_flag(other, SOCK_DEAD) ||
1968 	    other->sk_shutdown & RCV_SHUTDOWN) {
1969 		err = -EPIPE;
1970 		send_sigpipe = true;
1971 		goto err_state_unlock;
1972 	}
1973 
1974 	if (init_scm) {
1975 		err = maybe_init_creds(&scm, socket, other);
1976 		if (err)
1977 			goto err_state_unlock;
1978 		init_scm = false;
1979 	}
1980 
1981 	skb = skb_peek_tail(&other->sk_receive_queue);
1982 	if (tail && tail == skb) {
1983 		skb = newskb;
1984 	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
1985 		if (newskb) {
1986 			skb = newskb;
1987 		} else {
1988 			tail = skb;
1989 			goto alloc_skb;
1990 		}
1991 	} else if (newskb) {
1992 		/* this is fast path, we don't necessarily need to
1993 		 * call to kfree_skb even though with newskb == NULL
1994 		 * this - does no harm
1995 		 */
1996 		consume_skb(newskb);
1997 		newskb = NULL;
1998 	}
1999 
2000 	if (skb_append_pagefrags(skb, page, offset, size)) {
2001 		tail = skb;
2002 		goto alloc_skb;
2003 	}
2004 
2005 	skb->len += size;
2006 	skb->data_len += size;
2007 	skb->truesize += size;
2008 	refcount_add(size, &sk->sk_wmem_alloc);
2009 
2010 	if (newskb) {
2011 		err = unix_scm_to_skb(&scm, skb, false);
2012 		if (err)
2013 			goto err_state_unlock;
2014 		spin_lock(&other->sk_receive_queue.lock);
2015 		__skb_queue_tail(&other->sk_receive_queue, newskb);
2016 		spin_unlock(&other->sk_receive_queue.lock);
2017 	}
2018 
2019 	unix_state_unlock(other);
2020 	mutex_unlock(&unix_sk(other)->iolock);
2021 
2022 	other->sk_data_ready(other);
2023 	scm_destroy(&scm);
2024 	return size;
2025 
2026 err_state_unlock:
2027 	unix_state_unlock(other);
2028 err_unlock:
2029 	mutex_unlock(&unix_sk(other)->iolock);
2030 err:
2031 	kfree_skb(newskb);
2032 	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2033 		send_sig(SIGPIPE, current, 0);
2034 	if (!init_scm)
2035 		scm_destroy(&scm);
2036 	return err;
2037 }
2038 
2039 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2040 				  size_t len)
2041 {
2042 	int err;
2043 	struct sock *sk = sock->sk;
2044 
2045 	err = sock_error(sk);
2046 	if (err)
2047 		return err;
2048 
2049 	if (sk->sk_state != TCP_ESTABLISHED)
2050 		return -ENOTCONN;
2051 
2052 	if (msg->msg_namelen)
2053 		msg->msg_namelen = 0;
2054 
2055 	return unix_dgram_sendmsg(sock, msg, len);
2056 }
2057 
2058 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2059 				  size_t size, int flags)
2060 {
2061 	struct sock *sk = sock->sk;
2062 
2063 	if (sk->sk_state != TCP_ESTABLISHED)
2064 		return -ENOTCONN;
2065 
2066 	return unix_dgram_recvmsg(sock, msg, size, flags);
2067 }
2068 
2069 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2070 {
2071 	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2072 
2073 	if (addr) {
2074 		msg->msg_namelen = addr->len;
2075 		memcpy(msg->msg_name, addr->name, addr->len);
2076 	}
2077 }
2078 
2079 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2080 			      size_t size, int flags)
2081 {
2082 	struct scm_cookie scm;
2083 	struct sock *sk = sock->sk;
2084 	struct unix_sock *u = unix_sk(sk);
2085 	struct sk_buff *skb, *last;
2086 	long timeo;
2087 	int skip;
2088 	int err;
2089 
2090 	err = -EOPNOTSUPP;
2091 	if (flags&MSG_OOB)
2092 		goto out;
2093 
2094 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2095 
2096 	do {
2097 		mutex_lock(&u->iolock);
2098 
2099 		skip = sk_peek_offset(sk, flags);
2100 		skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2101 					      &skip, &err, &last);
2102 		if (skb) {
2103 			if (!(flags & MSG_PEEK))
2104 				scm_stat_del(sk, skb);
2105 			break;
2106 		}
2107 
2108 		mutex_unlock(&u->iolock);
2109 
2110 		if (err != -EAGAIN)
2111 			break;
2112 	} while (timeo &&
2113 		 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2114 					      &err, &timeo, last));
2115 
2116 	if (!skb) { /* implies iolock unlocked */
2117 		unix_state_lock(sk);
2118 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2119 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2120 		    (sk->sk_shutdown & RCV_SHUTDOWN))
2121 			err = 0;
2122 		unix_state_unlock(sk);
2123 		goto out;
2124 	}
2125 
2126 	if (wq_has_sleeper(&u->peer_wait))
2127 		wake_up_interruptible_sync_poll(&u->peer_wait,
2128 						EPOLLOUT | EPOLLWRNORM |
2129 						EPOLLWRBAND);
2130 
2131 	if (msg->msg_name)
2132 		unix_copy_addr(msg, skb->sk);
2133 
2134 	if (size > skb->len - skip)
2135 		size = skb->len - skip;
2136 	else if (size < skb->len - skip)
2137 		msg->msg_flags |= MSG_TRUNC;
2138 
2139 	err = skb_copy_datagram_msg(skb, skip, msg, size);
2140 	if (err)
2141 		goto out_free;
2142 
2143 	if (sock_flag(sk, SOCK_RCVTSTAMP))
2144 		__sock_recv_timestamp(msg, sk, skb);
2145 
2146 	memset(&scm, 0, sizeof(scm));
2147 
2148 	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2149 	unix_set_secdata(&scm, skb);
2150 
2151 	if (!(flags & MSG_PEEK)) {
2152 		if (UNIXCB(skb).fp)
2153 			unix_detach_fds(&scm, skb);
2154 
2155 		sk_peek_offset_bwd(sk, skb->len);
2156 	} else {
2157 		/* It is questionable: on PEEK we could:
2158 		   - do not return fds - good, but too simple 8)
2159 		   - return fds, and do not return them on read (old strategy,
2160 		     apparently wrong)
2161 		   - clone fds (I chose it for now, it is the most universal
2162 		     solution)
2163 
2164 		   POSIX 1003.1g does not actually define this clearly
2165 		   at all. POSIX 1003.1g doesn't define a lot of things
2166 		   clearly however!
2167 
2168 		*/
2169 
2170 		sk_peek_offset_fwd(sk, size);
2171 
2172 		if (UNIXCB(skb).fp)
2173 			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2174 	}
2175 	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2176 
2177 	scm_recv(sock, msg, &scm, flags);
2178 
2179 out_free:
2180 	skb_free_datagram(sk, skb);
2181 	mutex_unlock(&u->iolock);
2182 out:
2183 	return err;
2184 }
2185 
/*
 *	Sleep until more data has arrived. But check for races..
 *
 *	@last and @last_len describe the tail skb of the receive queue (and
 *	its length) as the caller last saw it; a different tail or a changed
 *	length means new data arrived.  With @freezable set the sleep uses
 *	freezable_schedule_timeout() instead of schedule_timeout().
 *
 *	Returns the remaining timeout.
 */
static long unix_stream_data_wait(struct sock *sk, long timeo,
				  struct sk_buff *last, unsigned int last_len,
				  bool freezable)
{
	struct sk_buff *tail;
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		/* Queue ourselves before checking so that no wakeup is missed. */
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

		/*
		 * Stop waiting on new data, a socket error, receive shutdown,
		 * a pending signal or an exhausted timeout.
		 */
		tail = skb_peek_tail(&sk->sk_receive_queue);
		if (tail != last ||
		    (tail && tail->len != last_len) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
		/* The state lock is not held while sleeping. */
		unix_state_unlock(sk);
		if (freezable)
			timeo = freezable_schedule_timeout(timeo);
		else
			timeo = schedule_timeout(timeo);
		unix_state_lock(sk);

		/* On a dead socket we leave without clearing the wait bit. */
		if (sock_flag(sk, SOCK_DEAD))
			break;

		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	return timeo;
}
2228 
2229 static unsigned int unix_skb_len(const struct sk_buff *skb)
2230 {
2231 	return skb->len - UNIXCB(skb).consumed;
2232 }
2233 
/*
 * Arguments for unix_stream_read_generic(), shared by the recvmsg and
 * splice entry points.
 */
struct unix_stream_read_state {
	/*
	 * Moves data out of an skb.  Called as
	 * recv_actor(skb, skip, chunk, state); a negative return value is
	 * treated as an error, otherwise it is the number of bytes handled.
	 */
	int (*recv_actor)(struct sk_buff *, int, int,
			  struct unix_stream_read_state *);
	struct socket *socket;		/* socket being read */
	struct msghdr *msg;		/* destination for recvmsg; left unset by splice */
	struct pipe_inode_info *pipe;	/* destination for splice; left unset by recvmsg */
	size_t size;			/* maximum number of bytes to read */
	int flags;			/* MSG_* receive flags */
	unsigned int splice_flags;	/* flags handed to skb_splice_bits() */
};
2244 
/*
 * Common receive loop for stream sockets, used by both recvmsg and
 * splice.
 *
 * Walks the receive queue of state->socket and hands up to state->size
 * bytes to state->recv_actor, honouring the peek offset, MSG_PEEK,
 * the receive low-water mark and the receive timeout.
 *
 * @state:	 request description (socket, destination, size, flags)
 * @freezable:	 passed through to unix_stream_data_wait()
 *
 * Returns the number of bytes handled when that is non-zero; otherwise
 * the pending error code (or 0).  -EINVAL is returned for a socket that
 * is not TCP_ESTABLISHED and -EOPNOTSUPP for MSG_OOB.
 */
static int unix_stream_read_generic(struct unix_stream_read_state *state,
				    bool freezable)
{
	struct scm_cookie scm;
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int copied = 0;
	int flags = state->flags;
	int noblock = flags & MSG_DONTWAIT;
	bool check_creds = false;
	int target;
	int err = 0;
	long timeo;
	int skip;
	size_t size = state->size;
	unsigned int last_len;

	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
		err = -EINVAL;
		goto out;
	}

	if (unlikely(flags & MSG_OOB)) {
		err = -EOPNOTSUPP;
		goto out;
	}

	/* Minimum amount to collect before returning, and how long to wait. */
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, noblock);

	memset(&scm, 0, sizeof(scm));

	/* Hold the I/O mutex so that the queue order cannot change
	 * while the actor sleeps copying data out.
	 */
	mutex_lock(&u->iolock);

	/* Bytes at the head of the queue to step over (peek offset). */
	skip = max(sk_peek_offset(sk, flags), 0);

	do {
		int chunk;
		bool drop_skb;
		struct sk_buff *skb, *last;

redo:
		unix_state_lock(sk);
		if (sock_flag(sk, SOCK_DEAD)) {
			err = -ECONNRESET;
			goto unlock;
		}
		/* Remember the skb and length seen, for the wait below. */
		last = skb = skb_peek(&sk->sk_receive_queue);
		last_len = last ? last->len : 0;
again:
		if (skb == NULL) {
			/* Nothing (more) queued: done if enough was read. */
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			if (!timeo) {
				err = -EAGAIN;
				break;
			}

			/* Drop the I/O mutex while sleeping for more data. */
			mutex_unlock(&u->iolock);

			timeo = unix_stream_data_wait(sk, timeo, last,
						      last_len, freezable);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				/* iolock is not held here, so skip the common exit. */
				scm_destroy(&scm);
				goto out;
			}

			mutex_lock(&u->iolock);
			goto redo;
unlock:
			unix_state_unlock(sk);
			break;
		}

		/* Step over whole skbs covered by the peek offset. */
		while (skip >= unix_skb_len(skb)) {
			skip -= unix_skb_len(skb);
			last = skb;
			last_len = skb->len;
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (!skb)
				goto again;
		}

		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (!unix_skb_scm_eq(skb, &scm))
				break;
		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
			unix_set_secdata(&scm, skb);
			check_creds = true;
		}

		/* Copy address just once */
		/* NOTE(review): sunaddr appears to be local to this block, so
		 * clearing it would have no lasting effect and the address
		 * would be copied on every pass -- confirm against
		 * DECLARE_SOCKADDR().
		 */
		if (state->msg && state->msg->msg_name) {
			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
					 state->msg->msg_name);
			unix_copy_addr(state->msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
		/* Take a reference on the skb across the actor call. */
		skb_get(skb);
		chunk = state->recv_actor(skb, skip, chunk, state);
		drop_skb = !unix_skb_len(skb);
		/* skb is only safe to use if !drop_skb */
		consume_skb(skb);
		if (chunk < 0) {
			/* Report -EFAULT only when nothing was read at all. */
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		if (drop_skb) {
			/* the skb was touched by a concurrent reader;
			 * we should not expect anything from this skb
			 * anymore and assume it invalid - we can be
			 * sure it was dropped from the socket queue
			 *
			 * let's report a short read
			 */
			err = 0;
			break;
		}

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			UNIXCB(skb).consumed += chunk;

			sk_peek_offset_bwd(sk, chunk);

			if (UNIXCB(skb).fp) {
				scm_stat_del(sk, skb);
				unix_detach_fds(&scm, skb);
			}

			/* skb not fully read yet: stop here. */
			if (unix_skb_len(skb))
				break;

			skb_unlink(skb, &sk->sk_receive_queue);
			consume_skb(skb);

			/* Stop once file descriptors have been picked up. */
			if (scm.fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				scm.fp = scm_fp_dup(UNIXCB(skb).fp);

			sk_peek_offset_fwd(sk, chunk);

			if (UNIXCB(skb).fp)
				break;

			/* Peek continues with the next skb from its start. */
			skip = 0;
			last = skb;
			last_len = skb->len;
			unix_state_lock(sk);
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (skb)
				goto again;
			unix_state_unlock(sk);
			break;
		}
	} while (size);

	mutex_unlock(&u->iolock);
	/* Deliver ancillary data via recvmsg; otherwise just release it. */
	if (state->msg)
		scm_recv(sock, state->msg, &scm, flags);
	else
		scm_destroy(&scm);
out:
	return copied ? : err;
}
2443 
2444 static int unix_stream_read_actor(struct sk_buff *skb,
2445 				  int skip, int chunk,
2446 				  struct unix_stream_read_state *state)
2447 {
2448 	int ret;
2449 
2450 	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2451 				    state->msg, chunk);
2452 	return ret ?: chunk;
2453 }
2454 
2455 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2456 			       size_t size, int flags)
2457 {
2458 	struct unix_stream_read_state state = {
2459 		.recv_actor = unix_stream_read_actor,
2460 		.socket = sock,
2461 		.msg = msg,
2462 		.size = size,
2463 		.flags = flags
2464 	};
2465 
2466 	return unix_stream_read_generic(&state, true);
2467 }
2468 
2469 static int unix_stream_splice_actor(struct sk_buff *skb,
2470 				    int skip, int chunk,
2471 				    struct unix_stream_read_state *state)
2472 {
2473 	return skb_splice_bits(skb, state->socket->sk,
2474 			       UNIXCB(skb).consumed + skip,
2475 			       state->pipe, chunk, state->splice_flags);
2476 }
2477 
2478 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2479 				       struct pipe_inode_info *pipe,
2480 				       size_t size, unsigned int flags)
2481 {
2482 	struct unix_stream_read_state state = {
2483 		.recv_actor = unix_stream_splice_actor,
2484 		.socket = sock,
2485 		.pipe = pipe,
2486 		.size = size,
2487 		.splice_flags = flags,
2488 	};
2489 
2490 	if (unlikely(*ppos))
2491 		return -ESPIPE;
2492 
2493 	if (sock->file->f_flags & O_NONBLOCK ||
2494 	    flags & SPLICE_F_NONBLOCK)
2495 		state.flags = MSG_DONTWAIT;
2496 
2497 	return unix_stream_read_generic(&state, false);
2498 }
2499 
2500 static int unix_shutdown(struct socket *sock, int mode)
2501 {
2502 	struct sock *sk = sock->sk;
2503 	struct sock *other;
2504 
2505 	if (mode < SHUT_RD || mode > SHUT_RDWR)
2506 		return -EINVAL;
2507 	/* This maps:
2508 	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2509 	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2510 	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2511 	 */
2512 	++mode;
2513 
2514 	unix_state_lock(sk);
2515 	sk->sk_shutdown |= mode;
2516 	other = unix_peer(sk);
2517 	if (other)
2518 		sock_hold(other);
2519 	unix_state_unlock(sk);
2520 	sk->sk_state_change(sk);
2521 
2522 	if (other &&
2523 		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2524 
2525 		int peer_mode = 0;
2526 
2527 		if (mode&RCV_SHUTDOWN)
2528 			peer_mode |= SEND_SHUTDOWN;
2529 		if (mode&SEND_SHUTDOWN)
2530 			peer_mode |= RCV_SHUTDOWN;
2531 		unix_state_lock(other);
2532 		other->sk_shutdown |= peer_mode;
2533 		unix_state_unlock(other);
2534 		other->sk_state_change(other);
2535 		if (peer_mode == SHUTDOWN_MASK)
2536 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2537 		else if (peer_mode & RCV_SHUTDOWN)
2538 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2539 	}
2540 	if (other)
2541 		sock_put(other);
2542 
2543 	return 0;
2544 }
2545 
2546 long unix_inq_len(struct sock *sk)
2547 {
2548 	struct sk_buff *skb;
2549 	long amount = 0;
2550 
2551 	if (sk->sk_state == TCP_LISTEN)
2552 		return -EINVAL;
2553 
2554 	spin_lock(&sk->sk_receive_queue.lock);
2555 	if (sk->sk_type == SOCK_STREAM ||
2556 	    sk->sk_type == SOCK_SEQPACKET) {
2557 		skb_queue_walk(&sk->sk_receive_queue, skb)
2558 			amount += unix_skb_len(skb);
2559 	} else {
2560 		skb = skb_peek(&sk->sk_receive_queue);
2561 		if (skb)
2562 			amount = skb->len;
2563 	}
2564 	spin_unlock(&sk->sk_receive_queue.lock);
2565 
2566 	return amount;
2567 }
2568 EXPORT_SYMBOL_GPL(unix_inq_len);
2569 
/* Outgoing queue size of @sk, as reported by sk_wmem_alloc_get(). */
long unix_outq_len(struct sock *sk)
{
	long queued = sk_wmem_alloc_get(sk);

	return queued;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2575 
2576 static int unix_open_file(struct sock *sk)
2577 {
2578 	struct path path;
2579 	struct file *f;
2580 	int fd;
2581 
2582 	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2583 		return -EPERM;
2584 
2585 	if (!smp_load_acquire(&unix_sk(sk)->addr))
2586 		return -ENOENT;
2587 
2588 	path = unix_sk(sk)->path;
2589 	if (!path.dentry)
2590 		return -ENOENT;
2591 
2592 	path_get(&path);
2593 
2594 	fd = get_unused_fd_flags(O_CLOEXEC);
2595 	if (fd < 0)
2596 		goto out;
2597 
2598 	f = dentry_open(&path, O_PATH, current_cred());
2599 	if (IS_ERR(f)) {
2600 		put_unused_fd(fd);
2601 		fd = PTR_ERR(f);
2602 		goto out;
2603 	}
2604 
2605 	fd_install(fd, f);
2606 out:
2607 	path_put(&path);
2608 
2609 	return fd;
2610 }
2611 
2612 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2613 {
2614 	struct sock *sk = sock->sk;
2615 	long amount = 0;
2616 	int err;
2617 
2618 	switch (cmd) {
2619 	case SIOCOUTQ:
2620 		amount = unix_outq_len(sk);
2621 		err = put_user(amount, (int __user *)arg);
2622 		break;
2623 	case SIOCINQ:
2624 		amount = unix_inq_len(sk);
2625 		if (amount < 0)
2626 			err = amount;
2627 		else
2628 			err = put_user(amount, (int __user *)arg);
2629 		break;
2630 	case SIOCUNIXFILE:
2631 		err = unix_open_file(sk);
2632 		break;
2633 	default:
2634 		err = -ENOIOCTLCMD;
2635 		break;
2636 	}
2637 	return err;
2638 }
2639 
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point: convert the compat argument with
 * compat_ptr() and forward to unix_ioctl().
 */
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	unsigned long native_arg = (unsigned long)compat_ptr(arg);

	return unix_ioctl(sock, cmd, native_arg);
}
#endif
2646 
2647 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2648 {
2649 	struct sock *sk = sock->sk;
2650 	__poll_t mask;
2651 
2652 	sock_poll_wait(file, sock, wait);
2653 	mask = 0;
2654 
2655 	/* exceptional events? */
2656 	if (sk->sk_err)
2657 		mask |= EPOLLERR;
2658 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2659 		mask |= EPOLLHUP;
2660 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2661 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2662 
2663 	/* readable? */
2664 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2665 		mask |= EPOLLIN | EPOLLRDNORM;
2666 
2667 	/* Connection-based need to check for termination and startup */
2668 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2669 	    sk->sk_state == TCP_CLOSE)
2670 		mask |= EPOLLHUP;
2671 
2672 	/*
2673 	 * we set writable also when the other side has shut down the
2674 	 * connection. This prevents stuck sockets.
2675 	 */
2676 	if (unix_writable(sk))
2677 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2678 
2679 	return mask;
2680 }
2681 
2682 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
2683 				    poll_table *wait)
2684 {
2685 	struct sock *sk = sock->sk, *other;
2686 	unsigned int writable;
2687 	__poll_t mask;
2688 
2689 	sock_poll_wait(file, sock, wait);
2690 	mask = 0;
2691 
2692 	/* exceptional events? */
2693 	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
2694 		mask |= EPOLLERR |
2695 			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
2696 
2697 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2698 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2699 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2700 		mask |= EPOLLHUP;
2701 
2702 	/* readable? */
2703 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2704 		mask |= EPOLLIN | EPOLLRDNORM;
2705 
2706 	/* Connection-based need to check for termination and startup */
2707 	if (sk->sk_type == SOCK_SEQPACKET) {
2708 		if (sk->sk_state == TCP_CLOSE)
2709 			mask |= EPOLLHUP;
2710 		/* connection hasn't started yet? */
2711 		if (sk->sk_state == TCP_SYN_SENT)
2712 			return mask;
2713 	}
2714 
2715 	/* No write status requested, avoid expensive OUT tests. */
2716 	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
2717 		return mask;
2718 
2719 	writable = unix_writable(sk);
2720 	if (writable) {
2721 		unix_state_lock(sk);
2722 
2723 		other = unix_peer(sk);
2724 		if (other && unix_peer(other) != sk &&
2725 		    unix_recvq_full(other) &&
2726 		    unix_dgram_peer_wake_me(sk, other))
2727 			writable = 0;
2728 
2729 		unix_state_unlock(sk);
2730 	}
2731 
2732 	if (writable)
2733 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2734 	else
2735 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2736 
2737 	return mask;
2738 }
2739 
2740 #ifdef CONFIG_PROC_FS
2741 
/*
 * A seq_file position is split in two: the bits above BUCKET_SPACE hold
 * the index of a bucket in unix_socket_table, the low BUCKET_SPACE bits
 * hold an offset inside that bucket.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

/* Extract the bucket index / the in-bucket offset from a position. */
#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
/* Build a position from bucket index b and in-bucket offset o. */
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
2747 
2748 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2749 {
2750 	unsigned long offset = get_offset(*pos);
2751 	unsigned long bucket = get_bucket(*pos);
2752 	struct sock *sk;
2753 	unsigned long count = 0;
2754 
2755 	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2756 		if (sock_net(sk) != seq_file_net(seq))
2757 			continue;
2758 		if (++count == offset)
2759 			break;
2760 	}
2761 
2762 	return sk;
2763 }
2764 
2765 static struct sock *unix_next_socket(struct seq_file *seq,
2766 				     struct sock *sk,
2767 				     loff_t *pos)
2768 {
2769 	unsigned long bucket;
2770 
2771 	while (sk > (struct sock *)SEQ_START_TOKEN) {
2772 		sk = sk_next(sk);
2773 		if (!sk)
2774 			goto next_bucket;
2775 		if (sock_net(sk) == seq_file_net(seq))
2776 			return sk;
2777 	}
2778 
2779 	do {
2780 		sk = unix_from_bucket(seq, pos);
2781 		if (sk)
2782 			return sk;
2783 
2784 next_bucket:
2785 		bucket = get_bucket(*pos) + 1;
2786 		*pos = set_bucket_offset(bucket, 1);
2787 	} while (bucket < ARRAY_SIZE(unix_socket_table));
2788 
2789 	return NULL;
2790 }
2791 
2792 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2793 	__acquires(unix_table_lock)
2794 {
2795 	spin_lock(&unix_table_lock);
2796 
2797 	if (!*pos)
2798 		return SEQ_START_TOKEN;
2799 
2800 	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2801 		return NULL;
2802 
2803 	return unix_next_socket(seq, NULL, pos);
2804 }
2805 
2806 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2807 {
2808 	++*pos;
2809 	return unix_next_socket(seq, v, pos);
2810 }
2811 
/* seq_file stop callback: release the lock taken by unix_seq_start(). */
static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}
2817 
2818 static int unix_seq_show(struct seq_file *seq, void *v)
2819 {
2820 
2821 	if (v == SEQ_START_TOKEN)
2822 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2823 			 "Inode Path\n");
2824 	else {
2825 		struct sock *s = v;
2826 		struct unix_sock *u = unix_sk(s);
2827 		unix_state_lock(s);
2828 
2829 		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2830 			s,
2831 			refcount_read(&s->sk_refcnt),
2832 			0,
2833 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2834 			s->sk_type,
2835 			s->sk_socket ?
2836 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2837 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2838 			sock_i_ino(s));
2839 
2840 		if (u->addr) {	// under unix_table_lock here
2841 			int i, len;
2842 			seq_putc(seq, ' ');
2843 
2844 			i = 0;
2845 			len = u->addr->len - sizeof(short);
2846 			if (!UNIX_ABSTRACT(s))
2847 				len--;
2848 			else {
2849 				seq_putc(seq, '@');
2850 				i++;
2851 			}
2852 			for ( ; i < len; i++)
2853 				seq_putc(seq, u->addr->name->sun_path[i] ?:
2854 					 '@');
2855 		}
2856 		unix_state_unlock(s);
2857 		seq_putc(seq, '\n');
2858 	}
2859 
2860 	return 0;
2861 }
2862 
/* seq_file callbacks; registered as the "unix" proc entry in unix_net_init(). */
static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};
2869 #endif
2870 
/* Protocol family descriptor for PF_UNIX, passed to sock_register(). */
static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};
2876 
2877 
2878 static int __net_init unix_net_init(struct net *net)
2879 {
2880 	int error = -ENOMEM;
2881 
2882 	net->unx.sysctl_max_dgram_qlen = 10;
2883 	if (unix_sysctl_register(net))
2884 		goto out;
2885 
2886 #ifdef CONFIG_PROC_FS
2887 	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2888 			sizeof(struct seq_net_private))) {
2889 		unix_sysctl_unregister(net);
2890 		goto out;
2891 	}
2892 #endif
2893 	error = 0;
2894 out:
2895 	return error;
2896 }
2897 
/* Per-namespace teardown: drop the sysctl table and the "unix" proc entry. */
static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	remove_proc_entry("unix", net->proc_net);
}
2903 
/* Per-network-namespace init/exit hooks, passed to register_pernet_subsys(). */
static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};
2908 
2909 static int __init af_unix_init(void)
2910 {
2911 	int rc = -1;
2912 
2913 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
2914 
2915 	rc = proto_register(&unix_proto, 1);
2916 	if (rc != 0) {
2917 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2918 		goto out;
2919 	}
2920 
2921 	sock_register(&unix_family_ops);
2922 	register_pernet_subsys(&unix_net_ops);
2923 out:
2924 	return rc;
2925 }
2926 
/*
 * Module exit: unregister the socket family, the protocol and the
 * per-namespace operations registered by af_unix_init().
 */
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}
2933 
2934 /* Earlier than device_initcall() so that other drivers invoking
2935    request_module() don't end up in a loop when modprobe tries
2936    to use a UNIX socket. But later than subsys_initcall() because
2937    we depend on stuff initialised there */
2938 fs_initcall(af_unix_init);
2939 module_exit(af_unix_exit);
2940 
2941 MODULE_LICENSE("GPL");
2942 MODULE_ALIAS_NETPROTO(PF_UNIX);
2943