// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko Eißfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by the above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it avoids a huge amount
 *					of hashed socks (for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skbs queueable in the
 *					dgram receiver.
 *		Artur Skawina   :	Hash function optimizations
 *	     Alexey Kuznetsov   :	Full scale SMP. Lots of bugs are introduced 8)
 *	      Malcolm Beattie   :	Set peercred for socketpair
 *	     Michal Ostrowski   :       Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *	     				the core infrastructure is doing that
 *	     				for all net proto families now (2.5.69+)
 *
 * Known differences from the reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first-socket-fstat-twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns a 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed the server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  starting with 0, so that this name space does not intersect
 *		  with BSD names.
 */
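
/* Illustration (not part of the build): a minimal userspace sketch of
 * the two address flavours described above.  The path and name used
 * here are arbitrary examples.
 *
 *	struct sockaddr_un a = { .sun_family = AF_UNIX };
 *	int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *	// Filesystem binding: a NUL-terminated path, creates an inode.
 *	strcpy(a.sun_path, "/tmp/example.sock");
 *	bind(fd, (struct sockaddr *)&a, sizeof(a));
 *
 *	// Abstract binding: sun_path starts with a zero byte, the name
 *	// is the bytes that follow, and the exact length is passed in.
 *	a.sun_path[0] = 0;
 *	memcpy(a.sun_path + 1, "example", 7);
 *	bind(fd, (struct sockaddr *)&a,
 *	     offsetof(struct sockaddr_un, sun_path) + 1 + 7);
 */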

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>
#include <linux/file.h>

#include "scm.h"

struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
static atomic_long_t unix_nr_socks;


static struct hlist_head *unix_sockets_unbound(void *addr)
{
	unsigned long hash = (unsigned long)addr;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash %= UNIX_HASH_SIZE;
	return &unix_socket_table[UNIX_HASH_SIZE + hash];
}
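
/* Table layout: the lower UNIX_HASH_SIZE buckets hold bound sockets
 * (abstract names by name hash, filesystem names by inode number);
 * the upper half holds unbound sockets, keyed by socket pointer as
 * above.  A filesystem-bound socket keeps the sentinel value
 * UNIX_HASH_SIZE in addr->hash, which is what UNIX_ABSTRACT() tests.
 */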

#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */

/*
 *  SMP locking strategy:
 *    the hash table is protected by the spinlock unix_table_lock;
 *    each socket state is protected by a separate spinlock.
 */

static inline unsigned int unix_hash_fold(__wsum n)
{
	unsigned int hash = (__force unsigned int)csum_fold(n);

	hash ^= hash>>8;
	return hash&(UNIX_HASH_SIZE-1);
}

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(const struct sock *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

static inline int unix_recvq_full_lockless(const struct sock *sk)
{
	return skb_queue_len_lockless(&sk->sk_receive_queue) >
		READ_ONCE(sk->sk_max_ack_backlog);
}
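
/* The lockless variant is for paths that inspect a peer's queue
 * without holding its lock (see unix_dgram_sendmsg()); READ_ONCE()
 * guards against a torn read of the backlog limit, which
 * unix_listen() updates without this queue's lock held.
 */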

struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static inline void unix_release_addr(struct unix_address *addr)
{
	if (refcount_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 *	Check a unix socket name:
 *		- it should not be zero length.
 *		- if it does not start with a zero byte, it should be
 *		  NUL-terminated (an FS object)
 *		- if it starts with a zero byte, it is an abstract name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
	*hashp = 0;

	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't exist as such.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}
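
/* Worked example (hypothetical values): binding to the filesystem path
 * "/tmp/x" returns strlen("/tmp/x") + 1 + sizeof(short) = 6 + 1 + 2 = 9
 * and leaves *hashp at 0; an abstract name keeps the caller's len and
 * *hashp receives the folded checksum of the address bytes.
 */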

static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			return s;
	}
	return NULL;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned int hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	sk_for_each(s,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && d_backing_inode(dentry) == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}

/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (eg, /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far. This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_entry_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition, and broken when
 * the association to the server socket is dissolved or after a wake
 * up was relayed.
 */

static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key_to_poll(key));

	return 0;
}

static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
	struct unix_sock *u, *u_other;
	int rc;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	rc = 0;
	spin_lock(&u_other->peer_wait.lock);

	if (!u->peer_wake.private) {
		u->peer_wake.private = other;
		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);

		rc = 1;
	}

	spin_unlock(&u_other->peer_wait.lock);
	return rc;
}

static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}

static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   EPOLLOUT |
				   EPOLLWRNORM |
				   EPOLLWRBAND);
}

/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	/* If other is SOCK_DEAD, we want to make sure we signal
	 * POLLOUT, such that a subsequent write() can get a
	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
	 * to other and it's full, we will hang waiting for POLLOUT.
	 */
	if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}

static int unix_writable(const struct sock *sk)
{
	return sk->sk_state != TCP_LISTEN &&
	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}
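
/* A socket counts as writable while less than a quarter of its send
 * buffer is in use: (wmem_alloc << 2) <= sk_sndbuf is the same as
 * wmem_alloc <= sk_sndbuf / 4.
 */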

static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}

/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets arrived from the previous peer. First, this allows
 * flow control based only on wmem_alloc; second, a sk connected to a peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of a bidirectional dgram pipe is disconnected,
		 * we signal an error. Messages are lost. Do not do this
		 * when the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}

static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct path path;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	path	     = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;

	skpair = unix_peer(sk);
	unix_peer(sk) = NULL;

	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook	      */
		UNIXCB(skb).consumed = skb->len;
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What does the above comment talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */
}

static void init_peercred(struct sock *sk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
}

static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}

static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
out:
	return err;
}

static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
				    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
#endif
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
				    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
				       struct pipe_inode_info *, size_t size,
				       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
				  int);

static int unix_set_peek_off(struct sock *sk, int val)
{
	struct unix_sock *u = unix_sk(sk);

	if (mutex_lock_interruptible(&u->iolock))
		return -EINTR;

	sk->sk_peek_off = val;
	mutex_unlock(&u->iolock);

	return 0;
}
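
/* iolock is the socket's "single task reading lock" (see
 * unix_create1() below), so taking it here means the new peek offset
 * cannot change underneath a recvmsg() already in progress.
 */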

#ifdef CONFIG_PROC_FS
static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;

	if (sk) {
		u = unix_sk(sock->sk);
		seq_printf(m, "scm_fds: %u\n",
			   atomic_read(&u->scm_stat.nr_fds));
	}
}
#else
#define unix_show_fdinfo NULL
#endif

static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	unix_stream_sendpage,
	.splice_read =	unix_stream_splice_read,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};

static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};

static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};

static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};

static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);

	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->iolock); /* single task reading lock */
	mutex_init(&u->bindlock); /* single task binding lock */
	init_waitqueue_head(&u->peer_wait);
	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
	unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
	if (sk == NULL)
		atomic_long_dec(&unix_nr_socks);
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}

static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		fallthrough;
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
}

static int unix_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	unix_release_sock(sk, 0);
	sock->sk = NULL;

	return 0;
}

static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		return err;

	if (u->addr)
		goto out;

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	refcount_set(&addr->refcnt, 1);

retry:
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take a long time if many
		 * names are already in use.
		 */
		cond_resched();
		/* Give up if all names seem to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->bindlock);
	return err;
}
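
/* The resulting autobind address is abstract: one zero byte followed
 * by five hex digits ("%05x" of ordernum), e.g. "\0" "00001" on the
 * first autobind, giving addr->len == sizeof(short) + 6.
 */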

static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_backing_inode(path.dentry);
		err = path_permission(&path, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}

static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
{
	struct dentry *dentry;
	struct path path;
	int err = 0;
	/*
	 * Get the parent directory, calculate the hash for the last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
	err = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		return err;

	/*
	 * All right, let's create it.
	 */
	err = security_path_mknod(&path, dentry, mode, 0);
	if (!err) {
		err = vfs_mknod(mnt_user_ns(path.mnt), d_inode(path.dentry),
				dentry, mode, 0);
		if (!err) {
			res->mnt = mntget(path.mnt);
			res->dentry = dget(dentry);
		}
	}
	done_path_create(&path, dentry);
	return err;
}

static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash;
	struct unix_address *addr;
	struct hlist_head *list;
	struct path path = { };

	err = -EINVAL;
	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
	    sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len == sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (sun_path[0]) {
		umode_t mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = unix_mknod(sun_path, mode, &path);
		if (err) {
			if (err == -EEXIST)
				err = -EADDRINUSE;
			goto out;
		}
	}

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_put;

	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	refcount_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		addr->hash = UNIX_HASH_SIZE;
		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
		spin_lock(&unix_table_lock);
		u->path = path;
		list = &unix_socket_table[hash];
	} else {
		spin_lock(&unix_table_lock);
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	}

	err = 0;
	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->bindlock);
out_put:
	if (err)
		path_put(&path);
out:
	return err;
}

static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}

static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}
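
/* Taking the two state locks in ascending pointer order imposes a
 * global hierarchy, so two tasks double-locking the same pair from
 * opposite ends cannot deadlock.
 */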

static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}

static long unix_wait_for_peer(struct sock *other, long timeo)
	__releases(&unix_sk(other)->lock)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
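
/* Note the asymmetric locking: the caller enters with other's state
 * lock held and it is dropped here before sleeping on other's
 * peer_wait queue; as described above unix_dgram_peer_wake_relay(),
 * the dgram receive path wakes that queue when a datagram is consumed.
 */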

static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we do this after the state is locked,
	   we will have to recheck everything again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   This is a tricky place. We need to grab our state lock and cannot
	   drop the lock on the peer. It is dangerous because deadlock is
	   possible. Connect-to-self and simultaneous connect attempts are
	   eliminated by checking socket state: other is TCP_LISTEN, and if
	   sk is TCP_LISTEN we check this before the attempt to grab the lock.

	   Well, and we have to recheck the state after the socket is locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Quickly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under unix_table_lock.  Insertion
	 * into the hash chain we'd found it in had been done
	 * in an earlier critical area protected by unix_table_lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire().  IOW, the same guarantees
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	refcount_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}

static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
	struct sock *ska = socka->sk, *skb = sockb->sk;

	/* Join our sockets back to back */
	sock_hold(ska);
	sock_hold(skb);
	unix_peer(ska) = skb;
	unix_peer(skb) = ska;
	init_peercred(ska);
	init_peercred(skb);

	if (ska->sk_type != SOCK_DGRAM) {
		ska->sk_state = TCP_ESTABLISHED;
		skb->sk_state = TCP_ESTABLISHED;
		socka->state  = SS_CONNECTED;
		sockb->state  = SS_CONNECTED;
	}
	return 0;
}
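
/* Illustration (not part of the build): this is what socketpair(2)
 * reaches from userspace; the payload below is an arbitrary example.
 *
 *	int sv[2];
 *
 *	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == 0) {
 *		// sv[0] and sv[1] are now mutual peers with peer
 *		// credentials set and state TCP_ESTABLISHED
 *		write(sv[0], "ping", 4);
 *	}
 */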

static void unix_sock_inherit_flags(const struct socket *old,
				    struct socket *new)
{
	if (test_bit(SOCK_PASSCRED, &old->flags))
		set_bit(SOCK_PASSCRED, &new->flags);
	if (test_bit(SOCK_PASSSEC, &old->flags))
		set_bit(SOCK_PASSSEC, &new->flags);
}

static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
		       bool kern)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}


static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_address *addr;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		sock_hold(sk);
	}

	addr = smp_load_acquire(&unix_sk(sk)->addr);
	if (!addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		err = sizeof(short);
	} else {
		err = addr->len;
		memcpy(sunaddr, addr->name, addr->len);
	}
	sock_put(sk);
out:
	return err;
}

static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int err = 0;

	UNIXCB(skb).pid  = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	unix_get_secdata(scm, skb);
	if (scm->fp && send_fds)
		err = unix_attach_fds(scm, skb);

	skb->destructor = unix_destruct_scm;
	return err;
}

static bool unix_passcred_enabled(const struct socket *sock,
				  const struct sock *other)
{
	return test_bit(SOCK_PASSCRED, &sock->flags) ||
	       !other->sk_socket ||
	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
}

/*
 * Some apps rely on write() giving SCM_CREDENTIALS.
 * We include credentials if the source or destination socket
 * asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
			    const struct sock *other)
{
	if (UNIXCB(skb).pid)
		return;
	if (unix_passcred_enabled(sock, other)) {
		UNIXCB(skb).pid  = get_pid(task_tgid(current));
		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
	}
}

static int maybe_init_creds(struct scm_cookie *scm,
			    struct socket *socket,
			    const struct sock *other)
{
	int err;
	struct msghdr msg = { .msg_controllen = 0 };

	err = scm_send(socket, &msg, scm, false);
	if (err)
		return err;

	if (unix_passcred_enabled(socket, other)) {
		scm->pid = get_pid(task_tgid(current));
		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
	}
	return err;
}

static bool unix_skb_scm_eq(struct sk_buff *skb,
			    struct scm_cookie *scm)
{
	const struct unix_skb_parms *u = &UNIXCB(skb);

	return u->pid == scm->pid &&
	       uid_eq(u->uid, scm->creds.uid) &&
	       gid_eq(u->gid, scm->creds.gid) &&
	       unix_secdata_eq(scm, skb);
}

static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
{
	struct scm_fp_list *fp = UNIXCB(skb).fp;
	struct unix_sock *u = unix_sk(sk);

	if (unlikely(fp && fp->count))
		atomic_add(fp->count, &u->scm_stat.nr_fds);
}

static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
{
	struct scm_fp_list *fp = UNIXCB(skb).fp;
	struct unix_sock *u = unix_sk(sk);

	if (unlikely(fp && fp->count))
		atomic_sub(fp->count, &u->scm_stat.nr_fds);
}
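
/* These counters track SCM_RIGHTS file descriptors currently queued on
 * the socket; unix_show_fdinfo() above reports the total as the
 * "scm_fds:" line of /proc/<pid>/fdinfo/<fd>.
 */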

/*
 *	Send AF_UNIX data.
 */

static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
			      size_t len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned int hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie scm;
	int data_len = 0;
	int sk_locked;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	if (len > SKB_MAX_ALLOC) {
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);
		data_len = PAGE_ALIGN(data_len);

		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
	}

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err,
				   PAGE_ALLOC_COSTLY_ORDER);
	if (skb == NULL)
		goto out;

	err = unix_scm_to_skb(&scm, skb, true);
	if (err < 0)
		goto out_free;

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	sk_locked = 0;
	unix_state_lock(other);
restart_locked:
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 *	Check with 1003.1g - what should
		 *	datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		if (!sk_locked)
			unix_state_lock(sk);

		err = 0;
		if (unix_peer(sk) == other) {
			unix_peer(sk) = NULL;
			unix_dgram_peer_wake_disconnect_wakeup(sk, other);

			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* other == sk && unix_peer(other) != sk if
	 * - unix_peer(sk) == NULL, destination address bound to sk
	 * - unix_peer(sk) == sk by time of get but disconnected before lock
	 */
	if (other != sk &&
	    unlikely(unix_peer(other) != sk &&
	    unix_recvq_full_lockless(other))) {
		if (timeo) {
			timeo = unix_wait_for_peer(other, timeo);

			err = sock_intr_errno(timeo);
			if (signal_pending(current))
				goto out_free;

			goto restart;
		}

		if (!sk_locked) {
			unix_state_unlock(other);
			unix_state_double_lock(sk, other);
		}

		if (unix_peer(sk) != other ||
		    unix_dgram_peer_wake_me(sk, other)) {
			err = -EAGAIN;
			sk_locked = 1;
			goto out_unlock;
		}

		if (!sk_locked) {
			sk_locked = 1;
			goto restart_locked;
		}
	}

	if (unlikely(sk_locked))
		unix_state_unlock(sk);

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	scm_stat_add(other, skb);
	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	scm_destroy(&scm);
	return len;

out_unlock:
	if (sk_locked)
		unix_state_unlock(sk);
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(&scm);
	return err;
}

/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, with a minimum of a full page.
 */
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
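
/* With 4 KiB pages, get_order(32768) == 3 and UNIX_SKB_FRAGS_SZ is
 * 4096 << 3 == 32768 bytes; with larger page sizes it evaluates to
 * one full page.
 */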
1823 
1824 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1825 			       size_t len)
1826 {
1827 	struct sock *sk = sock->sk;
1828 	struct sock *other = NULL;
1829 	int err, size;
1830 	struct sk_buff *skb;
1831 	int sent = 0;
1832 	struct scm_cookie scm;
1833 	bool fds_sent = false;
1834 	int data_len;
1835 
1836 	wait_for_unix_gc();
1837 	err = scm_send(sock, msg, &scm, false);
1838 	if (err < 0)
1839 		return err;
1840 
1841 	err = -EOPNOTSUPP;
1842 	if (msg->msg_flags&MSG_OOB)
1843 		goto out_err;
1844 
1845 	if (msg->msg_namelen) {
1846 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1847 		goto out_err;
1848 	} else {
1849 		err = -ENOTCONN;
1850 		other = unix_peer(sk);
1851 		if (!other)
1852 			goto out_err;
1853 	}
1854 
1855 	if (sk->sk_shutdown & SEND_SHUTDOWN)
1856 		goto pipe_err;
1857 
1858 	while (sent < len) {
1859 		size = len - sent;
1860 
1861 		/* Keep two messages in the pipe so it schedules better */
1862 		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1863 
1864 		/* allow fallback to order-0 allocations */
1865 		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1866 
1867 		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1868 
1869 		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1870 
1871 		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1872 					   msg->msg_flags & MSG_DONTWAIT, &err,
1873 					   get_order(UNIX_SKB_FRAGS_SZ));
1874 		if (!skb)
1875 			goto out_err;
1876 
1877 		/* Only send the fds in the first buffer */
1878 		err = unix_scm_to_skb(&scm, skb, !fds_sent);
1879 		if (err < 0) {
1880 			kfree_skb(skb);
1881 			goto out_err;
1882 		}
1883 		fds_sent = true;
1884 
1885 		skb_put(skb, size - data_len);
1886 		skb->data_len = data_len;
1887 		skb->len = size;
1888 		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1889 		if (err) {
1890 			kfree_skb(skb);
1891 			goto out_err;
1892 		}
1893 
1894 		unix_state_lock(other);
1895 
1896 		if (sock_flag(other, SOCK_DEAD) ||
1897 		    (other->sk_shutdown & RCV_SHUTDOWN))
1898 			goto pipe_err_free;
1899 
1900 		maybe_add_creds(skb, sock, other);
1901 		scm_stat_add(other, skb);
1902 		skb_queue_tail(&other->sk_receive_queue, skb);
1903 		unix_state_unlock(other);
1904 		other->sk_data_ready(other);
1905 		sent += size;
1906 	}
1907 
1908 	scm_destroy(&scm);
1909 
1910 	return sent;
1911 
1912 pipe_err_free:
1913 	unix_state_unlock(other);
1914 	kfree_skb(skb);
1915 pipe_err:
1916 	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1917 		send_sig(SIGPIPE, current, 0);
1918 	err = -EPIPE;
1919 out_err:
1920 	scm_destroy(&scm);
1921 	return sent ? : err;
1922 }
1923 
1924 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1925 				    int offset, size_t size, int flags)
1926 {
1927 	int err;
1928 	bool send_sigpipe = false;
1929 	bool init_scm = true;
1930 	struct scm_cookie scm;
1931 	struct sock *other, *sk = socket->sk;
1932 	struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1933 
1934 	if (flags & MSG_OOB)
1935 		return -EOPNOTSUPP;
1936 
1937 	other = unix_peer(sk);
1938 	if (!other || sk->sk_state != TCP_ESTABLISHED)
1939 		return -ENOTCONN;
1940 
1941 	if (false) {
1942 alloc_skb:
1943 		unix_state_unlock(other);
1944 		mutex_unlock(&unix_sk(other)->iolock);
1945 		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1946 					      &err, 0);
1947 		if (!newskb)
1948 			goto err;
1949 	}
1950 
1951 	/* we must acquire iolock as we modify already present
1952 	 * skbs in the sk_receive_queue and mess with skb->len
1953 	 */
1954 	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1955 	if (err) {
1956 		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1957 		goto err;
1958 	}
1959 
1960 	if (sk->sk_shutdown & SEND_SHUTDOWN) {
1961 		err = -EPIPE;
1962 		send_sigpipe = true;
1963 		goto err_unlock;
1964 	}
1965 
1966 	unix_state_lock(other);
1967 
1968 	if (sock_flag(other, SOCK_DEAD) ||
1969 	    other->sk_shutdown & RCV_SHUTDOWN) {
1970 		err = -EPIPE;
1971 		send_sigpipe = true;
1972 		goto err_state_unlock;
1973 	}
1974 
1975 	if (init_scm) {
1976 		err = maybe_init_creds(&scm, socket, other);
1977 		if (err)
1978 			goto err_state_unlock;
1979 		init_scm = false;
1980 	}
1981 
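	/* Try to coalesce: append the page to the tail skb when it came
	 * from the same sender (matching scm state); otherwise use the
	 * freshly allocated newskb, jumping to alloc_skb first if none
	 * has been allocated yet.
	 */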
1982 	skb = skb_peek_tail(&other->sk_receive_queue);
1983 	if (tail && tail == skb) {
1984 		skb = newskb;
1985 	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
1986 		if (newskb) {
1987 			skb = newskb;
1988 		} else {
1989 			tail = skb;
1990 			goto alloc_skb;
1991 		}
1992 	} else if (newskb) {
1993 		/* This is the fast path: the tail skb matched, so the
1994 		 * speculatively allocated newskb is not needed. Freeing
1995 		 * it does no harm even when newskb == NULL.
1996 		 */
1997 		consume_skb(newskb);
1998 		newskb = NULL;
1999 	}
2000 
2001 	if (skb_append_pagefrags(skb, page, offset, size)) {
2002 		tail = skb;
2003 		goto alloc_skb;
2004 	}
2005 
2006 	skb->len += size;
2007 	skb->data_len += size;
2008 	skb->truesize += size;
2009 	refcount_add(size, &sk->sk_wmem_alloc);
2010 
2011 	if (newskb) {
2012 		err = unix_scm_to_skb(&scm, skb, false);
2013 		if (err)
2014 			goto err_state_unlock;
2015 		spin_lock(&other->sk_receive_queue.lock);
2016 		__skb_queue_tail(&other->sk_receive_queue, newskb);
2017 		spin_unlock(&other->sk_receive_queue.lock);
2018 	}
2019 
2020 	unix_state_unlock(other);
2021 	mutex_unlock(&unix_sk(other)->iolock);
2022 
2023 	other->sk_data_ready(other);
2024 	scm_destroy(&scm);
2025 	return size;
2026 
2027 err_state_unlock:
2028 	unix_state_unlock(other);
2029 err_unlock:
2030 	mutex_unlock(&unix_sk(other)->iolock);
2031 err:
2032 	kfree_skb(newskb);
2033 	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2034 		send_sig(SIGPIPE, current, 0);
2035 	if (!init_scm)
2036 		scm_destroy(&scm);
2037 	return err;
2038 }
2039 
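/* SOCK_SEQPACKET reuses the datagram paths below: the socket must
 * already be connected, any caller-supplied destination address is
 * ignored, and message boundaries come from the dgram queue itself.
 */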
2040 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2041 				  size_t len)
2042 {
2043 	int err;
2044 	struct sock *sk = sock->sk;
2045 
2046 	err = sock_error(sk);
2047 	if (err)
2048 		return err;
2049 
2050 	if (sk->sk_state != TCP_ESTABLISHED)
2051 		return -ENOTCONN;
2052 
2053 	if (msg->msg_namelen)
2054 		msg->msg_namelen = 0;
2055 
2056 	return unix_dgram_sendmsg(sock, msg, len);
2057 }
2058 
2059 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2060 				  size_t size, int flags)
2061 {
2062 	struct sock *sk = sock->sk;
2063 
2064 	if (sk->sk_state != TCP_ESTABLISHED)
2065 		return -ENOTCONN;
2066 
2067 	return unix_dgram_recvmsg(sock, msg, size, flags);
2068 }
2069 
2070 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2071 {
2072 	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2073 
2074 	if (addr) {
2075 		msg->msg_namelen = addr->len;
2076 		memcpy(msg->msg_name, addr->name, addr->len);
2077 	}
2078 }
2079 
2080 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2081 			      size_t size, int flags)
2082 {
2083 	struct scm_cookie scm;
2084 	struct sock *sk = sock->sk;
2085 	struct unix_sock *u = unix_sk(sk);
2086 	struct sk_buff *skb, *last;
2087 	long timeo;
2088 	int skip;
2089 	int err;
2090 
2091 	err = -EOPNOTSUPP;
2092 	if (flags & MSG_OOB)
2093 		goto out;
2094 
2095 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2096 
2097 	do {
2098 		mutex_lock(&u->iolock);
2099 
2100 		skip = sk_peek_offset(sk, flags);
2101 		skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2102 					      &skip, &err, &last);
2103 		if (skb) {
2104 			if (!(flags & MSG_PEEK))
2105 				scm_stat_del(sk, skb);
2106 			break;
2107 		}
2108 
2109 		mutex_unlock(&u->iolock);
2110 
2111 		if (err != -EAGAIN)
2112 			break;
2113 	} while (timeo &&
2114 		 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2115 					      &err, &timeo, last));
2116 
2117 	if (!skb) { /* implies iolock unlocked */
2118 		unix_state_lock(sk);
2119 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2120 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2121 		    (sk->sk_shutdown & RCV_SHUTDOWN))
2122 			err = 0;
2123 		unix_state_unlock(sk);
2124 		goto out;
2125 	}
2126 
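	/* A datagram has been (or is being) consumed, so wake any senders
	 * that parked on our peer_wait queue while the receive queue was
	 * full (see unix_dgram_poll() and unix_dgram_sendmsg()).
	 */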
2127 	if (wq_has_sleeper(&u->peer_wait))
2128 		wake_up_interruptible_sync_poll(&u->peer_wait,
2129 						EPOLLOUT | EPOLLWRNORM |
2130 						EPOLLWRBAND);
2131 
2132 	if (msg->msg_name)
2133 		unix_copy_addr(msg, skb->sk);
2134 
2135 	if (size > skb->len - skip)
2136 		size = skb->len - skip;
2137 	else if (size < skb->len - skip)
2138 		msg->msg_flags |= MSG_TRUNC;
2139 
2140 	err = skb_copy_datagram_msg(skb, skip, msg, size);
2141 	if (err)
2142 		goto out_free;
2143 
2144 	if (sock_flag(sk, SOCK_RCVTSTAMP))
2145 		__sock_recv_timestamp(msg, sk, skb);
2146 
2147 	memset(&scm, 0, sizeof(scm));
2148 
2149 	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2150 	unix_set_secdata(&scm, skb);
2151 
2152 	if (!(flags & MSG_PEEK)) {
2153 		if (UNIXCB(skb).fp)
2154 			unix_detach_fds(&scm, skb);
2155 
2156 		sk_peek_offset_bwd(sk, skb->len);
2157 	} else {
2158 		/* The right behaviour here is debatable: on MSG_PEEK we
2159 		 * could
2160 		 * - not return fds at all (simple, but loses information),
2161 		 * - return fds but not return them again on the real
2162 		 *   read (the old strategy, apparently wrong), or
2163 		 * - clone the fds (chosen here as the most general
2164 		 *   solution).
2165 		 *
2166 		 * POSIX 1003.1g does not actually define this case
2167 		 * clearly - but then it leaves a lot of things unclear.
2168 		 */
2170 
2171 		sk_peek_offset_fwd(sk, size);
2172 
2173 		if (UNIXCB(skb).fp)
2174 			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2175 	}
2176 	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2177 
2178 	scm_recv(sock, msg, &scm, flags);
2179 
2180 out_free:
2181 	skb_free_datagram(sk, skb);
2182 	mutex_unlock(&u->iolock);
2183 out:
2184 	return err;
2185 }
2186 
2187 /*
2188  *	Sleep until more data has arrived, but check for races.
2189  */
2190 static long unix_stream_data_wait(struct sock *sk, long timeo,
2191 				  struct sk_buff *last, unsigned int last_len,
2192 				  bool freezable)
2193 {
2194 	struct sk_buff *tail;
2195 	DEFINE_WAIT(wait);
2196 
2197 	unix_state_lock(sk);
2198 
2199 	for (;;) {
2200 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2201 
2202 		tail = skb_peek_tail(&sk->sk_receive_queue);
2203 		if (tail != last ||
2204 		    (tail && tail->len != last_len) ||
2205 		    sk->sk_err ||
2206 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2207 		    signal_pending(current) ||
2208 		    !timeo)
2209 			break;
2210 
2211 		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2212 		unix_state_unlock(sk);
2213 		if (freezable)
2214 			timeo = freezable_schedule_timeout(timeo);
2215 		else
2216 			timeo = schedule_timeout(timeo);
2217 		unix_state_lock(sk);
2218 
2219 		if (sock_flag(sk, SOCK_DEAD))
2220 			break;
2221 
2222 		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2223 	}
2224 
2225 	finish_wait(sk_sleep(sk), &wait);
2226 	unix_state_unlock(sk);
2227 	return timeo;
2228 }
2229 
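
/* Callers pick freezability: unix_stream_recvmsg() waits freezably so
 * a blocked reader does not hold up system suspend, while the splice
 * path passes freezable == false, presumably because it may be called
 * with the pipe locked.
 */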
2230 static unsigned int unix_skb_len(const struct sk_buff *skb)
2231 {
2232 	return skb->len - UNIXCB(skb).consumed;
2233 }
2234 
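
/* Stream reads may consume an skb only partially: UNIXCB(skb).consumed
 * counts the bytes already handed to readers, so unix_skb_len() is the
 * number of bytes still readable from this skb.
 */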
2235 struct unix_stream_read_state {
2236 	int (*recv_actor)(struct sk_buff *, int, int,
2237 			  struct unix_stream_read_state *);
2238 	struct socket *socket;
2239 	struct msghdr *msg;
2240 	struct pipe_inode_info *pipe;
2241 	size_t size;
2242 	int flags;
2243 	unsigned int splice_flags;
2244 };
2245 
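
/* unix_stream_read_generic() walks the receive queue and hands each
 * readable chunk to ->recv_actor: unix_stream_read_actor() copies into
 * state->msg for recvmsg(), while unix_stream_splice_actor() feeds
 * state->pipe for splice().
 */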
2246 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2247 				    bool freezable)
2248 {
2249 	struct scm_cookie scm;
2250 	struct socket *sock = state->socket;
2251 	struct sock *sk = sock->sk;
2252 	struct unix_sock *u = unix_sk(sk);
2253 	int copied = 0;
2254 	int flags = state->flags;
2255 	int noblock = flags & MSG_DONTWAIT;
2256 	bool check_creds = false;
2257 	int target;
2258 	int err = 0;
2259 	long timeo;
2260 	int skip;
2261 	size_t size = state->size;
2262 	unsigned int last_len;
2263 
2264 	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2265 		err = -EINVAL;
2266 		goto out;
2267 	}
2268 
2269 	if (unlikely(flags & MSG_OOB)) {
2270 		err = -EOPNOTSUPP;
2271 		goto out;
2272 	}
2273 
2274 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2275 	timeo = sock_rcvtimeo(sk, noblock);
2276 
2277 	memset(&scm, 0, sizeof(scm));
2278 
2279 	/* Lock the socket to prevent the receive queue from being
2280 	 * reordered while we sleep copying data out to the message.
2281 	 */
2282 	mutex_lock(&u->iolock);
2283 
2284 	skip = max(sk_peek_offset(sk, flags), 0);
2285 
2286 	do {
2287 		int chunk;
2288 		bool drop_skb;
2289 		struct sk_buff *skb, *last;
2290 
2291 redo:
2292 		unix_state_lock(sk);
2293 		if (sock_flag(sk, SOCK_DEAD)) {
2294 			err = -ECONNRESET;
2295 			goto unlock;
2296 		}
2297 		last = skb = skb_peek(&sk->sk_receive_queue);
2298 		last_len = last ? last->len : 0;
2299 again:
2300 		if (skb == NULL) {
2301 			if (copied >= target)
2302 				goto unlock;
2303 
2304 			/*
2305 			 *	POSIX 1003.1g mandates this order.
2306 			 */
2307 
2308 			err = sock_error(sk);
2309 			if (err)
2310 				goto unlock;
2311 			if (sk->sk_shutdown & RCV_SHUTDOWN)
2312 				goto unlock;
2313 
2314 			unix_state_unlock(sk);
2315 			if (!timeo) {
2316 				err = -EAGAIN;
2317 				break;
2318 			}
2319 
2320 			mutex_unlock(&u->iolock);
2321 
2322 			timeo = unix_stream_data_wait(sk, timeo, last,
2323 						      last_len, freezable);
2324 
2325 			if (signal_pending(current)) {
2326 				err = sock_intr_errno(timeo);
2327 				scm_destroy(&scm);
2328 				goto out;
2329 			}
2330 
2331 			mutex_lock(&u->iolock);
2332 			goto redo;
2333 unlock:
2334 			unix_state_unlock(sk);
2335 			break;
2336 		}
2337 
2338 		while (skip >= unix_skb_len(skb)) {
2339 			skip -= unix_skb_len(skb);
2340 			last = skb;
2341 			last_len = skb->len;
2342 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2343 			if (!skb)
2344 				goto again;
2345 		}
2346 
2347 		unix_state_unlock(sk);
2348 
2349 		if (check_creds) {
2350 			/* Never glue messages from different writers */
2351 			if (!unix_skb_scm_eq(skb, &scm))
2352 				break;
2353 		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2354 			/* Copy credentials */
2355 			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2356 			unix_set_secdata(&scm, skb);
2357 			check_creds = true;
2358 		}
2359 
2360 		/* Copy address just once */
2361 		if (state->msg && state->msg->msg_name) {
2362 			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2363 					 state->msg->msg_name);
2364 			unix_copy_addr(state->msg, skb->sk);
2365 			sunaddr = NULL;
2366 		}
2367 
2368 		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2369 		skb_get(skb);
2370 		chunk = state->recv_actor(skb, skip, chunk, state);
2371 		drop_skb = !unix_skb_len(skb);
2372 		/* skb is only safe to use if !drop_skb */
2373 		consume_skb(skb);
2374 		if (chunk < 0) {
2375 			if (copied == 0)
2376 				copied = -EFAULT;
2377 			break;
2378 		}
2379 		copied += chunk;
2380 		size -= chunk;
2381 
2382 		if (drop_skb) {
2383 			/* A concurrent reader fully consumed this skb, so
2384 			 * it was unlinked from the socket queue and must
2385 			 * be treated as invalid from here on; report a
2386 			 * short read instead.
2387 			 */
2390 			err = 0;
2391 			break;
2392 		}
2393 
2394 		/* Mark read part of skb as used */
2395 		if (!(flags & MSG_PEEK)) {
2396 			UNIXCB(skb).consumed += chunk;
2397 
2398 			sk_peek_offset_bwd(sk, chunk);
2399 
2400 			if (UNIXCB(skb).fp) {
2401 				scm_stat_del(sk, skb);
2402 				unix_detach_fds(&scm, skb);
2403 			}
2404 
2405 			if (unix_skb_len(skb))
2406 				break;
2407 
2408 			skb_unlink(skb, &sk->sk_receive_queue);
2409 			consume_skb(skb);
2410 
2411 			if (scm.fp)
2412 				break;
2413 		} else {
2414 			/* Debatable; see the MSG_PEEK note in unix_dgram_recvmsg().
2415 			 */
2416 			if (UNIXCB(skb).fp)
2417 				scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2418 
2419 			sk_peek_offset_fwd(sk, chunk);
2420 
2421 			if (UNIXCB(skb).fp)
2422 				break;
2423 
2424 			skip = 0;
2425 			last = skb;
2426 			last_len = skb->len;
2427 			unix_state_lock(sk);
2428 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2429 			if (skb)
2430 				goto again;
2431 			unix_state_unlock(sk);
2432 			break;
2433 		}
2434 	} while (size);
2435 
2436 	mutex_unlock(&u->iolock);
2437 	if (state->msg)
2438 		scm_recv(sock, state->msg, &scm, flags);
2439 	else
2440 		scm_destroy(&scm);
2441 out:
2442 	return copied ? : err;
2443 }
2444 
2445 static int unix_stream_read_actor(struct sk_buff *skb,
2446 				  int skip, int chunk,
2447 				  struct unix_stream_read_state *state)
2448 {
2449 	int ret;
2450 
2451 	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2452 				    state->msg, chunk);
2453 	return ret ?: chunk;
2454 }
2455 
2456 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2457 			       size_t size, int flags)
2458 {
2459 	struct unix_stream_read_state state = {
2460 		.recv_actor = unix_stream_read_actor,
2461 		.socket = sock,
2462 		.msg = msg,
2463 		.size = size,
2464 		.flags = flags
2465 	};
2466 
2467 	return unix_stream_read_generic(&state, true);
2468 }
2469 
2470 static int unix_stream_splice_actor(struct sk_buff *skb,
2471 				    int skip, int chunk,
2472 				    struct unix_stream_read_state *state)
2473 {
2474 	return skb_splice_bits(skb, state->socket->sk,
2475 			       UNIXCB(skb).consumed + skip,
2476 			       state->pipe, chunk, state->splice_flags);
2477 }
2478 
2479 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2480 				       struct pipe_inode_info *pipe,
2481 				       size_t size, unsigned int flags)
2482 {
2483 	struct unix_stream_read_state state = {
2484 		.recv_actor = unix_stream_splice_actor,
2485 		.socket = sock,
2486 		.pipe = pipe,
2487 		.size = size,
2488 		.splice_flags = flags,
2489 	};
2490 
2491 	if (unlikely(*ppos))
2492 		return -ESPIPE;
2493 
2494 	if (sock->file->f_flags & O_NONBLOCK ||
2495 	    flags & SPLICE_F_NONBLOCK)
2496 		state.flags = MSG_DONTWAIT;
2497 
2498 	return unix_stream_read_generic(&state, false);
2499 }
2500 
2501 static int unix_shutdown(struct socket *sock, int mode)
2502 {
2503 	struct sock *sk = sock->sk;
2504 	struct sock *other;
2505 
2506 	if (mode < SHUT_RD || mode > SHUT_RDWR)
2507 		return -EINVAL;
2508 	/* This maps:
2509 	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2510 	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2511 	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2512 	 */
2513 	++mode;
2514 
2515 	unix_state_lock(sk);
2516 	sk->sk_shutdown |= mode;
2517 	other = unix_peer(sk);
2518 	if (other)
2519 		sock_hold(other);
2520 	unix_state_unlock(sk);
2521 	sk->sk_state_change(sk);
2522 
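	/* For connection-oriented types, mirror the shutdown at the peer:
	 * our SEND_SHUTDOWN is the peer's RCV_SHUTDOWN and vice versa,
	 * so e.g. shutdown(fd, SHUT_WR) lets peer readers see EOF.
	 */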
2523 	if (other &&
2524 		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2525 
2526 		int peer_mode = 0;
2527 
2528 		if (mode & RCV_SHUTDOWN)
2529 			peer_mode |= SEND_SHUTDOWN;
2530 		if (mode & SEND_SHUTDOWN)
2531 			peer_mode |= RCV_SHUTDOWN;
2532 		unix_state_lock(other);
2533 		other->sk_shutdown |= peer_mode;
2534 		unix_state_unlock(other);
2535 		other->sk_state_change(other);
2536 		if (peer_mode == SHUTDOWN_MASK)
2537 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2538 		else if (peer_mode & RCV_SHUTDOWN)
2539 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2540 	}
2541 	if (other)
2542 		sock_put(other);
2543 
2544 	return 0;
2545 }
2546 
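/* Bytes readable without blocking (SIOCINQ): for stream and seqpacket
 * sockets this sums only the unconsumed part of each queued skb (see
 * unix_skb_len()); for datagram sockets it is the size of the next
 * message, i.e. what a single recv() would return.
 */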
2547 long unix_inq_len(struct sock *sk)
2548 {
2549 	struct sk_buff *skb;
2550 	long amount = 0;
2551 
2552 	if (sk->sk_state == TCP_LISTEN)
2553 		return -EINVAL;
2554 
2555 	spin_lock(&sk->sk_receive_queue.lock);
2556 	if (sk->sk_type == SOCK_STREAM ||
2557 	    sk->sk_type == SOCK_SEQPACKET) {
2558 		skb_queue_walk(&sk->sk_receive_queue, skb)
2559 			amount += unix_skb_len(skb);
2560 	} else {
2561 		skb = skb_peek(&sk->sk_receive_queue);
2562 		if (skb)
2563 			amount = skb->len;
2564 	}
2565 	spin_unlock(&sk->sk_receive_queue.lock);
2566 
2567 	return amount;
2568 }
2569 EXPORT_SYMBOL_GPL(unix_inq_len);
2570 
2571 long unix_outq_len(struct sock *sk)
2572 {
2573 	return sk_wmem_alloc_get(sk);
2574 }
2575 EXPORT_SYMBOL_GPL(unix_outq_len);
2576 
2577 static int unix_open_file(struct sock *sk)
2578 {
2579 	struct path path;
2580 	struct file *f;
2581 	int fd;
2582 
2583 	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2584 		return -EPERM;
2585 
2586 	if (!smp_load_acquire(&unix_sk(sk)->addr))
2587 		return -ENOENT;
2588 
2589 	path = unix_sk(sk)->path;
2590 	if (!path.dentry)
2591 		return -ENOENT;
2592 
2593 	path_get(&path);
2594 
2595 	fd = get_unused_fd_flags(O_CLOEXEC);
2596 	if (fd < 0)
2597 		goto out;
2598 
2599 	f = dentry_open(&path, O_PATH, current_cred());
2600 	if (IS_ERR(f)) {
2601 		put_unused_fd(fd);
2602 		fd = PTR_ERR(f);
2603 		goto out;
2604 	}
2605 
2606 	fd_install(fd, f);
2607 out:
2608 	path_put(&path);
2609 
2610 	return fd;
2611 }
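
/* Hypothetical userspace sketch (not from this file): with 'sfd' an
 * AF_UNIX socket bound to a filesystem path, and CAP_NET_ADMIN held
 * in the socket's network namespace, SIOCUNIXFILE returns an O_PATH
 * descriptor for the bound path:
 *
 *	int pfd = ioctl(sfd, SIOCUNIXFILE);
 *	struct stat st;
 *
 *	if (pfd >= 0)
 *		fstatat(pfd, "", &st, AT_EMPTY_PATH);
 *
 * The descriptor is opened O_PATH with O_CLOEXEC, as set up in
 * unix_open_file() above.
 */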
2612 
2613 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2614 {
2615 	struct sock *sk = sock->sk;
2616 	long amount = 0;
2617 	int err;
2618 
2619 	switch (cmd) {
2620 	case SIOCOUTQ:
2621 		amount = unix_outq_len(sk);
2622 		err = put_user(amount, (int __user *)arg);
2623 		break;
2624 	case SIOCINQ:
2625 		amount = unix_inq_len(sk);
2626 		if (amount < 0)
2627 			err = amount;
2628 		else
2629 			err = put_user(amount, (int __user *)arg);
2630 		break;
2631 	case SIOCUNIXFILE:
2632 		err = unix_open_file(sk);
2633 		break;
2634 	default:
2635 		err = -ENOIOCTLCMD;
2636 		break;
2637 	}
2638 	return err;
2639 }
2640 
2641 #ifdef CONFIG_COMPAT
2642 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2643 {
2644 	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
2645 }
2646 #endif
2647 
2648 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2649 {
2650 	struct sock *sk = sock->sk;
2651 	__poll_t mask;
2652 
2653 	sock_poll_wait(file, sock, wait);
2654 	mask = 0;
2655 
2656 	/* exceptional events? */
2657 	if (sk->sk_err)
2658 		mask |= EPOLLERR;
2659 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2660 		mask |= EPOLLHUP;
2661 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2662 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2663 
2664 	/* readable? */
2665 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2666 		mask |= EPOLLIN | EPOLLRDNORM;
2667 
2668 	/* Connection-based need to check for termination and startup */
2669 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2670 	    sk->sk_state == TCP_CLOSE)
2671 		mask |= EPOLLHUP;
2672 
2673 	/* We also report the socket writable when the other side has
2674 	 * shut down the connection; this prevents stuck sockets.
2675 	 */
2677 	if (unix_writable(sk))
2678 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2679 
2680 	return mask;
2681 }
2682 
2683 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
2684 				    poll_table *wait)
2685 {
2686 	struct sock *sk = sock->sk, *other;
2687 	unsigned int writable;
2688 	__poll_t mask;
2689 
2690 	sock_poll_wait(file, sock, wait);
2691 	mask = 0;
2692 
2693 	/* exceptional events? */
2694 	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
2695 		mask |= EPOLLERR |
2696 			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
2697 
2698 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2699 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2700 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2701 		mask |= EPOLLHUP;
2702 
2703 	/* readable? */
2704 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2705 		mask |= EPOLLIN | EPOLLRDNORM;
2706 
2707 	/* Connection-based need to check for termination and startup */
2708 	if (sk->sk_type == SOCK_SEQPACKET) {
2709 		if (sk->sk_state == TCP_CLOSE)
2710 			mask |= EPOLLHUP;
2711 		/* connection hasn't started yet? */
2712 		if (sk->sk_state == TCP_SYN_SENT)
2713 			return mask;
2714 	}
2715 
2716 	/* No write status requested, avoid expensive OUT tests. */
2717 	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
2718 		return mask;
2719 
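	/* A writable datagram socket must also have room at the peer: if
	 * the peer's queue is full, report not-writable and hook into its
	 * peer_wait queue so the wakeup in unix_dgram_recvmsg() reaches
	 * us.
	 */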
2720 	writable = unix_writable(sk);
2721 	if (writable) {
2722 		unix_state_lock(sk);
2723 
2724 		other = unix_peer(sk);
2725 		if (other && unix_peer(other) != sk &&
2726 		    unix_recvq_full(other) &&
2727 		    unix_dgram_peer_wake_me(sk, other))
2728 			writable = 0;
2729 
2730 		unix_state_unlock(sk);
2731 	}
2732 
2733 	if (writable)
2734 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2735 	else
2736 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2737 
2738 	return mask;
2739 }
2740 
2741 #ifdef CONFIG_PROC_FS
2742 
2743 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2744 
2745 #define get_bucket(x) ((x) >> BUCKET_SPACE)
2746 #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2747 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
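
/* The seq_file position packs (bucket, offset) into a single loff_t:
 * the high bits select a hash bucket and the low BUCKET_SPACE bits
 * index into it. E.g. on 64-bit with UNIX_HASH_BITS == 8,
 * BUCKET_SPACE is 64 - 9 - 1 = 54, so
 * set_bucket_offset(3, 2) == (3UL << 54) | 2.
 */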
2748 
2749 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2750 {
2751 	unsigned long offset = get_offset(*pos);
2752 	unsigned long bucket = get_bucket(*pos);
2753 	struct sock *sk;
2754 	unsigned long count = 0;
2755 
2756 	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2757 		if (sock_net(sk) != seq_file_net(seq))
2758 			continue;
2759 		if (++count == offset)
2760 			break;
2761 	}
2762 
2763 	return sk;
2764 }
2765 
2766 static struct sock *unix_next_socket(struct seq_file *seq,
2767 				     struct sock *sk,
2768 				     loff_t *pos)
2769 {
2770 	unsigned long bucket;
2771 
2772 	while (sk > (struct sock *)SEQ_START_TOKEN) {
2773 		sk = sk_next(sk);
2774 		if (!sk)
2775 			goto next_bucket;
2776 		if (sock_net(sk) == seq_file_net(seq))
2777 			return sk;
2778 	}
2779 
2780 	do {
2781 		sk = unix_from_bucket(seq, pos);
2782 		if (sk)
2783 			return sk;
2784 
2785 next_bucket:
2786 		bucket = get_bucket(*pos) + 1;
2787 		*pos = set_bucket_offset(bucket, 1);
2788 	} while (bucket < ARRAY_SIZE(unix_socket_table));
2789 
2790 	return NULL;
2791 }
2792 
2793 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2794 	__acquires(unix_table_lock)
2795 {
2796 	spin_lock(&unix_table_lock);
2797 
2798 	if (!*pos)
2799 		return SEQ_START_TOKEN;
2800 
2801 	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2802 		return NULL;
2803 
2804 	return unix_next_socket(seq, NULL, pos);
2805 }
2806 
2807 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2808 {
2809 	++*pos;
2810 	return unix_next_socket(seq, v, pos);
2811 }
2812 
2813 static void unix_seq_stop(struct seq_file *seq, void *v)
2814 	__releases(unix_table_lock)
2815 {
2816 	spin_unlock(&unix_table_lock);
2817 }
2818 
2819 static int unix_seq_show(struct seq_file *seq, void *v)
2820 {
2821 
2822 	if (v == SEQ_START_TOKEN)
2823 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2824 			 "Inode Path\n");
2825 	else {
2826 		struct sock *s = v;
2827 		struct unix_sock *u = unix_sk(s);
2828 		unix_state_lock(s);
2829 
2830 		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2831 			s,
2832 			refcount_read(&s->sk_refcnt),
2833 			0,
2834 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2835 			s->sk_type,
2836 			s->sk_socket ?
2837 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2838 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2839 			sock_i_ino(s));
2840 
2841 		if (u->addr) {	/* under unix_table_lock here */
2842 			int i, len;
2843 			seq_putc(seq, ' ');
2844 
2845 			i = 0;
2846 			len = u->addr->len - sizeof(short);
2847 			if (!UNIX_ABSTRACT(s))
2848 				len--;
2849 			else {
2850 				seq_putc(seq, '@');
2851 				i++;
2852 			}
2853 			for ( ; i < len; i++)
2854 				seq_putc(seq, u->addr->name->sun_path[i] ?:
2855 					 '@');
2856 		}
2857 		unix_state_unlock(s);
2858 		seq_putc(seq, '\n');
2859 	}
2860 
2861 	return 0;
2862 }
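
/* Illustrative /proc/net/unix line (made-up address and inode):
 *
 *	ffff8880b9f0c000: 00000002 00000000 00010000 0001 01 23456 /run/demo.sock
 *
 * i.e. refcount 2, flags __SO_ACCEPTCON (listening), type 0001
 * (SOCK_STREAM), state 01 (SS_UNCONNECTED), bound to /run/demo.sock.
 */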
2863 
2864 static const struct seq_operations unix_seq_ops = {
2865 	.start  = unix_seq_start,
2866 	.next   = unix_seq_next,
2867 	.stop   = unix_seq_stop,
2868 	.show   = unix_seq_show,
2869 };
2870 #endif
2871 
2872 static const struct net_proto_family unix_family_ops = {
2873 	.family = PF_UNIX,
2874 	.create = unix_create,
2875 	.owner	= THIS_MODULE,
2876 };
2877 
2878 
2879 static int __net_init unix_net_init(struct net *net)
2880 {
2881 	int error = -ENOMEM;
2882 
2883 	net->unx.sysctl_max_dgram_qlen = 10;
2884 	if (unix_sysctl_register(net))
2885 		goto out;
2886 
2887 #ifdef CONFIG_PROC_FS
2888 	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2889 			sizeof(struct seq_net_private))) {
2890 		unix_sysctl_unregister(net);
2891 		goto out;
2892 	}
2893 #endif
2894 	error = 0;
2895 out:
2896 	return error;
2897 }
2898 
2899 static void __net_exit unix_net_exit(struct net *net)
2900 {
2901 	unix_sysctl_unregister(net);
2902 	remove_proc_entry("unix", net->proc_net);
2903 }
2904 
2905 static struct pernet_operations unix_net_ops = {
2906 	.init = unix_net_init,
2907 	.exit = unix_net_exit,
2908 };
2909 
2910 static int __init af_unix_init(void)
2911 {
2912 	int rc = -1;
2913 
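	/* unix_skb_parms travels in skb->cb (48 bytes); fail the build
	 * rather than silently overflow it if the struct ever grows.
	 */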
2914 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
2915 
2916 	rc = proto_register(&unix_proto, 1);
2917 	if (rc != 0) {
2918 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2919 		goto out;
2920 	}
2921 
2922 	sock_register(&unix_family_ops);
2923 	register_pernet_subsys(&unix_net_ops);
2924 out:
2925 	return rc;
2926 }
2927 
2928 static void __exit af_unix_exit(void)
2929 {
2930 	sock_unregister(PF_UNIX);
2931 	proto_unregister(&unix_proto);
2932 	unregister_pernet_subsys(&unix_net_ops);
2933 }
2934 
2935 /* Earlier than device_initcall() so that other drivers invoking
2936  * request_module() don't end up in a loop when modprobe tries to
2937  * use a UNIX socket; but later than subsys_initcall() because we
2938  * depend on stuff initialised there. */
2939 fs_initcall(af_unix_init);
2940 module_exit(af_unix_exit);
2941 
2942 MODULE_LICENSE("GPL");
2943 MODULE_ALIAS_NETPROTO(PF_UNIX);
2944