xref: /openbmc/linux/net/unix/af_unix.c (revision adb19164)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * NET4:	Implementation of BSD Unix domain sockets.
4  *
5  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
6  *
7  * Fixes:
8  *		Linus Torvalds	:	Assorted bug cures.
9  *		Niibe Yutaka	:	async I/O support.
10  *		Carsten Paeth	:	PF_UNIX check, address fixes.
11  *		Alan Cox	:	Limit size of allocated blocks.
12  *		Alan Cox	:	Fixed the stupid socketpair bug.
13  *		Alan Cox	:	BSD compatibility fine tuning.
14  *		Alan Cox	:	Fixed a bug in connect when interrupted.
15  *		Alan Cox	:	Sorted out a proper draft version of
16  *					file descriptor passing hacked up from
17  *					Mike Shaver's work.
18  *		Marty Leisner	:	Fixes to fd passing
19  *		Nick Nevin	:	recvmsg bugfix.
20  *		Alan Cox	:	Started proper garbage collector
21  *		Heiko EiBfeldt	:	Missing verify_area check
22  *		Alan Cox	:	Started POSIXisms
23  *		Andreas Schwab	:	Replace inode by dentry for proper
24  *					reference counting
25  *		Kirk Petersen	:	Made this a module
26  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
27  *					Lots of bug fixes.
28  *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
29  *					by above two patches.
30  *	     Andrea Arcangeli	:	If possible we block in connect(2)
31  *					if the max backlog of the listen socket
32  *					has been reached. This won't break
33  *					old apps and it will avoid a huge number
34  *					of socks hashed (this is for unix_gc()
35  *					performance reasons).
36  *					Security fix that limits the max
37  *					number of socks to 2*max_files and
38  *					the number of skb queueable in the
39  *					dgram receiver.
40  *		Artur Skawina   :	Hash function optimizations
41  *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
42  *	      Malcolm Beattie   :	Set peercred for socketpair
43  *	     Michal Ostrowski   :       Module initialization cleanup.
44  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
45  *	     				the core infrastructure is doing that
46  *	     				for all net proto families now (2.5.69+)
47  *
48  * Known differences from reference BSD that was tested:
49  *
50  *	[TO FIX]
51  *	ECONNREFUSED is not returned from one end of a connected() socket to the
52  *		other the moment one end closes.
53  *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
54  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
55  *	[NOT TO FIX]
56  *	accept() returns a path name even if the connecting socket has closed
57  *		in the meantime (BSD loses the path and gives up).
58  *	accept() returns 0 length path for an unbound connector. BSD returns 16
59  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
61  *	BSD af_unix apparently has connect forgetting to block properly.
62  *		(need to check this with the POSIX spec in detail)
63  *
64  * Differences from 2.0.0-11-... (ANK)
65  *	Bug fixes and improvements.
66  *		- client shutdown killed server socket.
67  *		- removed all useless cli/sti pairs.
68  *
69  *	Semantic changes/extensions.
70  *		- generic control message passing.
71  *		- SCM_CREDENTIALS control message.
72  *		- "Abstract" (not FS based) socket bindings.
73  *		  Abstract names are sequences of bytes (not zero terminated)
74  *		  started by 0, so that this name space does not intersect
75  *		  with BSD names.
76  */
77 
78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79 
80 #include <linux/module.h>
81 #include <linux/kernel.h>
82 #include <linux/signal.h>
83 #include <linux/sched/signal.h>
84 #include <linux/errno.h>
85 #include <linux/string.h>
86 #include <linux/stat.h>
87 #include <linux/dcache.h>
88 #include <linux/namei.h>
89 #include <linux/socket.h>
90 #include <linux/un.h>
91 #include <linux/fcntl.h>
92 #include <linux/filter.h>
93 #include <linux/termios.h>
94 #include <linux/sockios.h>
95 #include <linux/net.h>
96 #include <linux/in.h>
97 #include <linux/fs.h>
98 #include <linux/slab.h>
99 #include <linux/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <net/net_namespace.h>
103 #include <net/sock.h>
104 #include <net/tcp_states.h>
105 #include <net/af_unix.h>
106 #include <linux/proc_fs.h>
107 #include <linux/seq_file.h>
108 #include <net/scm.h>
109 #include <linux/init.h>
110 #include <linux/poll.h>
111 #include <linux/rtnetlink.h>
112 #include <linux/mount.h>
113 #include <net/checksum.h>
114 #include <linux/security.h>
115 #include <linux/splice.h>
116 #include <linux/freezer.h>
117 #include <linux/file.h>
118 #include <linux/btf_ids.h>
119 
120 #include "scm.h"
121 
122 static atomic_long_t unix_nr_socks;
123 static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
124 static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
125 
126 /* SMP locking strategy:
127  *    hash table is protected with spinlock.
128  *    each socket state is protected by separate spinlock.
129  */
130 
131 static unsigned int unix_unbound_hash(struct sock *sk)
132 {
133 	unsigned long hash = (unsigned long)sk;
134 
135 	hash ^= hash >> 16;
136 	hash ^= hash >> 8;
137 	hash ^= sk->sk_type;
138 
139 	return hash & UNIX_HASH_MOD;
140 }
141 
142 static unsigned int unix_bsd_hash(struct inode *i)
143 {
144 	return i->i_ino & UNIX_HASH_MOD;
145 }
146 
147 static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
148 				       int addr_len, int type)
149 {
150 	__wsum csum = csum_partial(sunaddr, addr_len, 0);
151 	unsigned int hash;
152 
153 	hash = (__force unsigned int)csum_fold(csum);
154 	hash ^= hash >> 8;
155 	hash ^= type;
156 
157 	return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
158 }
159 
160 static void unix_table_double_lock(struct net *net,
161 				   unsigned int hash1, unsigned int hash2)
162 {
163 	if (hash1 == hash2) {
164 		spin_lock(&net->unx.table.locks[hash1]);
165 		return;
166 	}
167 
168 	if (hash1 > hash2)
169 		swap(hash1, hash2);
170 
171 	spin_lock(&net->unx.table.locks[hash1]);
172 	spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
173 }
174 
175 static void unix_table_double_unlock(struct net *net,
176 				     unsigned int hash1, unsigned int hash2)
177 {
178 	if (hash1 == hash2) {
179 		spin_unlock(&net->unx.table.locks[hash1]);
180 		return;
181 	}
182 
183 	spin_unlock(&net->unx.table.locks[hash1]);
184 	spin_unlock(&net->unx.table.locks[hash2]);
185 }
186 
187 #ifdef CONFIG_SECURITY_NETWORK
188 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
189 {
190 	UNIXCB(skb).secid = scm->secid;
191 }
192 
193 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
194 {
195 	scm->secid = UNIXCB(skb).secid;
196 }
197 
198 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
199 {
200 	return (scm->secid == UNIXCB(skb).secid);
201 }
202 #else
203 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
204 { }
205 
206 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
207 { }
208 
209 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
210 {
211 	return true;
212 }
213 #endif /* CONFIG_SECURITY_NETWORK */
214 
/* Non-zero when osk's peer pointer is sk. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return sk == unix_peer(osk);
}
219 
220 static inline int unix_may_send(struct sock *sk, struct sock *osk)
221 {
222 	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
223 }
224 
225 static inline int unix_recvq_full(const struct sock *sk)
226 {
227 	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
228 }
229 
230 static inline int unix_recvq_full_lockless(const struct sock *sk)
231 {
232 	return skb_queue_len_lockless(&sk->sk_receive_queue) >
233 		READ_ONCE(sk->sk_max_ack_backlog);
234 }
235 
236 struct sock *unix_peer_get(struct sock *s)
237 {
238 	struct sock *peer;
239 
240 	unix_state_lock(s);
241 	peer = unix_peer(s);
242 	if (peer)
243 		sock_hold(peer);
244 	unix_state_unlock(s);
245 	return peer;
246 }
247 EXPORT_SYMBOL_GPL(unix_peer_get);
248 
249 static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
250 					     int addr_len)
251 {
252 	struct unix_address *addr;
253 
254 	addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
255 	if (!addr)
256 		return NULL;
257 
258 	refcount_set(&addr->refcnt, 1);
259 	addr->len = addr_len;
260 	memcpy(addr->name, sunaddr, addr_len);
261 
262 	return addr;
263 }
264 
265 static inline void unix_release_addr(struct unix_address *addr)
266 {
267 	if (refcount_dec_and_test(&addr->refcnt))
268 		kfree(addr);
269 }
270 
/*
 *	Sanity-check a unix socket address:
 *		- the length must extend past the start of sun_path (the name
 *		  must not be zero length) and must not exceed
 *		  sizeof(struct sockaddr_un);
 *		- sun_family must be AF_UNIX.
 *
 *	A name whose first byte is non-zero is a filesystem object and should
 *	be NUL terminated; a name whose first byte is zero is an abstract name.
 *	That distinction is not checked here.
 *
 *	Returns 0 when the address passes, -EINVAL otherwise.
 */

static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
{
	bool too_short = addr_len <= offsetof(struct sockaddr_un, sun_path);
	bool too_long = addr_len > sizeof(*sunaddr);

	if (too_short || too_long)
		return -EINVAL;

	return sunaddr->sun_family == AF_UNIX ? 0 : -EINVAL;
}
289 
290 static int unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
291 {
292 	struct sockaddr_storage *addr = (struct sockaddr_storage *)sunaddr;
293 	short offset = offsetof(struct sockaddr_storage, __data);
294 
295 	BUILD_BUG_ON(offset != offsetof(struct sockaddr_un, sun_path));
296 
297 	/* This may look like an off by one error but it is a bit more
298 	 * subtle.  108 is the longest valid AF_UNIX path for a binding.
299 	 * sun_path[108] doesn't as such exist.  However in kernel space
300 	 * we are guaranteed that it is a valid memory location in our
301 	 * kernel address buffer because syscall functions always pass
302 	 * a pointer of struct sockaddr_storage which has a bigger buffer
303 	 * than 108.  Also, we must terminate sun_path for strlen() in
304 	 * getname_kernel().
305 	 */
306 	addr->__data[addr_len - offset] = 0;
307 
308 	/* Don't pass sunaddr->sun_path to strlen().  Otherwise, 108 will
309 	 * cause panic if CONFIG_FORTIFY_SOURCE=y.  Let __fortify_strlen()
310 	 * know the actual buffer.
311 	 */
312 	return strlen(addr->__data) + offset + 1;
313 }
314 
/* Unhash sk from its bucket.  No lock is taken here; unix_remove_socket()
 * is the variant that wraps this in the bucket lock.
 */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
319 
320 static void __unix_insert_socket(struct net *net, struct sock *sk)
321 {
322 	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
323 	sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
324 }
325 
326 static void __unix_set_addr_hash(struct net *net, struct sock *sk,
327 				 struct unix_address *addr, unsigned int hash)
328 {
329 	__unix_remove_socket(sk);
330 	smp_store_release(&unix_sk(sk)->addr, addr);
331 
332 	sk->sk_hash = hash;
333 	__unix_insert_socket(net, sk);
334 }
335 
336 static void unix_remove_socket(struct net *net, struct sock *sk)
337 {
338 	spin_lock(&net->unx.table.locks[sk->sk_hash]);
339 	__unix_remove_socket(sk);
340 	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
341 }
342 
343 static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
344 {
345 	spin_lock(&net->unx.table.locks[sk->sk_hash]);
346 	__unix_insert_socket(net, sk);
347 	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
348 }
349 
350 static void unix_insert_bsd_socket(struct sock *sk)
351 {
352 	spin_lock(&bsd_socket_locks[sk->sk_hash]);
353 	sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
354 	spin_unlock(&bsd_socket_locks[sk->sk_hash]);
355 }
356 
357 static void unix_remove_bsd_socket(struct sock *sk)
358 {
359 	if (!hlist_unhashed(&sk->sk_bind_node)) {
360 		spin_lock(&bsd_socket_locks[sk->sk_hash]);
361 		__sk_del_bind_node(sk);
362 		spin_unlock(&bsd_socket_locks[sk->sk_hash]);
363 
364 		sk_node_init(&sk->sk_bind_node);
365 	}
366 }
367 
368 static struct sock *__unix_find_socket_byname(struct net *net,
369 					      struct sockaddr_un *sunname,
370 					      int len, unsigned int hash)
371 {
372 	struct sock *s;
373 
374 	sk_for_each(s, &net->unx.table.buckets[hash]) {
375 		struct unix_sock *u = unix_sk(s);
376 
377 		if (u->addr->len == len &&
378 		    !memcmp(u->addr->name, sunname, len))
379 			return s;
380 	}
381 	return NULL;
382 }
383 
384 static inline struct sock *unix_find_socket_byname(struct net *net,
385 						   struct sockaddr_un *sunname,
386 						   int len, unsigned int hash)
387 {
388 	struct sock *s;
389 
390 	spin_lock(&net->unx.table.locks[hash]);
391 	s = __unix_find_socket_byname(net, sunname, len, hash);
392 	if (s)
393 		sock_hold(s);
394 	spin_unlock(&net->unx.table.locks[hash]);
395 	return s;
396 }
397 
398 static struct sock *unix_find_socket_byinode(struct inode *i)
399 {
400 	unsigned int hash = unix_bsd_hash(i);
401 	struct sock *s;
402 
403 	spin_lock(&bsd_socket_locks[hash]);
404 	sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
405 		struct dentry *dentry = unix_sk(s)->path.dentry;
406 
407 		if (dentry && d_backing_inode(dentry) == i) {
408 			sock_hold(s);
409 			spin_unlock(&bsd_socket_locks[hash]);
410 			return s;
411 		}
412 	}
413 	spin_unlock(&bsd_socket_locks[hash]);
414 	return NULL;
415 }
416 
417 /* Support code for asymmetrically connected dgram sockets
418  *
419  * If a datagram socket is connected to a socket not itself connected
420  * to the first socket (eg, /dev/log), clients may only enqueue more
421  * messages if the present receive queue of the server socket is not
422  * "too large". This means there's a second writeability condition
423  * poll and sendmsg need to test. The dgram recv code will do a wake
424  * up on the peer_wait wait queue of a socket upon reception of a
425  * datagram which needs to be propagated to sleeping would-be writers
426  * since these might not have sent anything so far. This can't be
427  * accomplished via poll_wait because the lifetime of the server
428  * socket might be less than that of its clients if these break their
429  * association with it or if the server socket is closed while clients
430  * are still connected to it and there's no way to inform "a polling
431  * implementation" that it should let go of a certain wait queue
432  *
433  * In order to propagate a wake up, a wait_queue_entry_t of the client
434  * socket is enqueued on the peer_wait queue of the server socket
435  * whose wake function does a wake_up on the ordinary client socket
436  * wait queue. This connection is established whenever a write (or
437  * poll for write) hit the flow control condition and broken when the
438  * association to the server socket is dissolved or after a wake up
439  * was relayed.
440  */
441 
442 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
443 				      void *key)
444 {
445 	struct unix_sock *u;
446 	wait_queue_head_t *u_sleep;
447 
448 	u = container_of(q, struct unix_sock, peer_wake);
449 
450 	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
451 			    q);
452 	u->peer_wake.private = NULL;
453 
454 	/* relaying can only happen while the wq still exists */
455 	u_sleep = sk_sleep(&u->sk);
456 	if (u_sleep)
457 		wake_up_interruptible_poll(u_sleep, key_to_poll(key));
458 
459 	return 0;
460 }
461 
462 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
463 {
464 	struct unix_sock *u, *u_other;
465 	int rc;
466 
467 	u = unix_sk(sk);
468 	u_other = unix_sk(other);
469 	rc = 0;
470 	spin_lock(&u_other->peer_wait.lock);
471 
472 	if (!u->peer_wake.private) {
473 		u->peer_wake.private = other;
474 		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
475 
476 		rc = 1;
477 	}
478 
479 	spin_unlock(&u_other->peer_wait.lock);
480 	return rc;
481 }
482 
483 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
484 					    struct sock *other)
485 {
486 	struct unix_sock *u, *u_other;
487 
488 	u = unix_sk(sk);
489 	u_other = unix_sk(other);
490 	spin_lock(&u_other->peer_wait.lock);
491 
492 	if (u->peer_wake.private == other) {
493 		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
494 		u->peer_wake.private = NULL;
495 	}
496 
497 	spin_unlock(&u_other->peer_wait.lock);
498 }
499 
500 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
501 						   struct sock *other)
502 {
503 	unix_dgram_peer_wake_disconnect(sk, other);
504 	wake_up_interruptible_poll(sk_sleep(sk),
505 				   EPOLLOUT |
506 				   EPOLLWRNORM |
507 				   EPOLLWRBAND);
508 }
509 
510 /* preconditions:
511  *	- unix_peer(sk) == other
512  *	- association is stable
513  */
514 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
515 {
516 	int connected;
517 
518 	connected = unix_dgram_peer_wake_connect(sk, other);
519 
520 	/* If other is SOCK_DEAD, we want to make sure we signal
521 	 * POLLOUT, such that a subsequent write() can get a
522 	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
523 	 * to other and its full, we will hang waiting for POLLOUT.
524 	 */
525 	if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
526 		return 1;
527 
528 	if (connected)
529 		unix_dgram_peer_wake_disconnect(sk, other);
530 
531 	return 0;
532 }
533 
534 static int unix_writable(const struct sock *sk)
535 {
536 	return sk->sk_state != TCP_LISTEN &&
537 	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
538 }
539 
540 static void unix_write_space(struct sock *sk)
541 {
542 	struct socket_wq *wq;
543 
544 	rcu_read_lock();
545 	if (unix_writable(sk)) {
546 		wq = rcu_dereference(sk->sk_wq);
547 		if (skwq_has_sleeper(wq))
548 			wake_up_interruptible_sync_poll(&wq->wait,
549 				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
550 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
551 	}
552 	rcu_read_unlock();
553 }
554 
555 /* When dgram socket disconnects (or changes its peer), we clear its receive
556  * queue of packets arrived from previous peer. First, it allows to do
557  * flow control based only on wmem_alloc; second, sk connected to peer
558  * may receive messages only from that peer. */
559 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
560 {
561 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
562 		skb_queue_purge(&sk->sk_receive_queue);
563 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
564 
565 		/* If one link of bidirectional dgram pipe is disconnected,
566 		 * we signal error. Messages are lost. Do not make this,
567 		 * when peer was not connected to us.
568 		 */
569 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
570 			WRITE_ONCE(other->sk_err, ECONNRESET);
571 			sk_error_report(other);
572 		}
573 	}
574 	other->sk_state = TCP_CLOSE;
575 }
576 
577 static void unix_sock_destructor(struct sock *sk)
578 {
579 	struct unix_sock *u = unix_sk(sk);
580 
581 	skb_queue_purge(&sk->sk_receive_queue);
582 
583 	DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
584 	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
585 	DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
586 	if (!sock_flag(sk, SOCK_DEAD)) {
587 		pr_info("Attempt to release alive unix socket: %p\n", sk);
588 		return;
589 	}
590 
591 	if (u->addr)
592 		unix_release_addr(u->addr);
593 
594 	atomic_long_dec(&unix_nr_socks);
595 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
596 #ifdef UNIX_REFCNT_DEBUG
597 	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
598 		atomic_long_read(&unix_nr_socks));
599 #endif
600 }
601 
602 static void unix_release_sock(struct sock *sk, int embrion)
603 {
604 	struct unix_sock *u = unix_sk(sk);
605 	struct sock *skpair;
606 	struct sk_buff *skb;
607 	struct path path;
608 	int state;
609 
610 	unix_remove_socket(sock_net(sk), sk);
611 	unix_remove_bsd_socket(sk);
612 
613 	/* Clear state */
614 	unix_state_lock(sk);
615 	sock_orphan(sk);
616 	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
617 	path	     = u->path;
618 	u->path.dentry = NULL;
619 	u->path.mnt = NULL;
620 	state = sk->sk_state;
621 	sk->sk_state = TCP_CLOSE;
622 
623 	skpair = unix_peer(sk);
624 	unix_peer(sk) = NULL;
625 
626 	unix_state_unlock(sk);
627 
628 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
629 	if (u->oob_skb) {
630 		kfree_skb(u->oob_skb);
631 		u->oob_skb = NULL;
632 	}
633 #endif
634 
635 	wake_up_interruptible_all(&u->peer_wait);
636 
637 	if (skpair != NULL) {
638 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
639 			unix_state_lock(skpair);
640 			/* No more writes */
641 			WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
642 			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
643 				WRITE_ONCE(skpair->sk_err, ECONNRESET);
644 			unix_state_unlock(skpair);
645 			skpair->sk_state_change(skpair);
646 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
647 		}
648 
649 		unix_dgram_peer_wake_disconnect(sk, skpair);
650 		sock_put(skpair); /* It may now die */
651 	}
652 
653 	/* Try to flush out this socket. Throw out buffers at least */
654 
655 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
656 		if (state == TCP_LISTEN)
657 			unix_release_sock(skb->sk, 1);
658 		/* passed fds are erased in the kfree_skb hook	      */
659 		UNIXCB(skb).consumed = skb->len;
660 		kfree_skb(skb);
661 	}
662 
663 	if (path.dentry)
664 		path_put(&path);
665 
666 	sock_put(sk);
667 
668 	/* ---- Socket is dead now and most probably destroyed ---- */
669 
670 	/*
671 	 * Fixme: BSD difference: In BSD all sockets connected to us get
672 	 *	  ECONNRESET and we die on the spot. In Linux we behave
673 	 *	  like files and pipes do and wait for the last
674 	 *	  dereference.
675 	 *
676 	 * Can't we simply set sock->err?
677 	 *
678 	 *	  What the above comment does talk about? --ANK(980817)
679 	 */
680 
681 	if (READ_ONCE(unix_tot_inflight))
682 		unix_gc();		/* Garbage collect fds */
683 }
684 
685 static void init_peercred(struct sock *sk)
686 {
687 	const struct cred *old_cred;
688 	struct pid *old_pid;
689 
690 	spin_lock(&sk->sk_peer_lock);
691 	old_pid = sk->sk_peer_pid;
692 	old_cred = sk->sk_peer_cred;
693 	sk->sk_peer_pid  = get_pid(task_tgid(current));
694 	sk->sk_peer_cred = get_current_cred();
695 	spin_unlock(&sk->sk_peer_lock);
696 
697 	put_pid(old_pid);
698 	put_cred(old_cred);
699 }
700 
701 static void copy_peercred(struct sock *sk, struct sock *peersk)
702 {
703 	const struct cred *old_cred;
704 	struct pid *old_pid;
705 
706 	if (sk < peersk) {
707 		spin_lock(&sk->sk_peer_lock);
708 		spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
709 	} else {
710 		spin_lock(&peersk->sk_peer_lock);
711 		spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
712 	}
713 	old_pid = sk->sk_peer_pid;
714 	old_cred = sk->sk_peer_cred;
715 	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
716 	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
717 
718 	spin_unlock(&sk->sk_peer_lock);
719 	spin_unlock(&peersk->sk_peer_lock);
720 
721 	put_pid(old_pid);
722 	put_cred(old_cred);
723 }
724 
725 static int unix_listen(struct socket *sock, int backlog)
726 {
727 	int err;
728 	struct sock *sk = sock->sk;
729 	struct unix_sock *u = unix_sk(sk);
730 
731 	err = -EOPNOTSUPP;
732 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
733 		goto out;	/* Only stream/seqpacket sockets accept */
734 	err = -EINVAL;
735 	if (!u->addr)
736 		goto out;	/* No listens on an unbound socket */
737 	unix_state_lock(sk);
738 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
739 		goto out_unlock;
740 	if (backlog > sk->sk_max_ack_backlog)
741 		wake_up_interruptible_all(&u->peer_wait);
742 	sk->sk_max_ack_backlog	= backlog;
743 	sk->sk_state		= TCP_LISTEN;
744 	/* set credentials so connect can copy them */
745 	init_peercred(sk);
746 	err = 0;
747 
748 out_unlock:
749 	unix_state_unlock(sk);
750 out:
751 	return err;
752 }
753 
754 static int unix_release(struct socket *);
755 static int unix_bind(struct socket *, struct sockaddr *, int);
756 static int unix_stream_connect(struct socket *, struct sockaddr *,
757 			       int addr_len, int flags);
758 static int unix_socketpair(struct socket *, struct socket *);
759 static int unix_accept(struct socket *, struct socket *, int, bool);
760 static int unix_getname(struct socket *, struct sockaddr *, int);
761 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
762 static __poll_t unix_dgram_poll(struct file *, struct socket *,
763 				    poll_table *);
764 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
765 #ifdef CONFIG_COMPAT
766 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
767 #endif
768 static int unix_shutdown(struct socket *, int);
769 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
770 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
771 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
772 				       struct pipe_inode_info *, size_t size,
773 				       unsigned int flags);
774 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
775 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
776 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
777 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
778 static int unix_dgram_connect(struct socket *, struct sockaddr *,
779 			      int, int);
780 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
781 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
782 				  int);
783 
784 static int unix_set_peek_off(struct sock *sk, int val)
785 {
786 	struct unix_sock *u = unix_sk(sk);
787 
788 	if (mutex_lock_interruptible(&u->iolock))
789 		return -EINTR;
790 
791 	WRITE_ONCE(sk->sk_peek_off, val);
792 	mutex_unlock(&u->iolock);
793 
794 	return 0;
795 }
796 
797 #ifdef CONFIG_PROC_FS
798 static int unix_count_nr_fds(struct sock *sk)
799 {
800 	struct sk_buff *skb;
801 	struct unix_sock *u;
802 	int nr_fds = 0;
803 
804 	spin_lock(&sk->sk_receive_queue.lock);
805 	skb = skb_peek(&sk->sk_receive_queue);
806 	while (skb) {
807 		u = unix_sk(skb->sk);
808 		nr_fds += atomic_read(&u->scm_stat.nr_fds);
809 		skb = skb_peek_next(skb, &sk->sk_receive_queue);
810 	}
811 	spin_unlock(&sk->sk_receive_queue.lock);
812 
813 	return nr_fds;
814 }
815 
816 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
817 {
818 	struct sock *sk = sock->sk;
819 	unsigned char s_state;
820 	struct unix_sock *u;
821 	int nr_fds = 0;
822 
823 	if (sk) {
824 		s_state = READ_ONCE(sk->sk_state);
825 		u = unix_sk(sk);
826 
827 		/* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
828 		 * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
829 		 * SOCK_DGRAM is ordinary. So, no lock is needed.
830 		 */
831 		if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED)
832 			nr_fds = atomic_read(&u->scm_stat.nr_fds);
833 		else if (s_state == TCP_LISTEN)
834 			nr_fds = unix_count_nr_fds(sk);
835 
836 		seq_printf(m, "scm_fds: %u\n", nr_fds);
837 	}
838 }
839 #else
840 #define unix_show_fdinfo NULL
841 #endif
842 
843 static const struct proto_ops unix_stream_ops = {
844 	.family =	PF_UNIX,
845 	.owner =	THIS_MODULE,
846 	.release =	unix_release,
847 	.bind =		unix_bind,
848 	.connect =	unix_stream_connect,
849 	.socketpair =	unix_socketpair,
850 	.accept =	unix_accept,
851 	.getname =	unix_getname,
852 	.poll =		unix_poll,
853 	.ioctl =	unix_ioctl,
854 #ifdef CONFIG_COMPAT
855 	.compat_ioctl =	unix_compat_ioctl,
856 #endif
857 	.listen =	unix_listen,
858 	.shutdown =	unix_shutdown,
859 	.sendmsg =	unix_stream_sendmsg,
860 	.recvmsg =	unix_stream_recvmsg,
861 	.read_skb =	unix_stream_read_skb,
862 	.mmap =		sock_no_mmap,
863 	.splice_read =	unix_stream_splice_read,
864 	.set_peek_off =	unix_set_peek_off,
865 	.show_fdinfo =	unix_show_fdinfo,
866 };
867 
868 static const struct proto_ops unix_dgram_ops = {
869 	.family =	PF_UNIX,
870 	.owner =	THIS_MODULE,
871 	.release =	unix_release,
872 	.bind =		unix_bind,
873 	.connect =	unix_dgram_connect,
874 	.socketpair =	unix_socketpair,
875 	.accept =	sock_no_accept,
876 	.getname =	unix_getname,
877 	.poll =		unix_dgram_poll,
878 	.ioctl =	unix_ioctl,
879 #ifdef CONFIG_COMPAT
880 	.compat_ioctl =	unix_compat_ioctl,
881 #endif
882 	.listen =	sock_no_listen,
883 	.shutdown =	unix_shutdown,
884 	.sendmsg =	unix_dgram_sendmsg,
885 	.read_skb =	unix_read_skb,
886 	.recvmsg =	unix_dgram_recvmsg,
887 	.mmap =		sock_no_mmap,
888 	.set_peek_off =	unix_set_peek_off,
889 	.show_fdinfo =	unix_show_fdinfo,
890 };
891 
892 static const struct proto_ops unix_seqpacket_ops = {
893 	.family =	PF_UNIX,
894 	.owner =	THIS_MODULE,
895 	.release =	unix_release,
896 	.bind =		unix_bind,
897 	.connect =	unix_stream_connect,
898 	.socketpair =	unix_socketpair,
899 	.accept =	unix_accept,
900 	.getname =	unix_getname,
901 	.poll =		unix_dgram_poll,
902 	.ioctl =	unix_ioctl,
903 #ifdef CONFIG_COMPAT
904 	.compat_ioctl =	unix_compat_ioctl,
905 #endif
906 	.listen =	unix_listen,
907 	.shutdown =	unix_shutdown,
908 	.sendmsg =	unix_seqpacket_sendmsg,
909 	.recvmsg =	unix_seqpacket_recvmsg,
910 	.mmap =		sock_no_mmap,
911 	.set_peek_off =	unix_set_peek_off,
912 	.show_fdinfo =	unix_show_fdinfo,
913 };
914 
/* Deliberately empty ->close(): unix sockets have nothing to do here.  The
 * hook exists merely for sockmap.
 */
static void unix_close(struct sock *sk, long timeout)
{
}
921 
/* Deliberately empty ->unhash(): unix sockets have nothing to do here.  The
 * hook exists merely for sockmap.
 */
static void unix_unhash(struct sock *sk)
{
}
928 
929 static bool unix_bpf_bypass_getsockopt(int level, int optname)
930 {
931 	if (level == SOL_SOCKET) {
932 		switch (optname) {
933 		case SO_PEERPIDFD:
934 			return true;
935 		default:
936 			return false;
937 		}
938 	}
939 
940 	return false;
941 }
942 
943 struct proto unix_dgram_proto = {
944 	.name			= "UNIX",
945 	.owner			= THIS_MODULE,
946 	.obj_size		= sizeof(struct unix_sock),
947 	.close			= unix_close,
948 	.bpf_bypass_getsockopt	= unix_bpf_bypass_getsockopt,
949 #ifdef CONFIG_BPF_SYSCALL
950 	.psock_update_sk_prot	= unix_dgram_bpf_update_proto,
951 #endif
952 };
953 
954 struct proto unix_stream_proto = {
955 	.name			= "UNIX-STREAM",
956 	.owner			= THIS_MODULE,
957 	.obj_size		= sizeof(struct unix_sock),
958 	.close			= unix_close,
959 	.unhash			= unix_unhash,
960 	.bpf_bypass_getsockopt	= unix_bpf_bypass_getsockopt,
961 #ifdef CONFIG_BPF_SYSCALL
962 	.psock_update_sk_prot	= unix_stream_bpf_update_proto,
963 #endif
964 };
965 
966 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
967 {
968 	struct unix_sock *u;
969 	struct sock *sk;
970 	int err;
971 
972 	atomic_long_inc(&unix_nr_socks);
973 	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
974 		err = -ENFILE;
975 		goto err;
976 	}
977 
978 	if (type == SOCK_STREAM)
979 		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
980 	else /*dgram and  seqpacket */
981 		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
982 
983 	if (!sk) {
984 		err = -ENOMEM;
985 		goto err;
986 	}
987 
988 	sock_init_data(sock, sk);
989 
990 	sk->sk_hash		= unix_unbound_hash(sk);
991 	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
992 	sk->sk_write_space	= unix_write_space;
993 	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
994 	sk->sk_destruct		= unix_sock_destructor;
995 	u	  = unix_sk(sk);
996 	u->path.dentry = NULL;
997 	u->path.mnt = NULL;
998 	spin_lock_init(&u->lock);
999 	atomic_long_set(&u->inflight, 0);
1000 	INIT_LIST_HEAD(&u->link);
1001 	mutex_init(&u->iolock); /* single task reading lock */
1002 	mutex_init(&u->bindlock); /* single task binding lock */
1003 	init_waitqueue_head(&u->peer_wait);
1004 	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
1005 	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
1006 	unix_insert_unbound_socket(net, sk);
1007 
1008 	sock_prot_inuse_add(net, sk->sk_prot, 1);
1009 
1010 	return sk;
1011 
1012 err:
1013 	atomic_long_dec(&unix_nr_socks);
1014 	return ERR_PTR(err);
1015 }
1016 
1017 static int unix_create(struct net *net, struct socket *sock, int protocol,
1018 		       int kern)
1019 {
1020 	struct sock *sk;
1021 
1022 	if (protocol && protocol != PF_UNIX)
1023 		return -EPROTONOSUPPORT;
1024 
1025 	sock->state = SS_UNCONNECTED;
1026 
1027 	switch (sock->type) {
1028 	case SOCK_STREAM:
1029 		sock->ops = &unix_stream_ops;
1030 		break;
1031 		/*
1032 		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
1033 		 *	nothing uses it.
1034 		 */
1035 	case SOCK_RAW:
1036 		sock->type = SOCK_DGRAM;
1037 		fallthrough;
1038 	case SOCK_DGRAM:
1039 		sock->ops = &unix_dgram_ops;
1040 		break;
1041 	case SOCK_SEQPACKET:
1042 		sock->ops = &unix_seqpacket_ops;
1043 		break;
1044 	default:
1045 		return -ESOCKTNOSUPPORT;
1046 	}
1047 
1048 	sk = unix_create1(net, sock, kern, sock->type);
1049 	if (IS_ERR(sk))
1050 		return PTR_ERR(sk);
1051 
1052 	return 0;
1053 }
1054 
1055 static int unix_release(struct socket *sock)
1056 {
1057 	struct sock *sk = sock->sk;
1058 
1059 	if (!sk)
1060 		return 0;
1061 
1062 	sk->sk_prot->close(sk, 0);
1063 	unix_release_sock(sk, 0);
1064 	sock->sk = NULL;
1065 
1066 	return 0;
1067 }
1068 
/*
 * unix_find_bsd - look up the sock bound to a filesystem path.
 * @sunaddr:  address whose sun_path names the socket file
 * @addr_len: length of @sunaddr
 * @type:     socket type the result must have
 *
 * Resolves the path (following symlinks), requires write permission on it,
 * requires it to be a socket inode and maps that inode to its sock.  The
 * access time of the path is updated when a sock of the right type is found.
 *
 * Returns the sock on success.  Otherwise returns ERR_PTR() with the error
 * from kern_path() or path_permission(), -ECONNREFUSED when the path is not
 * a socket or no sock is bound to it, or -EPROTOTYPE on a type mismatch.
 */
static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
				  int type)
{
	struct sock *found;
	struct inode *ino;
	struct path path;
	int err;

	unix_mkname_bsd(sunaddr, addr_len);

	err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
	if (err)
		return ERR_PTR(err);

	err = path_permission(&path, MAY_WRITE);
	if (err)
		goto out_path;

	ino = d_backing_inode(path.dentry);
	found = S_ISSOCK(ino->i_mode) ? unix_find_socket_byinode(ino) : NULL;
	if (!found) {
		err = -ECONNREFUSED;
		goto out_path;
	}

	if (found->sk_type != type) {
		/* Wrong kind of socket: give back the reference we were handed. */
		sock_put(found);
		err = -EPROTOTYPE;
		goto out_path;
	}

	touch_atime(&path);
	path_put(&path);

	return found;

out_path:
	path_put(&path);
	return ERR_PTR(err);
}
1112 
/*
 * unix_find_abstract - look up a sock by name in the hash table.
 *
 * The hash is derived from the address bytes and @type.  Returns the sock
 * found by unix_find_socket_byname(), or ERR_PTR(-ECONNREFUSED) when there
 * is none.  If the sock found has a dentry attached, the access time of
 * its path is updated.
 */
static struct sock *unix_find_abstract(struct net *net,
				       struct sockaddr_un *sunaddr,
				       int addr_len, int type)
{
	unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type);
	struct sock *sk = unix_find_socket_byname(net, sunaddr, addr_len, hash);
	struct unix_sock *u;

	if (!sk)
		return ERR_PTR(-ECONNREFUSED);

	u = unix_sk(sk);
	if (u->path.dentry)
		touch_atime(&u->path);

	return sk;
}
1131 
1132 static struct sock *unix_find_other(struct net *net,
1133 				    struct sockaddr_un *sunaddr,
1134 				    int addr_len, int type)
1135 {
1136 	struct sock *sk;
1137 
1138 	if (sunaddr->sun_path[0])
1139 		sk = unix_find_bsd(sunaddr, addr_len, type);
1140 	else
1141 		sk = unix_find_abstract(net, sunaddr, addr_len, type);
1142 
1143 	return sk;
1144 }
1145 
1146 static int unix_autobind(struct sock *sk)
1147 {
1148 	unsigned int new_hash, old_hash = sk->sk_hash;
1149 	struct unix_sock *u = unix_sk(sk);
1150 	struct net *net = sock_net(sk);
1151 	struct unix_address *addr;
1152 	u32 lastnum, ordernum;
1153 	int err;
1154 
1155 	err = mutex_lock_interruptible(&u->bindlock);
1156 	if (err)
1157 		return err;
1158 
1159 	if (u->addr)
1160 		goto out;
1161 
1162 	err = -ENOMEM;
1163 	addr = kzalloc(sizeof(*addr) +
1164 		       offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
1165 	if (!addr)
1166 		goto out;
1167 
1168 	addr->len = offsetof(struct sockaddr_un, sun_path) + 6;
1169 	addr->name->sun_family = AF_UNIX;
1170 	refcount_set(&addr->refcnt, 1);
1171 
1172 	ordernum = get_random_u32();
1173 	lastnum = ordernum & 0xFFFFF;
1174 retry:
1175 	ordernum = (ordernum + 1) & 0xFFFFF;
1176 	sprintf(addr->name->sun_path + 1, "%05x", ordernum);
1177 
1178 	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1179 	unix_table_double_lock(net, old_hash, new_hash);
1180 
1181 	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
1182 		unix_table_double_unlock(net, old_hash, new_hash);
1183 
1184 		/* __unix_find_socket_byname() may take long time if many names
1185 		 * are already in use.
1186 		 */
1187 		cond_resched();
1188 
1189 		if (ordernum == lastnum) {
1190 			/* Give up if all names seems to be in use. */
1191 			err = -ENOSPC;
1192 			unix_release_addr(addr);
1193 			goto out;
1194 		}
1195 
1196 		goto retry;
1197 	}
1198 
1199 	__unix_set_addr_hash(net, sk, addr, new_hash);
1200 	unix_table_double_unlock(net, old_hash, new_hash);
1201 	err = 0;
1202 
1203 out:	mutex_unlock(&u->bindlock);
1204 	return err;
1205 }
1206 
1207 static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
1208 			 int addr_len)
1209 {
1210 	umode_t mode = S_IFSOCK |
1211 	       (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
1212 	unsigned int new_hash, old_hash = sk->sk_hash;
1213 	struct unix_sock *u = unix_sk(sk);
1214 	struct net *net = sock_net(sk);
1215 	struct mnt_idmap *idmap;
1216 	struct unix_address *addr;
1217 	struct dentry *dentry;
1218 	struct path parent;
1219 	int err;
1220 
1221 	addr_len = unix_mkname_bsd(sunaddr, addr_len);
1222 	addr = unix_create_addr(sunaddr, addr_len);
1223 	if (!addr)
1224 		return -ENOMEM;
1225 
1226 	/*
1227 	 * Get the parent directory, calculate the hash for last
1228 	 * component.
1229 	 */
1230 	dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
1231 	if (IS_ERR(dentry)) {
1232 		err = PTR_ERR(dentry);
1233 		goto out;
1234 	}
1235 
1236 	/*
1237 	 * All right, let's create it.
1238 	 */
1239 	idmap = mnt_idmap(parent.mnt);
1240 	err = security_path_mknod(&parent, dentry, mode, 0);
1241 	if (!err)
1242 		err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0);
1243 	if (err)
1244 		goto out_path;
1245 	err = mutex_lock_interruptible(&u->bindlock);
1246 	if (err)
1247 		goto out_unlink;
1248 	if (u->addr)
1249 		goto out_unlock;
1250 
1251 	new_hash = unix_bsd_hash(d_backing_inode(dentry));
1252 	unix_table_double_lock(net, old_hash, new_hash);
1253 	u->path.mnt = mntget(parent.mnt);
1254 	u->path.dentry = dget(dentry);
1255 	__unix_set_addr_hash(net, sk, addr, new_hash);
1256 	unix_table_double_unlock(net, old_hash, new_hash);
1257 	unix_insert_bsd_socket(sk);
1258 	mutex_unlock(&u->bindlock);
1259 	done_path_create(&parent, dentry);
1260 	return 0;
1261 
1262 out_unlock:
1263 	mutex_unlock(&u->bindlock);
1264 	err = -EINVAL;
1265 out_unlink:
1266 	/* failed after successful mknod?  unlink what we'd created... */
1267 	vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL);
1268 out_path:
1269 	done_path_create(&parent, dentry);
1270 out:
1271 	unix_release_addr(addr);
1272 	return err == -EEXIST ? -EADDRINUSE : err;
1273 }
1274 
1275 static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
1276 			      int addr_len)
1277 {
1278 	unsigned int new_hash, old_hash = sk->sk_hash;
1279 	struct unix_sock *u = unix_sk(sk);
1280 	struct net *net = sock_net(sk);
1281 	struct unix_address *addr;
1282 	int err;
1283 
1284 	addr = unix_create_addr(sunaddr, addr_len);
1285 	if (!addr)
1286 		return -ENOMEM;
1287 
1288 	err = mutex_lock_interruptible(&u->bindlock);
1289 	if (err)
1290 		goto out;
1291 
1292 	if (u->addr) {
1293 		err = -EINVAL;
1294 		goto out_mutex;
1295 	}
1296 
1297 	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1298 	unix_table_double_lock(net, old_hash, new_hash);
1299 
1300 	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
1301 		goto out_spin;
1302 
1303 	__unix_set_addr_hash(net, sk, addr, new_hash);
1304 	unix_table_double_unlock(net, old_hash, new_hash);
1305 	mutex_unlock(&u->bindlock);
1306 	return 0;
1307 
1308 out_spin:
1309 	unix_table_double_unlock(net, old_hash, new_hash);
1310 	err = -EADDRINUSE;
1311 out_mutex:
1312 	mutex_unlock(&u->bindlock);
1313 out:
1314 	unix_release_addr(addr);
1315 	return err;
1316 }
1317 
1318 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1319 {
1320 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1321 	struct sock *sk = sock->sk;
1322 	int err;
1323 
1324 	if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
1325 	    sunaddr->sun_family == AF_UNIX)
1326 		return unix_autobind(sk);
1327 
1328 	err = unix_validate_addr(sunaddr, addr_len);
1329 	if (err)
1330 		return err;
1331 
1332 	if (sunaddr->sun_path[0])
1333 		err = unix_bind_bsd(sk, sunaddr, addr_len);
1334 	else
1335 		err = unix_bind_abstract(sk, sunaddr, addr_len);
1336 
1337 	return err;
1338 }
1339 
1340 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1341 {
1342 	if (unlikely(sk1 == sk2) || !sk2) {
1343 		unix_state_lock(sk1);
1344 		return;
1345 	}
1346 	if (sk1 > sk2)
1347 		swap(sk1, sk2);
1348 
1349 	unix_state_lock(sk1);
1350 	unix_state_lock_nested(sk2, U_LOCK_SECOND);
1351 }
1352 
/*
 * Counterpart of unix_state_double_lock(): @sk1 is always unlocked, @sk2
 * only when it is non-NULL and a different sock.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);

	if (sk2 && likely(sk1 != sk2))
		unix_state_unlock(sk2);
}
1362 
/*
 * unix_dgram_connect - set or clear the peer of a socket.
 * @sock:  socket being connected
 * @addr:  destination address; sa_family == AF_UNSPEC dissolves an
 *         existing association instead
 * @alen:  length of @addr
 * @flags: not referenced by this function
 *
 * Returns 0 on success.  Errors: -EINVAL when @alen does not cover the
 * family field, the error from unix_validate_addr(), unix_autobind() or
 * unix_find_other(), -EPERM when unix_may_send() refuses the peer, or the
 * error from security_unix_may_send().
 */
1363 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1364 			      int alen, int flags)
1365 {
1366 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1367 	struct sock *sk = sock->sk;
1368 	struct sock *other;
1369 	int err;
1370 
1371 	err = -EINVAL;
1372 	if (alen < offsetofend(struct sockaddr, sa_family))
1373 		goto out;
1374 
1375 	if (addr->sa_family != AF_UNSPEC) {
1376 		err = unix_validate_addr(sunaddr, alen);
1377 		if (err)
1378 			goto out;
1379 
		/* Credential passing is enabled but we have no name yet: pick one. */
1380 		if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1381 		     test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
1382 		    !unix_sk(sk)->addr) {
1383 			err = unix_autobind(sk);
1384 			if (err)
1385 				goto out;
1386 		}
1387 
1388 restart:
1389 		other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type);
1390 		if (IS_ERR(other)) {
1391 			err = PTR_ERR(other);
1392 			goto out;
1393 		}
1394 
1395 		unix_state_double_lock(sk, other);
1396 
1397 		/* Apparently VFS overslept socket death. Retry. */
1398 		if (sock_flag(other, SOCK_DEAD)) {
1399 			unix_state_double_unlock(sk, other);
1400 			sock_put(other);
1401 			goto restart;
1402 		}
1403 
1404 		err = -EPERM;
1405 		if (!unix_may_send(sk, other))
1406 			goto out_unlock;
1407 
1408 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1409 		if (err)
1410 			goto out_unlock;
1411 
		/* Both ends are marked established while both state locks are held. */
1412 		sk->sk_state = other->sk_state = TCP_ESTABLISHED;
1413 	} else {
1414 		/*
1415 		 *	1003.1g breaking connected state with AF_UNSPEC
1416 		 */
1417 		other = NULL;
		/* With a NULL second argument only sk's state lock is taken. */
1418 		unix_state_double_lock(sk, other);
1419 	}
1420 
1421 	/*
1422 	 * If it was connected, reconnect.
1423 	 */
1424 	if (unix_peer(sk)) {
1425 		struct sock *old_peer = unix_peer(sk);
1426 
1427 		unix_peer(sk) = other;
		/* AF_UNSPEC on a connected socket: back to the closed state. */
1428 		if (!other)
1429 			sk->sk_state = TCP_CLOSE;
1430 		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1431 
1432 		unix_state_double_unlock(sk, other);
1433 
		/* Reconnecting to the same peer is not a disconnect. */
1434 		if (other != old_peer)
1435 			unix_dgram_disconnected(sk, old_peer);
		/* Drop the reference that the old peer pointer was holding. */
1436 		sock_put(old_peer);
1437 	} else {
		/*
		 * No sock_put(other) on the success paths: the reference
		 * obtained from unix_find_other() now belongs to unix_peer(sk).
		 */
1438 		unix_peer(sk) = other;
1439 		unix_state_double_unlock(sk, other);
1440 	}
1441 
1442 	return 0;
1443 
1444 out_unlock:
1445 	unix_state_double_unlock(sk, other);
1446 	sock_put(other);
1447 out:
1448 	return err;
1449 }
1450 
1451 static long unix_wait_for_peer(struct sock *other, long timeo)
1452 	__releases(&unix_sk(other)->lock)
1453 {
1454 	struct unix_sock *u = unix_sk(other);
1455 	int sched;
1456 	DEFINE_WAIT(wait);
1457 
1458 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1459 
1460 	sched = !sock_flag(other, SOCK_DEAD) &&
1461 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1462 		unix_recvq_full_lockless(other);
1463 
1464 	unix_state_unlock(other);
1465 
1466 	if (sched)
1467 		timeo = schedule_timeout(timeo);
1468 
1469 	finish_wait(&u->peer_wait, &wait);
1470 	return timeo;
1471 }
1472 
/*
 * unix_stream_connect - connect a socket to a listening socket.
 * @sock:     socket being connected
 * @uaddr:    address of the listening socket
 * @addr_len: length of @uaddr
 * @flags:    only O_NONBLOCK is examined (it selects a zero send timeout)
 *
 * A brand new sock (newsk) is created up front to become the server side
 * of the connection, together with a one byte skb charged to it.  Once the
 * listener has been found and both state locks are held, newsk and @sock's
 * sock are made each other's peer and the skb is queued on the listener's
 * receive queue, where unix_accept() picks it up.
 *
 * Returns 0 on success.  Errors: the error from unix_validate_addr(),
 * unix_autobind(), unix_create1() or unix_find_other(); -ENOMEM when the
 * skb cannot be allocated; -ECONNREFUSED when the target is not listening
 * or has shut down receiving; -EAGAIN when its queue is full and the
 * timeout is zero; sock_intr_errno() when a signal interrupts the wait;
 * -EISCONN / -EINVAL when this socket is not in TCP_CLOSE state; or the
 * error from security_unix_stream_connect().
 */
1473 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1474 			       int addr_len, int flags)
1475 {
1476 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1477 	struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
1478 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1479 	struct net *net = sock_net(sk);
1480 	struct sk_buff *skb = NULL;
1481 	long timeo;
1482 	int err;
1483 	int st;
1484 
1485 	err = unix_validate_addr(sunaddr, addr_len);
1486 	if (err)
1487 		goto out;
1488 
	/* Credential passing is enabled but we have no name yet: pick one. */
1489 	if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1490 	     test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
1491 		err = unix_autobind(sk);
1492 		if (err)
1493 			goto out;
1494 	}
1495 
1496 	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1497 
1498 	/* First of all allocate resources.
1499 	   If we will make it after state is locked,
1500 	   we will have to recheck all again in any case.
1501 	 */
1502 
1503 	/* create new sock for complete connection */
1504 	newsk = unix_create1(net, NULL, 0, sock->type);
1505 	if (IS_ERR(newsk)) {
1506 		err = PTR_ERR(newsk);
		/* Keep the cleanup at "out" from releasing an error pointer. */
1507 		newsk = NULL;
1508 		goto out;
1509 	}
1510 
1511 	err = -ENOMEM;
1512 
1513 	/* Allocate skb for sending to listening sock */
1514 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1515 	if (skb == NULL)
1516 		goto out;
1517 
1518 restart:
1519 	/*  Find listening sock. */
1520 	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type);
1521 	if (IS_ERR(other)) {
1522 		err = PTR_ERR(other);
		/* Keep the cleanup at "out" from putting an error pointer. */
1523 		other = NULL;
1524 		goto out;
1525 	}
1526 
1527 	/* Latch state of peer */
1528 	unix_state_lock(other);
1529 
1530 	/* Apparently VFS overslept socket death. Retry. */
1531 	if (sock_flag(other, SOCK_DEAD)) {
1532 		unix_state_unlock(other);
1533 		sock_put(other);
1534 		goto restart;
1535 	}
1536 
1537 	err = -ECONNREFUSED;
1538 	if (other->sk_state != TCP_LISTEN)
1539 		goto out_unlock;
1540 	if (other->sk_shutdown & RCV_SHUTDOWN)
1541 		goto out_unlock;
1542 
1543 	if (unix_recvq_full(other)) {
1544 		err = -EAGAIN;
1545 		if (!timeo)
1546 			goto out_unlock;
1547 
		/* Releases other's state lock, so the exits below go to "out". */
1548 		timeo = unix_wait_for_peer(other, timeo);
1549 
1550 		err = sock_intr_errno(timeo);
1551 		if (signal_pending(current))
1552 			goto out;
1553 		sock_put(other);
1554 		goto restart;
1555 	}
1556 
1557 	/* Latch our state.
1558 
1559 	   It is tricky place. We need to grab our state lock and cannot
1560 	   drop lock on peer. It is dangerous because deadlock is
1561 	   possible. Connect to self case and simultaneous
1562 	   attempt to connect are eliminated by checking socket
1563 	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1564 	   check this before attempt to grab lock.
1565 
1566 	   Well, and we have to recheck the state after socket locked.
1567 	 */
1568 	st = sk->sk_state;
1569 
1570 	switch (st) {
1571 	case TCP_CLOSE:
1572 		/* This is ok... continue with connect */
1573 		break;
1574 	case TCP_ESTABLISHED:
1575 		/* Socket is already connected */
1576 		err = -EISCONN;
1577 		goto out_unlock;
1578 	default:
1579 		err = -EINVAL;
1580 		goto out_unlock;
1581 	}
1582 
1583 	unix_state_lock_nested(sk, U_LOCK_SECOND);
1584 
	/* Our state changed between the unlocked read and the lock: start over. */
1585 	if (sk->sk_state != st) {
1586 		unix_state_unlock(sk);
1587 		unix_state_unlock(other);
1588 		sock_put(other);
1589 		goto restart;
1590 	}
1591 
1592 	err = security_unix_stream_connect(sk, other, newsk);
1593 	if (err) {
1594 		unix_state_unlock(sk);
1595 		goto out_unlock;
1596 	}
1597 
1598 	/* The way is open! Fastly set all the necessary fields... */
1599 
	/* newsk's peer pointer holds a reference on sk. */
1600 	sock_hold(sk);
1601 	unix_peer(newsk)	= sk;
1602 	newsk->sk_state		= TCP_ESTABLISHED;
1603 	newsk->sk_type		= sk->sk_type;
1604 	init_peercred(newsk);
1605 	newu = unix_sk(newsk);
1606 	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1607 	otheru = unix_sk(other);
1608 
1609 	/* copy address information from listening to new sock
1610 	 *
1611 	 * The contents of *(otheru->addr) and otheru->path
1612 	 * are seen fully set up here, since we have found
1613 	 * otheru in hash under its lock.  Insertion into the
1614 	 * hash chain we'd found it in had been done in an
1615 	 * earlier critical area protected by the chain's lock,
1616 	 * the same one where we'd set *(otheru->addr) contents,
1617 	 * as well as otheru->path and otheru->addr itself.
1618 	 *
1619 	 * Using smp_store_release() here to set newu->addr
1620 	 * is enough to make those stores, as well as stores
1621 	 * to newu->path visible to anyone who gets newu->addr
1622 	 * by smp_load_acquire().  IOW, the same warranties
1623 	 * as for unix_sock instances bound in unix_bind() or
1624 	 * in unix_autobind().
1625 	 */
1626 	if (otheru->path.dentry) {
1627 		path_get(&otheru->path);
1628 		newu->path = otheru->path;
1629 	}
1630 	refcount_inc(&otheru->addr->refcnt);
1631 	smp_store_release(&newu->addr, otheru->addr);
1632 
1633 	/* Set credentials */
1634 	copy_peercred(sk, other);
1635 
1636 	sock->state	= SS_CONNECTED;
1637 	sk->sk_state	= TCP_ESTABLISHED;
	/* sk's peer pointer holds a reference on newsk. */
1638 	sock_hold(newsk);
1639 
1640 	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
1641 	unix_peer(sk)	= newsk;
1642 
1643 	unix_state_unlock(sk);
1644 
1645 	/* take ten and send info to listening sock */
1646 	spin_lock(&other->sk_receive_queue.lock);
1647 	__skb_queue_tail(&other->sk_receive_queue, skb);
1648 	spin_unlock(&other->sk_receive_queue.lock);
1649 	unix_state_unlock(other);
1650 	other->sk_data_ready(other);
1651 	sock_put(other);
1652 	return 0;
1653 
1654 out_unlock:
1655 	if (other)
1656 		unix_state_unlock(other);
1657 
	/* Common cleanup: skb, newsk and other may each still be NULL here. */
1658 out:
1659 	kfree_skb(skb);
1660 	if (newsk)
1661 		unix_release_sock(newsk, 0);
1662 	if (other)
1663 		sock_put(other);
1664 	return err;
1665 }
1666 
1667 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1668 {
1669 	struct sock *ska = socka->sk, *skb = sockb->sk;
1670 
1671 	/* Join our sockets back to back */
1672 	sock_hold(ska);
1673 	sock_hold(skb);
1674 	unix_peer(ska) = skb;
1675 	unix_peer(skb) = ska;
1676 	init_peercred(ska);
1677 	init_peercred(skb);
1678 
1679 	ska->sk_state = TCP_ESTABLISHED;
1680 	skb->sk_state = TCP_ESTABLISHED;
1681 	socka->state  = SS_CONNECTED;
1682 	sockb->state  = SS_CONNECTED;
1683 	return 0;
1684 }
1685 
1686 static void unix_sock_inherit_flags(const struct socket *old,
1687 				    struct socket *new)
1688 {
1689 	if (test_bit(SOCK_PASSCRED, &old->flags))
1690 		set_bit(SOCK_PASSCRED, &new->flags);
1691 	if (test_bit(SOCK_PASSPIDFD, &old->flags))
1692 		set_bit(SOCK_PASSPIDFD, &new->flags);
1693 	if (test_bit(SOCK_PASSSEC, &old->flags))
1694 		set_bit(SOCK_PASSSEC, &new->flags);
1695 }
1696 
1697 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1698 		       bool kern)
1699 {
1700 	struct sock *sk = sock->sk;
1701 	struct sock *tsk;
1702 	struct sk_buff *skb;
1703 	int err;
1704 
1705 	err = -EOPNOTSUPP;
1706 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1707 		goto out;
1708 
1709 	err = -EINVAL;
1710 	if (sk->sk_state != TCP_LISTEN)
1711 		goto out;
1712 
1713 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1714 	 * so that no locks are necessary.
1715 	 */
1716 
1717 	skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
1718 				&err);
1719 	if (!skb) {
1720 		/* This means receive shutdown. */
1721 		if (err == 0)
1722 			err = -EINVAL;
1723 		goto out;
1724 	}
1725 
1726 	tsk = skb->sk;
1727 	skb_free_datagram(sk, skb);
1728 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1729 
1730 	/* attach accepted sock to socket */
1731 	unix_state_lock(tsk);
1732 	newsock->state = SS_CONNECTED;
1733 	unix_sock_inherit_flags(sock, newsock);
1734 	sock_graft(tsk, newsock);
1735 	unix_state_unlock(tsk);
1736 	return 0;
1737 
1738 out:
1739 	return err;
1740 }
1741 
1742 
1743 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1744 {
1745 	struct sock *sk = sock->sk;
1746 	struct unix_address *addr;
1747 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1748 	int err = 0;
1749 
1750 	if (peer) {
1751 		sk = unix_peer_get(sk);
1752 
1753 		err = -ENOTCONN;
1754 		if (!sk)
1755 			goto out;
1756 		err = 0;
1757 	} else {
1758 		sock_hold(sk);
1759 	}
1760 
1761 	addr = smp_load_acquire(&unix_sk(sk)->addr);
1762 	if (!addr) {
1763 		sunaddr->sun_family = AF_UNIX;
1764 		sunaddr->sun_path[0] = 0;
1765 		err = offsetof(struct sockaddr_un, sun_path);
1766 	} else {
1767 		err = addr->len;
1768 		memcpy(sunaddr, addr->name, addr->len);
1769 	}
1770 	sock_put(sk);
1771 out:
1772 	return err;
1773 }
1774 
1775 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1776 {
1777 	scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1778 
1779 	/*
1780 	 * Garbage collection of unix sockets starts by selecting a set of
1781 	 * candidate sockets which have reference only from being in flight
1782 	 * (total_refs == inflight_refs).  This condition is checked once during
1783 	 * the candidate collection phase, and candidates are marked as such, so
1784 	 * that non-candidates can later be ignored.  While inflight_refs is
1785 	 * protected by unix_gc_lock, total_refs (file count) is not, hence this
1786 	 * is an instantaneous decision.
1787 	 *
1788 	 * Once a candidate, however, the socket must not be reinstalled into a
1789 	 * file descriptor while the garbage collection is in progress.
1790 	 *
1791 	 * If the above conditions are met, then the directed graph of
1792 	 * candidates (*) does not change while unix_gc_lock is held.
1793 	 *
1794 	 * Any operations that changes the file count through file descriptors
1795 	 * (dup, close, sendmsg) does not change the graph since candidates are
1796 	 * not installed in fds.
1797 	 *
1798 	 * Dequeing a candidate via recvmsg would install it into an fd, but
1799 	 * that takes unix_gc_lock to decrement the inflight count, so it's
1800 	 * serialized with garbage collection.
1801 	 *
1802 	 * MSG_PEEK is special in that it does not change the inflight count,
1803 	 * yet does install the socket into an fd.  The following lock/unlock
1804 	 * pair is to ensure serialization with garbage collection.  It must be
1805 	 * done between incrementing the file count and installing the file into
1806 	 * an fd.
1807 	 *
1808 	 * If garbage collection starts after the barrier provided by the
1809 	 * lock/unlock, then it will see the elevated refcount and not mark this
1810 	 * as a candidate.  If a garbage collection is already in progress
1811 	 * before the file count was incremented, then the lock/unlock pair will
1812 	 * ensure that garbage collection is finished before progressing to
1813 	 * installing the fd.
1814 	 *
1815 	 * (*) A -> B where B is on the queue of A or B is on the queue of C
1816 	 * which is on the queue of listening socket A.
1817 	 */
1818 	spin_lock(&unix_gc_lock);
1819 	spin_unlock(&unix_gc_lock);
1820 }
1821 
1822 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1823 {
1824 	int err = 0;
1825 
1826 	UNIXCB(skb).pid  = get_pid(scm->pid);
1827 	UNIXCB(skb).uid = scm->creds.uid;
1828 	UNIXCB(skb).gid = scm->creds.gid;
1829 	UNIXCB(skb).fp = NULL;
1830 	unix_get_secdata(scm, skb);
1831 	if (scm->fp && send_fds)
1832 		err = unix_attach_fds(scm, skb);
1833 
1834 	skb->destructor = unix_destruct_scm;
1835 	return err;
1836 }
1837 
1838 static bool unix_passcred_enabled(const struct socket *sock,
1839 				  const struct sock *other)
1840 {
1841 	return test_bit(SOCK_PASSCRED, &sock->flags) ||
1842 	       test_bit(SOCK_PASSPIDFD, &sock->flags) ||
1843 	       !other->sk_socket ||
1844 	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags) ||
1845 	       test_bit(SOCK_PASSPIDFD, &other->sk_socket->flags);
1846 }
1847 
1848 /*
1849  * Some apps rely on write() giving SCM_CREDENTIALS
1850  * We include credentials if source or destination socket
1851  * asserted SOCK_PASSCRED.
1852  */
1853 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1854 			    const struct sock *other)
1855 {
1856 	if (UNIXCB(skb).pid)
1857 		return;
1858 	if (unix_passcred_enabled(sock, other)) {
1859 		UNIXCB(skb).pid  = get_pid(task_tgid(current));
1860 		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1861 	}
1862 }
1863 
1864 static bool unix_skb_scm_eq(struct sk_buff *skb,
1865 			    struct scm_cookie *scm)
1866 {
1867 	return UNIXCB(skb).pid == scm->pid &&
1868 	       uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
1869 	       gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
1870 	       unix_secdata_eq(scm, skb);
1871 }
1872 
1873 static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1874 {
1875 	struct scm_fp_list *fp = UNIXCB(skb).fp;
1876 	struct unix_sock *u = unix_sk(sk);
1877 
1878 	if (unlikely(fp && fp->count))
1879 		atomic_add(fp->count, &u->scm_stat.nr_fds);
1880 }
1881 
1882 static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1883 {
1884 	struct scm_fp_list *fp = UNIXCB(skb).fp;
1885 	struct unix_sock *u = unix_sk(sk);
1886 
1887 	if (unlikely(fp && fp->count))
1888 		atomic_sub(fp->count, &u->scm_stat.nr_fds);
1889 }
1890 
1891 /*
1892  *	Send AF_UNIX data.
 *
 *	unix_dgram_sendmsg - queue one message of @len bytes on the
 *	receive queue of the destination sock.  The destination is taken
 *	from msg->msg_name when one is given, otherwise it is the
 *	connected peer.
 *
 *	Returns @len on success, and also when the destination's socket
 *	filter rejects the packet (it is then dropped silently).
 *	Errors set in this function: -EOPNOTSUPP for MSG_OOB, -ENOTCONN
 *	when there is neither a destination address nor a peer, -EMSGSIZE
 *	when @len exceeds sk_sndbuf - 32, -ECONNRESET when the peer went
 *	away and no address is available for a new lookup, -EPERM when
 *	unix_may_send() refuses, -EPIPE when the receiver is dead
 *	(SOCK_SEQPACKET) or has shut down receiving, -ECONNREFUSED when
 *	the connected datagram peer is dead, -EAGAIN when the receiver's
 *	queue is full and the send timeout is zero, or sock_intr_errno()
 *	when a signal interrupts the wait.  Errors from scm_send(),
 *	unix_validate_addr(), unix_autobind(), the skb allocation and
 *	copy, unix_scm_to_skb(), unix_find_other() and
 *	security_unix_may_send() are passed through.
1893  */
1894 
1895 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1896 			      size_t len)
1897 {
1898 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1899 	struct sock *sk = sock->sk, *other = NULL;
1900 	struct unix_sock *u = unix_sk(sk);
1901 	struct scm_cookie scm;
1902 	struct sk_buff *skb;
1903 	int data_len = 0;
	/* Non-zero while sk's own state lock is held in addition to other's. */
1904 	int sk_locked;
1905 	long timeo;
1906 	int err;
1907 
1908 	wait_for_unix_gc();
1909 	err = scm_send(sock, msg, &scm, false);
1910 	if (err < 0)
1911 		return err;
1912 
1913 	err = -EOPNOTSUPP;
1914 	if (msg->msg_flags&MSG_OOB)
1915 		goto out;
1916 
1917 	if (msg->msg_namelen) {
1918 		err = unix_validate_addr(sunaddr, msg->msg_namelen);
1919 		if (err)
1920 			goto out;
1921 	} else {
		/* No address supplied: the message goes to the connected peer. */
1922 		sunaddr = NULL;
1923 		err = -ENOTCONN;
1924 		other = unix_peer_get(sk);
1925 		if (!other)
1926 			goto out;
1927 	}
1928 
	/* Credential passing is enabled but we have no name yet: pick one. */
1929 	if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1930 	     test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
1931 		err = unix_autobind(sk);
1932 		if (err)
1933 			goto out;
1934 	}
1935 
1936 	err = -EMSGSIZE;
1937 	if (len > sk->sk_sndbuf - 32)
1938 		goto out;
1939 
	/*
	 * Anything beyond SKB_MAX_ALLOC goes into page fragments: data_len is
	 * the paged part, rounded up to whole pages, and len - data_len is
	 * what remains for the linear part.
	 */
1940 	if (len > SKB_MAX_ALLOC) {
1941 		data_len = min_t(size_t,
1942 				 len - SKB_MAX_ALLOC,
1943 				 MAX_SKB_FRAGS * PAGE_SIZE);
1944 		data_len = PAGE_ALIGN(data_len);
1945 
1946 		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1947 	}
1948 
1949 	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1950 				   msg->msg_flags & MSG_DONTWAIT, &err,
1951 				   PAGE_ALLOC_COSTLY_ORDER);
1952 	if (skb == NULL)
1953 		goto out;
1954 
1955 	err = unix_scm_to_skb(&scm, skb, true);
1956 	if (err < 0)
1957 		goto out_free;
1958 
1959 	skb_put(skb, len - data_len);
1960 	skb->data_len = data_len;
1961 	skb->len = len;
1962 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1963 	if (err)
1964 		goto out_free;
1965 
1966 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1967 
	/* Re-entered with other == NULL whenever the destination must be looked up again. */
1968 restart:
1969 	if (!other) {
1970 		err = -ECONNRESET;
1971 		if (sunaddr == NULL)
1972 			goto out_free;
1973 
1974 		other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen,
1975 					sk->sk_type);
1976 		if (IS_ERR(other)) {
1977 			err = PTR_ERR(other);
			/* Keep the cleanup at "out" from putting an error pointer. */
1978 			other = NULL;
1979 			goto out_free;
1980 		}
1981 	}
1982 
1983 	if (sk_filter(other, skb) < 0) {
1984 		/* Toss the packet but do not return any error to the sender */
1985 		err = len;
1986 		goto out_free;
1987 	}
1988 
1989 	sk_locked = 0;
1990 	unix_state_lock(other);
	/* Re-entered from below once sk's state lock has been taken as well. */
1991 restart_locked:
1992 	err = -EPERM;
1993 	if (!unix_may_send(sk, other))
1994 		goto out_unlock;
1995 
1996 	if (unlikely(sock_flag(other, SOCK_DEAD))) {
1997 		/*
1998 		 *	Check with 1003.1g - what should
1999 		 *	datagram error
2000 		 */
2001 		unix_state_unlock(other);
2002 		sock_put(other);
2003 
2004 		if (!sk_locked)
2005 			unix_state_lock(sk);
2006 
2007 		err = 0;
2008 		if (sk->sk_type == SOCK_SEQPACKET) {
2009 			/* We are here only when racing with unix_release_sock()
2010 			 * is clearing @other. Never change state to TCP_CLOSE
2011 			 * unlike SOCK_DGRAM wants.
2012 			 */
2013 			unix_state_unlock(sk);
2014 			err = -EPIPE;
2015 		} else if (unix_peer(sk) == other) {
2016 			unix_peer(sk) = NULL;
2017 			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
2018 
2019 			sk->sk_state = TCP_CLOSE;
2020 			unix_state_unlock(sk);
2021 
2022 			unix_dgram_disconnected(sk, other);
			/* Second put: the reference the peer pointer was holding. */
2023 			sock_put(other);
2024 			err = -ECONNREFUSED;
2025 		} else {
2026 			unix_state_unlock(sk);
2027 		}
2028 
		/* err still 0: the dead sock was not our peer, look the address up again. */
2029 		other = NULL;
2030 		if (err)
2031 			goto out_free;
2032 		goto restart;
2033 	}
2034 
2035 	err = -EPIPE;
2036 	if (other->sk_shutdown & RCV_SHUTDOWN)
2037 		goto out_unlock;
2038 
2039 	if (sk->sk_type != SOCK_SEQPACKET) {
2040 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
2041 		if (err)
2042 			goto out_unlock;
2043 	}
2044 
2045 	/* other == sk && unix_peer(other) != sk if
2046 	 * - unix_peer(sk) == NULL, destination address bound to sk
2047 	 * - unix_peer(sk) == sk by time of get but disconnected before lock
2048 	 */
2049 	if (other != sk &&
2050 	    unlikely(unix_peer(other) != sk &&
2051 	    unix_recvq_full_lockless(other))) {
2052 		if (timeo) {
			/* Releases other's state lock; the reference on other is kept. */
2053 			timeo = unix_wait_for_peer(other, timeo);
2054 
2055 			err = sock_intr_errno(timeo);
2056 			if (signal_pending(current))
2057 				goto out_free;
2058 
2059 			goto restart;
2060 		}
2061 
		/*
		 * Non-blocking send to a full queue: both state locks are
		 * needed, so drop other's lock and take the pair in the
		 * order unix_state_double_lock() imposes.
		 */
2062 		if (!sk_locked) {
2063 			unix_state_unlock(other);
2064 			unix_state_double_lock(sk, other);
2065 		}
2066 
2067 		if (unix_peer(sk) != other ||
2068 		    unix_dgram_peer_wake_me(sk, other)) {
2069 			err = -EAGAIN;
2070 			sk_locked = 1;
2071 			goto out_unlock;
2072 		}
2073 
		/* The locks were dropped above, so every check must be repeated. */
2074 		if (!sk_locked) {
2075 			sk_locked = 1;
2076 			goto restart_locked;
2077 		}
2078 	}
2079 
2080 	if (unlikely(sk_locked))
2081 		unix_state_unlock(sk);
2082 
2083 	if (sock_flag(other, SOCK_RCVTSTAMP))
2084 		__net_timestamp(skb);
2085 	maybe_add_creds(skb, sock, other);
2086 	scm_stat_add(other, skb);
2087 	skb_queue_tail(&other->sk_receive_queue, skb);
2088 	unix_state_unlock(other);
2089 	other->sk_data_ready(other);
2090 	sock_put(other);
2091 	scm_destroy(&scm);
2092 	return len;
2093 
2094 out_unlock:
2095 	if (sk_locked)
2096 		unix_state_unlock(sk);
2097 	unix_state_unlock(other);
2098 out_free:
2099 	kfree_skb(skb);
2100 out:
2101 	if (other)
2102 		sock_put(other);
2103 	scm_destroy(&scm);
2104 	return err;
2105 }
2106 
2107 /* We use paged skbs for stream sockets, and limit occupancy to 32768
2108  * bytes, and a minimum of a full page.
 *
 * UNIX_SKB_FRAGS_SZ is that limit in bytes: PAGE_SIZE shifted left by
 * get_order(32768).  unix_stream_sendmsg() adds it to SKB_MAX_HEAD(0) to
 * cap the size of each skb it builds.
2109  */
2110 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
2111 
2112 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2113 static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other,
2114 		     struct scm_cookie *scm, bool fds_sent)
2115 {
2116 	struct unix_sock *ousk = unix_sk(other);
2117 	struct sk_buff *skb;
2118 	int err = 0;
2119 
2120 	skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
2121 
2122 	if (!skb)
2123 		return err;
2124 
2125 	err = unix_scm_to_skb(scm, skb, !fds_sent);
2126 	if (err < 0) {
2127 		kfree_skb(skb);
2128 		return err;
2129 	}
2130 	skb_put(skb, 1);
2131 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
2132 
2133 	if (err) {
2134 		kfree_skb(skb);
2135 		return err;
2136 	}
2137 
2138 	unix_state_lock(other);
2139 
2140 	if (sock_flag(other, SOCK_DEAD) ||
2141 	    (other->sk_shutdown & RCV_SHUTDOWN)) {
2142 		unix_state_unlock(other);
2143 		kfree_skb(skb);
2144 		return -EPIPE;
2145 	}
2146 
2147 	maybe_add_creds(skb, sock, other);
2148 	skb_get(skb);
2149 
2150 	if (ousk->oob_skb)
2151 		consume_skb(ousk->oob_skb);
2152 
2153 	WRITE_ONCE(ousk->oob_skb, skb);
2154 
2155 	scm_stat_add(other, skb);
2156 	skb_queue_tail(&other->sk_receive_queue, skb);
2157 	sk_send_sigurg(other);
2158 	unix_state_unlock(other);
2159 	other->sk_data_ready(other);
2160 
2161 	return err;
2162 }
2163 #endif
2164 
/*
 * sendmsg() for SOCK_STREAM sockets.
 *
 * The payload is cut into chunks; each chunk is copied (or, with
 * MSG_SPLICE_PAGES, spliced) into its own skb and appended to the peer's
 * receive queue.  With MSG_OOB, and only when CONFIG_AF_UNIX_OOB is enabled,
 * the last byte is held back from the loop and sent through queue_oob().
 *
 * Returns the number of bytes queued if that is non-zero, otherwise a
 * negative errno.  On the -EPIPE paths SIGPIPE is raised unless data was
 * already sent or MSG_NOSIGNAL is set.
 */
2165 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
2166 			       size_t len)
2167 {
2168 	struct sock *sk = sock->sk;
2169 	struct sock *other = NULL;
2170 	int err, size;
2171 	struct sk_buff *skb;
2172 	int sent = 0;
2173 	struct scm_cookie scm;
2174 	bool fds_sent = false;
2175 	int data_len;
2176 
2177 	wait_for_unix_gc();
2178 	err = scm_send(sock, msg, &scm, false);
2179 	if (err < 0)
2180 		return err;
2181 
2182 	err = -EOPNOTSUPP;
2183 	if (msg->msg_flags & MSG_OOB) {
2184 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
		/* Keep the final byte out of the main loop; it goes via queue_oob(). */
2185 		if (len)
2186 			len--;
2187 		else
2188 #endif
2189 			goto out_err;
2190 	}
2191 
	/* An explicit destination address is never accepted here. */
2192 	if (msg->msg_namelen) {
2193 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
2194 		goto out_err;
2195 	} else {
2196 		err = -ENOTCONN;
2197 		other = unix_peer(sk);
2198 		if (!other)
2199 			goto out_err;
2200 	}
2201 
2202 	if (sk->sk_shutdown & SEND_SHUTDOWN)
2203 		goto pipe_err;
2204 
2205 	while (sent < len) {
2206 		size = len - sent;
2207 
2208 		if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
			/* Empty skb; the pages are attached by skb_splice_from_iter() below. */
2209 			skb = sock_alloc_send_pskb(sk, 0, 0,
2210 						   msg->msg_flags & MSG_DONTWAIT,
2211 						   &err, 0);
2212 		} else {
2213 			/* Keep two messages in the pipe so it schedules better */
2214 			size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
2215 
2216 			/* allow fallback to order-0 allocations */
2217 			size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
2218 
			/* data_len is the part of the chunk placed in page fragments. */
2219 			data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
2220 
2221 			data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
2222 
2223 			skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
2224 						   msg->msg_flags & MSG_DONTWAIT, &err,
2225 						   get_order(UNIX_SKB_FRAGS_SZ));
2226 		}
2227 		if (!skb)
2228 			goto out_err;
2229 
2230 		/* Only send the fds in the first buffer */
2231 		err = unix_scm_to_skb(&scm, skb, !fds_sent);
2232 		if (err < 0) {
2233 			kfree_skb(skb);
2234 			goto out_err;
2235 		}
2236 		fds_sent = true;
2237 
2238 		if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2239 			err = skb_splice_from_iter(skb, &msg->msg_iter, size,
2240 						   sk->sk_allocation);
2241 			if (err < 0) {
2242 				kfree_skb(skb);
2243 				goto out_err;
2244 			}
			/* The splice may attach fewer bytes than requested. */
2245 			size = err;
2246 			refcount_add(size, &sk->sk_wmem_alloc);
2247 		} else {
2248 			skb_put(skb, size - data_len);
2249 			skb->data_len = data_len;
2250 			skb->len = size;
2251 			err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
2252 			if (err) {
2253 				kfree_skb(skb);
2254 				goto out_err;
2255 			}
2256 		}
2257 
2258 		unix_state_lock(other);
2259 
		/* Peer gone or no longer receiving: drop the skb and report EPIPE. */
2260 		if (sock_flag(other, SOCK_DEAD) ||
2261 		    (other->sk_shutdown & RCV_SHUTDOWN))
2262 			goto pipe_err_free;
2263 
2264 		maybe_add_creds(skb, sock, other);
2265 		scm_stat_add(other, skb);
2266 		skb_queue_tail(&other->sk_receive_queue, skb);
2267 		unix_state_unlock(other);
2268 		other->sk_data_ready(other);
2269 		sent += size;
2270 	}
2271 
2272 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2273 	if (msg->msg_flags & MSG_OOB) {
2274 		err = queue_oob(sock, msg, other, &scm, fds_sent);
2275 		if (err)
2276 			goto out_err;
2277 		sent++;
2278 	}
2279 #endif
2280 
2281 	scm_destroy(&scm);
2282 
2283 	return sent;
2284 
2285 pipe_err_free:
2286 	unix_state_unlock(other);
2287 	kfree_skb(skb);
2288 pipe_err:
2289 	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
2290 		send_sig(SIGPIPE, current, 0);
2291 	err = -EPIPE;
2292 out_err:
2293 	scm_destroy(&scm);
	/* A partial write reports the byte count rather than the error. */
2294 	return sent ? : err;
2295 }
2296 
2297 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2298 				  size_t len)
2299 {
2300 	int err;
2301 	struct sock *sk = sock->sk;
2302 
2303 	err = sock_error(sk);
2304 	if (err)
2305 		return err;
2306 
2307 	if (sk->sk_state != TCP_ESTABLISHED)
2308 		return -ENOTCONN;
2309 
2310 	if (msg->msg_namelen)
2311 		msg->msg_namelen = 0;
2312 
2313 	return unix_dgram_sendmsg(sock, msg, len);
2314 }
2315 
2316 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2317 				  size_t size, int flags)
2318 {
2319 	struct sock *sk = sock->sk;
2320 
2321 	if (sk->sk_state != TCP_ESTABLISHED)
2322 		return -ENOTCONN;
2323 
2324 	return unix_dgram_recvmsg(sock, msg, size, flags);
2325 }
2326 
2327 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2328 {
2329 	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2330 
2331 	if (addr) {
2332 		msg->msg_namelen = addr->len;
2333 		memcpy(msg->msg_name, addr->name, addr->len);
2334 	}
2335 }
2336 
/*
 * Receive one datagram from @sk's receive queue into @msg.
 *
 * MSG_OOB is rejected with -EOPNOTSUPP.  Otherwise the function waits, up to
 * the socket's receive timeout, for an skb, copies at most @size bytes
 * starting at the peek offset, fills in the sender address when
 * msg->msg_name is set, and passes credentials plus any attached fds to
 * scm_recv_unix().  With MSG_PEEK the fds go through unix_peek_fds() instead
 * of being detached from the skb.
 *
 * Returns the number of bytes copied (or, when MSG_TRUNC is set in @flags,
 * the datagram length past the peek offset), 0 as EOF for a SOCK_SEQPACKET
 * socket whose receive side is shut down, or a negative errno.
 */
2337 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
2338 			 int flags)
2339 {
2340 	struct scm_cookie scm;
2341 	struct socket *sock = sk->sk_socket;
2342 	struct unix_sock *u = unix_sk(sk);
2343 	struct sk_buff *skb, *last;
2344 	long timeo;
2345 	int skip;
2346 	int err;
2347 
2348 	err = -EOPNOTSUPP;
2349 	if (flags&MSG_OOB)
2350 		goto out;
2351 
2352 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2353 
	/* On success the loop is left with u->iolock still held. */
2354 	do {
2355 		mutex_lock(&u->iolock);
2356 
2357 		skip = sk_peek_offset(sk, flags);
2358 		skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2359 					      &skip, &err, &last);
2360 		if (skb) {
2361 			if (!(flags & MSG_PEEK))
2362 				scm_stat_del(sk, skb);
2363 			break;
2364 		}
2365 
2366 		mutex_unlock(&u->iolock);
2367 
2368 		if (err != -EAGAIN)
2369 			break;
2370 	} while (timeo &&
2371 		 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2372 					      &err, &timeo, last));
2373 
2374 	if (!skb) { /* implies iolock unlocked */
2375 		unix_state_lock(sk);
2376 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2377 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2378 		    (sk->sk_shutdown & RCV_SHUTDOWN))
2379 			err = 0;
2380 		unix_state_unlock(sk);
2381 		goto out;
2382 	}
2383 
	/* An skb was obtained from the queue; wake any task waiting on peer_wait. */
2384 	if (wq_has_sleeper(&u->peer_wait))
2385 		wake_up_interruptible_sync_poll(&u->peer_wait,
2386 						EPOLLOUT | EPOLLWRNORM |
2387 						EPOLLWRBAND);
2388 
2389 	if (msg->msg_name)
2390 		unix_copy_addr(msg, skb->sk);
2391 
	/* Clamp to what the datagram holds; flag truncation if the buffer is short. */
2392 	if (size > skb->len - skip)
2393 		size = skb->len - skip;
2394 	else if (size < skb->len - skip)
2395 		msg->msg_flags |= MSG_TRUNC;
2396 
2397 	err = skb_copy_datagram_msg(skb, skip, msg, size);
2398 	if (err)
2399 		goto out_free;
2400 
2401 	if (sock_flag(sk, SOCK_RCVTSTAMP))
2402 		__sock_recv_timestamp(msg, sk, skb);
2403 
2404 	memset(&scm, 0, sizeof(scm));
2405 
2406 	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2407 	unix_set_secdata(&scm, skb);
2408 
2409 	if (!(flags & MSG_PEEK)) {
2410 		if (UNIXCB(skb).fp)
2411 			unix_detach_fds(&scm, skb);
2412 
2413 		sk_peek_offset_bwd(sk, skb->len);
2414 	} else {
2415 		/* It is questionable: on PEEK we could:
2416 		   - do not return fds - good, but too simple 8)
2417 		   - return fds, and do not return them on read (old strategy,
2418 		     apparently wrong)
2419 		   - clone fds (I chose it for now, it is the most universal
2420 		     solution)
2421 
2422 		   POSIX 1003.1g does not actually define this clearly
2423 		   at all. POSIX 1003.1g doesn't define a lot of things
2424 		   clearly however!
2425 
2426 		*/
2427 
2428 		sk_peek_offset_fwd(sk, size);
2429 
2430 		if (UNIXCB(skb).fp)
2431 			unix_peek_fds(&scm, skb);
2432 	}
	/* A caller passing MSG_TRUNC gets the real length, not the copied length. */
2433 	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2434 
2435 	scm_recv_unix(sock, msg, &scm, flags);
2436 
2437 out_free:
2438 	skb_free_datagram(sk, skb);
2439 	mutex_unlock(&u->iolock);
2440 out:
2441 	return err;
2442 }
2443 
2444 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2445 			      int flags)
2446 {
2447 	struct sock *sk = sock->sk;
2448 
2449 #ifdef CONFIG_BPF_SYSCALL
2450 	const struct proto *prot = READ_ONCE(sk->sk_prot);
2451 
2452 	if (prot != &unix_dgram_proto)
2453 		return prot->recvmsg(sk, msg, size, flags, NULL);
2454 #endif
2455 	return __unix_dgram_recvmsg(sk, msg, size, flags);
2456 }
2457 
2458 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
2459 {
2460 	struct unix_sock *u = unix_sk(sk);
2461 	struct sk_buff *skb;
2462 	int err;
2463 
2464 	mutex_lock(&u->iolock);
2465 	skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
2466 	mutex_unlock(&u->iolock);
2467 	if (!skb)
2468 		return err;
2469 
2470 	return recv_actor(sk, skb);
2471 }
2472 
2473 /*
2474  *	Sleep until more data has arrived. But check for races..
2475  */
/*
 * @last and @last_len describe the tail of the receive queue as the caller
 * last saw it.  The wait ends as soon as the tail skb or its length differs
 * from that snapshot, sk_err or RCV_SHUTDOWN is set, a signal is pending,
 * @timeo has run out, or the socket is found dead after waking.
 * @freezable adds TASK_FREEZABLE to the sleep state.
 *
 * Returns the remaining timeout.
 */
2476 static long unix_stream_data_wait(struct sock *sk, long timeo,
2477 				  struct sk_buff *last, unsigned int last_len,
2478 				  bool freezable)
2479 {
2480 	unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
2481 	struct sk_buff *tail;
2482 	DEFINE_WAIT(wait);
2483 
2484 	unix_state_lock(sk);
2485 
2486 	for (;;) {
		/* Join the wait queue before testing the conditions. */
2487 		prepare_to_wait(sk_sleep(sk), &wait, state);
2488 
2489 		tail = skb_peek_tail(&sk->sk_receive_queue);
2490 		if (tail != last ||
2491 		    (tail && tail->len != last_len) ||
2492 		    sk->sk_err ||
2493 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2494 		    signal_pending(current) ||
2495 		    !timeo)
2496 			break;
2497 
2498 		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
		/* The state lock is dropped only for the duration of the sleep. */
2499 		unix_state_unlock(sk);
2500 		timeo = schedule_timeout(timeo);
2501 		unix_state_lock(sk);
2502 
		/* Note: on this exit SOCKWQ_ASYNC_WAITDATA is left set. */
2503 		if (sock_flag(sk, SOCK_DEAD))
2504 			break;
2505 
2506 		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2507 	}
2508 
2509 	finish_wait(sk_sleep(sk), &wait);
2510 	unix_state_unlock(sk);
2511 	return timeo;
2512 }
2513 
2514 static unsigned int unix_skb_len(const struct sk_buff *skb)
2515 {
2516 	return skb->len - UNIXCB(skb).consumed;
2517 }
2518 
/*
 * Parameters of one stream read, shared between unix_stream_read_generic()
 * and the actor that actually moves the bytes (copy to a msghdr or splice
 * into a pipe).
 */
2519 struct unix_stream_read_state {
	/* Called as (skb, skip, chunk, state); returns bytes handled or < 0. */
2520 	int (*recv_actor)(struct sk_buff *, int, int,
2521 			  struct unix_stream_read_state *);
2522 	struct socket *socket;
	/* Destination message; left unset (NULL) by the splice path. */
2523 	struct msghdr *msg;
	/* Destination pipe; only read by unix_stream_splice_actor(). */
2524 	struct pipe_inode_info *pipe;
	/* Maximum number of bytes to deliver. */
2525 	size_t size;
	/* MSG_* flags of the read. */
2526 	int flags;
	/* Splice flags, forwarded to skb_splice_bits() by the splice actor. */
2527 	unsigned int splice_flags;
2528 };
2529 
2530 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
/*
 * Serve a read with MSG_OOB: deliver the pending out-of-band byte.
 *
 * Fails with -EINVAL when SOCK_URGINLINE is set or no OOB skb is recorded.
 * Without MSG_PEEK the oob_skb pointer is cleared and the byte is marked
 * consumed; with MSG_PEEK the pointer stays and a temporary reference is
 * taken for the duration of the copy.
 *
 * Returns 1 with MSG_OOB set in the message flags, or -EFAULT when the
 * actor reported an error.
 */
2531 static int unix_stream_recv_urg(struct unix_stream_read_state *state)
2532 {
2533 	struct socket *sock = state->socket;
2534 	struct sock *sk = sock->sk;
2535 	struct unix_sock *u = unix_sk(sk);
2536 	int chunk = 1;
2537 	struct sk_buff *oob_skb;
2538 
2539 	mutex_lock(&u->iolock);
2540 	unix_state_lock(sk);
2541 
2542 	if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
2543 		unix_state_unlock(sk);
2544 		mutex_unlock(&u->iolock);
2545 		return -EINVAL;
2546 	}
2547 
2548 	oob_skb = u->oob_skb;
2549 
	/* Either take over the reference held via u->oob_skb, or add one. */
2550 	if (!(state->flags & MSG_PEEK))
2551 		WRITE_ONCE(u->oob_skb, NULL);
2552 	else
2553 		skb_get(oob_skb);
2554 	unix_state_unlock(sk);
2555 
2556 	chunk = state->recv_actor(oob_skb, 0, chunk, state);
2557 
2558 	if (!(state->flags & MSG_PEEK))
2559 		UNIXCB(oob_skb).consumed += 1;
2560 
	/* Drops the reference obtained above in both the peek and non-peek case. */
2561 	consume_skb(oob_skb);
2562 
2563 	mutex_unlock(&u->iolock);
2564 
2565 	if (chunk < 0)
2566 		return -EFAULT;
2567 
2568 	state->msg->msg_flags |= MSG_OOB;
2569 	return 1;
2570 }
2571 
/*
 * Decide how a normal (non-MSG_OOB) stream read treats the skb at the head
 * of the receive queue when out-of-band data may be involved.
 * unix_stream_read_generic() calls this with the state lock held; @copied is
 * the number of bytes the current read has already delivered.
 *
 * Returns the skb to read from, or NULL, in which case the caller ends the
 * read if @copied is non-zero and otherwise re-examines the queue.
 */
2572 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
2573 				  int flags, int copied)
2574 {
2575 	struct unix_sock *u = unix_sk(sk);
2576 
	/* Fully consumed skb left at the head: remove it unless only peeking. */
2577 	if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
2578 		skb_unlink(skb, &sk->sk_receive_queue);
2579 		consume_skb(skb);
2580 		skb = NULL;
2581 	} else {
2582 		if (skb == u->oob_skb) {
2583 			if (copied) {
				/* Data was already returned: stop before the OOB byte. */
2584 				skb = NULL;
2585 			} else if (sock_flag(sk, SOCK_URGINLINE)) {
				/* Inline mode: the byte is read as normal data; clear the mark. */
2586 				if (!(flags & MSG_PEEK)) {
2587 					WRITE_ONCE(u->oob_skb, NULL);
2588 					consume_skb(skb);
2589 				}
2590 			} else if (!(flags & MSG_PEEK)) {
				/* Not inline: drop the OOB skb from the queue and use the new head. */
2591 				skb_unlink(skb, &sk->sk_receive_queue);
2592 				consume_skb(skb);
2593 				skb = skb_peek(&sk->sk_receive_queue);
2594 			}
2595 		}
2596 	}
2597 	return skb;
2598 }
2599 #endif
2600 
2601 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
2602 {
2603 	if (unlikely(sk->sk_state != TCP_ESTABLISHED))
2604 		return -ENOTCONN;
2605 
2606 	return unix_read_skb(sk, recv_actor);
2607 }
2608 
/*
 * Common receive path for stream sockets, used by the recvmsg and
 * splice-read entry points.
 *
 * Data is taken from the receive queue skb by skb and handed to
 * state->recv_actor until state->size bytes have been delivered, the queue
 * is empty with the low-water target met, or a boundary ends the read early
 * (a writer with different credentials, passed fds, the OOB mark, an error,
 * shutdown).  MSG_OOB reads are diverted to unix_stream_recv_urg().
 * @freezable is forwarded to unix_stream_data_wait().
 *
 * Returns the `copied` value when it is non-zero (a byte count, or -EFAULT
 * if the very first actor call failed), otherwise `err`: 0 or a negative
 * errno.
 */
2609 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2610 				    bool freezable)
2611 {
2612 	struct scm_cookie scm;
2613 	struct socket *sock = state->socket;
2614 	struct sock *sk = sock->sk;
2615 	struct unix_sock *u = unix_sk(sk);
2616 	int copied = 0;
2617 	int flags = state->flags;
2618 	int noblock = flags & MSG_DONTWAIT;
2619 	bool check_creds = false;
2620 	int target;
2621 	int err = 0;
2622 	long timeo;
2623 	int skip;
2624 	size_t size = state->size;
2625 	unsigned int last_len;
2626 
2627 	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2628 		err = -EINVAL;
2629 		goto out;
2630 	}
2631 
2632 	if (unlikely(flags & MSG_OOB)) {
2633 		err = -EOPNOTSUPP;
2634 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2635 		err = unix_stream_recv_urg(state);
2636 #endif
2637 		goto out;
2638 	}
2639 
2640 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2641 	timeo = sock_rcvtimeo(sk, noblock);
2642 
2643 	memset(&scm, 0, sizeof(scm));
2644 
2645 	/* Lock the socket to prevent queue disordering
2646 	 * while sleeps in memcpy_tomsg
2647 	 */
2648 	mutex_lock(&u->iolock);
2649 
2650 	skip = max(sk_peek_offset(sk, flags), 0);
2651 
2652 	do {
2653 		int chunk;
2654 		bool drop_skb;
2655 		struct sk_buff *skb, *last;
2656 
		/* redo: re-read the queue head from scratch under the state lock. */
2657 redo:
2658 		unix_state_lock(sk);
2659 		if (sock_flag(sk, SOCK_DEAD)) {
2660 			err = -ECONNRESET;
2661 			goto unlock;
2662 		}
2663 		last = skb = skb_peek(&sk->sk_receive_queue);
2664 		last_len = last ? last->len : 0;
2665 
2666 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2667 		if (skb) {
2668 			skb = manage_oob(skb, sk, flags, copied);
2669 			if (!skb) {
2670 				unix_state_unlock(sk);
2671 				if (copied)
2672 					break;
2673 				goto redo;
2674 			}
2675 		}
2676 #endif
		/* again: entered with the state lock held and skb possibly NULL. */
2677 again:
2678 		if (skb == NULL) {
2679 			if (copied >= target)
2680 				goto unlock;
2681 
2682 			/*
2683 			 *	POSIX 1003.1g mandates this order.
2684 			 */
2685 
2686 			err = sock_error(sk);
2687 			if (err)
2688 				goto unlock;
2689 			if (sk->sk_shutdown & RCV_SHUTDOWN)
2690 				goto unlock;
2691 
2692 			unix_state_unlock(sk);
2693 			if (!timeo) {
2694 				err = -EAGAIN;
2695 				break;
2696 			}
2697 
			/* iolock is released while sleeping and re-taken before retrying. */
2698 			mutex_unlock(&u->iolock);
2699 
2700 			timeo = unix_stream_data_wait(sk, timeo, last,
2701 						      last_len, freezable);
2702 
			/* Interrupted: iolock is not held here, so skip the common exit. */
2703 			if (signal_pending(current)) {
2704 				err = sock_intr_errno(timeo);
2705 				scm_destroy(&scm);
2706 				goto out;
2707 			}
2708 
2709 			mutex_lock(&u->iolock);
2710 			goto redo;
2711 unlock:
2712 			unix_state_unlock(sk);
2713 			break;
2714 		}
2715 
		/* Walk past skbs that lie entirely below the peek offset. */
2716 		while (skip >= unix_skb_len(skb)) {
2717 			skip -= unix_skb_len(skb);
2718 			last = skb;
2719 			last_len = skb->len;
2720 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2721 			if (!skb)
2722 				goto again;
2723 		}
2724 
2725 		unix_state_unlock(sk);
2726 
2727 		if (check_creds) {
2728 			/* Never glue messages from different writers */
2729 			if (!unix_skb_scm_eq(skb, &scm))
2730 				break;
2731 		} else if (test_bit(SOCK_PASSCRED, &sock->flags) ||
2732 			   test_bit(SOCK_PASSPIDFD, &sock->flags)) {
2733 			/* Copy credentials */
2734 			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2735 			unix_set_secdata(&scm, skb);
2736 			check_creds = true;
2737 		}
2738 
2739 		/* Copy address just once */
2740 		if (state->msg && state->msg->msg_name) {
2741 			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2742 					 state->msg->msg_name);
2743 			unix_copy_addr(state->msg, skb->sk);
2744 			sunaddr = NULL;
2745 		}
2746 
2747 		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
		/* Hold a reference across the actor call; released by consume_skb() below. */
2748 		skb_get(skb);
2749 		chunk = state->recv_actor(skb, skip, chunk, state);
2750 		drop_skb = !unix_skb_len(skb);
2751 		/* skb is only safe to use if !drop_skb */
2752 		consume_skb(skb);
2753 		if (chunk < 0) {
2754 			if (copied == 0)
2755 				copied = -EFAULT;
2756 			break;
2757 		}
2758 		copied += chunk;
2759 		size -= chunk;
2760 
2761 		if (drop_skb) {
2762 			/* the skb was touched by a concurrent reader;
2763 			 * we should not expect anything from this skb
2764 			 * anymore and assume it invalid - we can be
2765 			 * sure it was dropped from the socket queue
2766 			 *
2767 			 * let's report a short read
2768 			 */
2769 			err = 0;
2770 			break;
2771 		}
2772 
2773 		/* Mark read part of skb as used */
2774 		if (!(flags & MSG_PEEK)) {
2775 			UNIXCB(skb).consumed += chunk;
2776 
2777 			sk_peek_offset_bwd(sk, chunk);
2778 
2779 			if (UNIXCB(skb).fp) {
2780 				scm_stat_del(sk, skb);
2781 				unix_detach_fds(&scm, skb);
2782 			}
2783 
			/* skb not exhausted: the request has been filled from it. */
2784 			if (unix_skb_len(skb))
2785 				break;
2786 
2787 			skb_unlink(skb, &sk->sk_receive_queue);
2788 			consume_skb(skb);
2789 
			/* Stop after an skb that carried fds. */
2790 			if (scm.fp)
2791 				break;
2792 		} else {
2793 			/* It is questionable, see note in unix_dgram_recvmsg.
2794 			 */
2795 			if (UNIXCB(skb).fp)
2796 				unix_peek_fds(&scm, skb);
2797 
2798 			sk_peek_offset_fwd(sk, chunk);
2799 
2800 			if (UNIXCB(skb).fp)
2801 				break;
2802 
			/* Peeking: step to the next skb and re-enter at "again" with the lock held. */
2803 			skip = 0;
2804 			last = skb;
2805 			last_len = skb->len;
2806 			unix_state_lock(sk);
2807 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2808 			if (skb)
2809 				goto again;
2810 			unix_state_unlock(sk);
2811 			break;
2812 		}
2813 	} while (size);
2814 
2815 	mutex_unlock(&u->iolock);
	/* Without a msghdr (splice path) the collected scm data is dropped. */
2816 	if (state->msg)
2817 		scm_recv_unix(sock, state->msg, &scm, flags);
2818 	else
2819 		scm_destroy(&scm);
2820 out:
2821 	return copied ? : err;
2822 }
2823 
2824 static int unix_stream_read_actor(struct sk_buff *skb,
2825 				  int skip, int chunk,
2826 				  struct unix_stream_read_state *state)
2827 {
2828 	int ret;
2829 
2830 	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2831 				    state->msg, chunk);
2832 	return ret ?: chunk;
2833 }
2834 
2835 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
2836 			  size_t size, int flags)
2837 {
2838 	struct unix_stream_read_state state = {
2839 		.recv_actor = unix_stream_read_actor,
2840 		.socket = sk->sk_socket,
2841 		.msg = msg,
2842 		.size = size,
2843 		.flags = flags
2844 	};
2845 
2846 	return unix_stream_read_generic(&state, true);
2847 }
2848 
2849 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2850 			       size_t size, int flags)
2851 {
2852 	struct unix_stream_read_state state = {
2853 		.recv_actor = unix_stream_read_actor,
2854 		.socket = sock,
2855 		.msg = msg,
2856 		.size = size,
2857 		.flags = flags
2858 	};
2859 
2860 #ifdef CONFIG_BPF_SYSCALL
2861 	struct sock *sk = sock->sk;
2862 	const struct proto *prot = READ_ONCE(sk->sk_prot);
2863 
2864 	if (prot != &unix_stream_proto)
2865 		return prot->recvmsg(sk, msg, size, flags, NULL);
2866 #endif
2867 	return unix_stream_read_generic(&state, true);
2868 }
2869 
2870 static int unix_stream_splice_actor(struct sk_buff *skb,
2871 				    int skip, int chunk,
2872 				    struct unix_stream_read_state *state)
2873 {
2874 	return skb_splice_bits(skb, state->socket->sk,
2875 			       UNIXCB(skb).consumed + skip,
2876 			       state->pipe, chunk, state->splice_flags);
2877 }
2878 
2879 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2880 				       struct pipe_inode_info *pipe,
2881 				       size_t size, unsigned int flags)
2882 {
2883 	struct unix_stream_read_state state = {
2884 		.recv_actor = unix_stream_splice_actor,
2885 		.socket = sock,
2886 		.pipe = pipe,
2887 		.size = size,
2888 		.splice_flags = flags,
2889 	};
2890 
2891 	if (unlikely(*ppos))
2892 		return -ESPIPE;
2893 
2894 	if (sock->file->f_flags & O_NONBLOCK ||
2895 	    flags & SPLICE_F_NONBLOCK)
2896 		state.flags = MSG_DONTWAIT;
2897 
2898 	return unix_stream_read_generic(&state, false);
2899 }
2900 
/*
 * shutdown() for AF_UNIX sockets.
 *
 * Records the requested direction(s) in sk->sk_shutdown.  For SOCK_STREAM
 * and SOCK_SEQPACKET sockets with a peer, the mirrored direction(s) are also
 * recorded on the peer, which is then notified through its state-change
 * callback and an async wake-up.
 *
 * Returns 0, or -EINVAL when @mode is outside SHUT_RD..SHUT_RDWR.
 */
2901 static int unix_shutdown(struct socket *sock, int mode)
2902 {
2903 	struct sock *sk = sock->sk;
2904 	struct sock *other;
2905 
2906 	if (mode < SHUT_RD || mode > SHUT_RDWR)
2907 		return -EINVAL;
2908 	/* This maps:
2909 	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2910 	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2911 	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2912 	 */
2913 	++mode;
2914 
2915 	unix_state_lock(sk);
2916 	WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
2917 	other = unix_peer(sk);
	/* Pin the peer so it can be used after the state lock is dropped. */
2918 	if (other)
2919 		sock_hold(other);
2920 	unix_state_unlock(sk);
2921 	sk->sk_state_change(sk);
2922 
2923 	if (other &&
2924 		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2925 
2926 		int peer_mode = 0;
2927 		const struct proto *prot = READ_ONCE(other->sk_prot);
2928 
2929 		if (prot->unhash)
2930 			prot->unhash(other);
		/* Our receive shutdown is the peer's send shutdown, and vice versa. */
2931 		if (mode&RCV_SHUTDOWN)
2932 			peer_mode |= SEND_SHUTDOWN;
2933 		if (mode&SEND_SHUTDOWN)
2934 			peer_mode |= RCV_SHUTDOWN;
2935 		unix_state_lock(other);
2936 		WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
2937 		unix_state_unlock(other);
2938 		other->sk_state_change(other);
2939 		if (peer_mode == SHUTDOWN_MASK)
2940 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2941 		else if (peer_mode & RCV_SHUTDOWN)
2942 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2943 	}
2944 	if (other)
2945 		sock_put(other);
2946 
2947 	return 0;
2948 }
2949 
2950 long unix_inq_len(struct sock *sk)
2951 {
2952 	struct sk_buff *skb;
2953 	long amount = 0;
2954 
2955 	if (sk->sk_state == TCP_LISTEN)
2956 		return -EINVAL;
2957 
2958 	spin_lock(&sk->sk_receive_queue.lock);
2959 	if (sk->sk_type == SOCK_STREAM ||
2960 	    sk->sk_type == SOCK_SEQPACKET) {
2961 		skb_queue_walk(&sk->sk_receive_queue, skb)
2962 			amount += unix_skb_len(skb);
2963 	} else {
2964 		skb = skb_peek(&sk->sk_receive_queue);
2965 		if (skb)
2966 			amount = skb->len;
2967 	}
2968 	spin_unlock(&sk->sk_receive_queue.lock);
2969 
2970 	return amount;
2971 }
2972 EXPORT_SYMBOL_GPL(unix_inq_len);
2973 
/* Report @sk's outstanding send-side memory, as given by sk_wmem_alloc_get(). */
long unix_outq_len(struct sock *sk)
{
	long queued = sk_wmem_alloc_get(sk);

	return queued;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2979 
2980 static int unix_open_file(struct sock *sk)
2981 {
2982 	struct path path;
2983 	struct file *f;
2984 	int fd;
2985 
2986 	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2987 		return -EPERM;
2988 
2989 	if (!smp_load_acquire(&unix_sk(sk)->addr))
2990 		return -ENOENT;
2991 
2992 	path = unix_sk(sk)->path;
2993 	if (!path.dentry)
2994 		return -ENOENT;
2995 
2996 	path_get(&path);
2997 
2998 	fd = get_unused_fd_flags(O_CLOEXEC);
2999 	if (fd < 0)
3000 		goto out;
3001 
3002 	f = dentry_open(&path, O_PATH, current_cred());
3003 	if (IS_ERR(f)) {
3004 		put_unused_fd(fd);
3005 		fd = PTR_ERR(f);
3006 		goto out;
3007 	}
3008 
3009 	fd_install(fd, f);
3010 out:
3011 	path_put(&path);
3012 
3013 	return fd;
3014 }
3015 
3016 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3017 {
3018 	struct sock *sk = sock->sk;
3019 	long amount = 0;
3020 	int err;
3021 
3022 	switch (cmd) {
3023 	case SIOCOUTQ:
3024 		amount = unix_outq_len(sk);
3025 		err = put_user(amount, (int __user *)arg);
3026 		break;
3027 	case SIOCINQ:
3028 		amount = unix_inq_len(sk);
3029 		if (amount < 0)
3030 			err = amount;
3031 		else
3032 			err = put_user(amount, (int __user *)arg);
3033 		break;
3034 	case SIOCUNIXFILE:
3035 		err = unix_open_file(sk);
3036 		break;
3037 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3038 	case SIOCATMARK:
3039 		{
3040 			struct sk_buff *skb;
3041 			int answ = 0;
3042 
3043 			skb = skb_peek(&sk->sk_receive_queue);
3044 			if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
3045 				answ = 1;
3046 			err = put_user(answ, (int __user *)arg);
3047 		}
3048 		break;
3049 #endif
3050 	default:
3051 		err = -ENOIOCTLCMD;
3052 		break;
3053 	}
3054 	return err;
3055 }
3056 
3057 #ifdef CONFIG_COMPAT
/* Compat ioctl: convert @arg with compat_ptr() and defer to unix_ioctl(). */
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	unsigned long native_arg = (unsigned long)compat_ptr(arg);

	return unix_ioctl(sock, cmd, native_arg);
}
3062 #endif
3063 
3064 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
3065 {
3066 	struct sock *sk = sock->sk;
3067 	__poll_t mask;
3068 	u8 shutdown;
3069 
3070 	sock_poll_wait(file, sock, wait);
3071 	mask = 0;
3072 	shutdown = READ_ONCE(sk->sk_shutdown);
3073 
3074 	/* exceptional events? */
3075 	if (READ_ONCE(sk->sk_err))
3076 		mask |= EPOLLERR;
3077 	if (shutdown == SHUTDOWN_MASK)
3078 		mask |= EPOLLHUP;
3079 	if (shutdown & RCV_SHUTDOWN)
3080 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3081 
3082 	/* readable? */
3083 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3084 		mask |= EPOLLIN | EPOLLRDNORM;
3085 	if (sk_is_readable(sk))
3086 		mask |= EPOLLIN | EPOLLRDNORM;
3087 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3088 	if (READ_ONCE(unix_sk(sk)->oob_skb))
3089 		mask |= EPOLLPRI;
3090 #endif
3091 
3092 	/* Connection-based need to check for termination and startup */
3093 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
3094 	    sk->sk_state == TCP_CLOSE)
3095 		mask |= EPOLLHUP;
3096 
3097 	/*
3098 	 * we set writable also when the other side has shut down the
3099 	 * connection. This prevents stuck sockets.
3100 	 */
3101 	if (unix_writable(sk))
3102 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3103 
3104 	return mask;
3105 }
3106 
3107 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
3108 				    poll_table *wait)
3109 {
3110 	struct sock *sk = sock->sk, *other;
3111 	unsigned int writable;
3112 	__poll_t mask;
3113 	u8 shutdown;
3114 
3115 	sock_poll_wait(file, sock, wait);
3116 	mask = 0;
3117 	shutdown = READ_ONCE(sk->sk_shutdown);
3118 
3119 	/* exceptional events? */
3120 	if (READ_ONCE(sk->sk_err) ||
3121 	    !skb_queue_empty_lockless(&sk->sk_error_queue))
3122 		mask |= EPOLLERR |
3123 			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
3124 
3125 	if (shutdown & RCV_SHUTDOWN)
3126 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3127 	if (shutdown == SHUTDOWN_MASK)
3128 		mask |= EPOLLHUP;
3129 
3130 	/* readable? */
3131 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3132 		mask |= EPOLLIN | EPOLLRDNORM;
3133 	if (sk_is_readable(sk))
3134 		mask |= EPOLLIN | EPOLLRDNORM;
3135 
3136 	/* Connection-based need to check for termination and startup */
3137 	if (sk->sk_type == SOCK_SEQPACKET) {
3138 		if (sk->sk_state == TCP_CLOSE)
3139 			mask |= EPOLLHUP;
3140 		/* connection hasn't started yet? */
3141 		if (sk->sk_state == TCP_SYN_SENT)
3142 			return mask;
3143 	}
3144 
3145 	/* No write status requested, avoid expensive OUT tests. */
3146 	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
3147 		return mask;
3148 
3149 	writable = unix_writable(sk);
3150 	if (writable) {
3151 		unix_state_lock(sk);
3152 
3153 		other = unix_peer(sk);
3154 		if (other && unix_peer(other) != sk &&
3155 		    unix_recvq_full_lockless(other) &&
3156 		    unix_dgram_peer_wake_me(sk, other))
3157 			writable = 0;
3158 
3159 		unix_state_unlock(sk);
3160 	}
3161 
3162 	if (writable)
3163 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3164 	else
3165 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
3166 
3167 	return mask;
3168 }
3169 
3170 #ifdef CONFIG_PROC_FS
3171 
/*
 * A seq_file position packs two values: the hash bucket index in the bits
 * above BUCKET_SPACE and, in the low BUCKET_SPACE bits, a 1-based offset of
 * the socket within that bucket (see unix_from_bucket()).
 */
3172 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
3173 
3174 #define get_bucket(x) ((x) >> BUCKET_SPACE)
3175 #define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1))
3176 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
3177 
3178 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
3179 {
3180 	unsigned long offset = get_offset(*pos);
3181 	unsigned long bucket = get_bucket(*pos);
3182 	unsigned long count = 0;
3183 	struct sock *sk;
3184 
3185 	for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]);
3186 	     sk; sk = sk_next(sk)) {
3187 		if (++count == offset)
3188 			break;
3189 	}
3190 
3191 	return sk;
3192 }
3193 
3194 static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
3195 {
3196 	unsigned long bucket = get_bucket(*pos);
3197 	struct net *net = seq_file_net(seq);
3198 	struct sock *sk;
3199 
3200 	while (bucket < UNIX_HASH_SIZE) {
3201 		spin_lock(&net->unx.table.locks[bucket]);
3202 
3203 		sk = unix_from_bucket(seq, pos);
3204 		if (sk)
3205 			return sk;
3206 
3207 		spin_unlock(&net->unx.table.locks[bucket]);
3208 
3209 		*pos = set_bucket_offset(++bucket, 1);
3210 	}
3211 
3212 	return NULL;
3213 }
3214 
3215 static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
3216 				  loff_t *pos)
3217 {
3218 	unsigned long bucket = get_bucket(*pos);
3219 
3220 	sk = sk_next(sk);
3221 	if (sk)
3222 		return sk;
3223 
3224 
3225 	spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]);
3226 
3227 	*pos = set_bucket_offset(++bucket, 1);
3228 
3229 	return unix_get_first(seq, pos);
3230 }
3231 
3232 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
3233 {
3234 	if (!*pos)
3235 		return SEQ_START_TOKEN;
3236 
3237 	return unix_get_first(seq, pos);
3238 }
3239 
3240 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3241 {
3242 	++*pos;
3243 
3244 	if (v == SEQ_START_TOKEN)
3245 		return unix_get_first(seq, pos);
3246 
3247 	return unix_get_next(seq, v, pos);
3248 }
3249 
3250 static void unix_seq_stop(struct seq_file *seq, void *v)
3251 {
3252 	struct sock *sk = v;
3253 
3254 	if (sk)
3255 		spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]);
3256 }
3257 
3258 static int unix_seq_show(struct seq_file *seq, void *v)
3259 {
3260 
3261 	if (v == SEQ_START_TOKEN)
3262 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
3263 			 "Inode Path\n");
3264 	else {
3265 		struct sock *s = v;
3266 		struct unix_sock *u = unix_sk(s);
3267 		unix_state_lock(s);
3268 
3269 		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
3270 			s,
3271 			refcount_read(&s->sk_refcnt),
3272 			0,
3273 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
3274 			s->sk_type,
3275 			s->sk_socket ?
3276 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
3277 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
3278 			sock_i_ino(s));
3279 
3280 		if (u->addr) {	// under a hash table lock here
3281 			int i, len;
3282 			seq_putc(seq, ' ');
3283 
3284 			i = 0;
3285 			len = u->addr->len -
3286 				offsetof(struct sockaddr_un, sun_path);
3287 			if (u->addr->name->sun_path[0]) {
3288 				len--;
3289 			} else {
3290 				seq_putc(seq, '@');
3291 				i++;
3292 			}
3293 			for ( ; i < len; i++)
3294 				seq_putc(seq, u->addr->name->sun_path[i] ?:
3295 					 '@');
3296 		}
3297 		unix_state_unlock(s);
3298 		seq_putc(seq, '\n');
3299 	}
3300 
3301 	return 0;
3302 }
3303 
/* seq_file callbacks built from the unix_seq_* iterator functions above. */
3304 static const struct seq_operations unix_seq_ops = {
3305 	.start  = unix_seq_start,
3306 	.next   = unix_seq_next,
3307 	.stop   = unix_seq_stop,
3308 	.show   = unix_seq_show,
3309 };
3310 
3311 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
/*
 * Per-seq_file state of the BPF unix socket iterator: a batch of referenced
 * sockets collected from one hash bucket.
 */
3312 struct bpf_unix_iter_state {
3313 	struct seq_net_private p;
	/* Index of the next batch entry still holding a reference. */
3314 	unsigned int cur_sk;
	/* Number of entries currently stored in batch[]. */
3315 	unsigned int end_sk;
	/* Capacity of batch[]. */
3316 	unsigned int max_sk;
3317 	struct sock **batch;
	/* When set, the next batch starts at the following bucket. */
3318 	bool st_bucket_done;
3319 };
3320 
/* Context handed to the BPF program by unix_prog_seq_show(). */
3321 struct bpf_iter__unix {
3322 	__bpf_md_ptr(struct bpf_iter_meta *, meta);
3323 	__bpf_md_ptr(struct unix_sock *, unix_sk);
3324 	uid_t uid __aligned(8);
3325 };
3326 
3327 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
3328 			      struct unix_sock *unix_sk, uid_t uid)
3329 {
3330 	struct bpf_iter__unix ctx;
3331 
3332 	meta->seq_num--;  /* skip SEQ_START_TOKEN */
3333 	ctx.meta = meta;
3334 	ctx.unix_sk = unix_sk;
3335 	ctx.uid = uid;
3336 	return bpf_iter_run_prog(prog, &ctx);
3337 }
3338 
3339 static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
3340 
3341 {
3342 	struct bpf_unix_iter_state *iter = seq->private;
3343 	unsigned int expected = 1;
3344 	struct sock *sk;
3345 
3346 	sock_hold(start_sk);
3347 	iter->batch[iter->end_sk++] = start_sk;
3348 
3349 	for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
3350 		if (iter->end_sk < iter->max_sk) {
3351 			sock_hold(sk);
3352 			iter->batch[iter->end_sk++] = sk;
3353 		}
3354 
3355 		expected++;
3356 	}
3357 
3358 	spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]);
3359 
3360 	return expected;
3361 }
3362 
3363 static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
3364 {
3365 	while (iter->cur_sk < iter->end_sk)
3366 		sock_put(iter->batch[iter->cur_sk++]);
3367 }
3368 
3369 static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
3370 				       unsigned int new_batch_sz)
3371 {
3372 	struct sock **new_batch;
3373 
3374 	new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
3375 			     GFP_USER | __GFP_NOWARN);
3376 	if (!new_batch)
3377 		return -ENOMEM;
3378 
3379 	bpf_iter_unix_put_batch(iter);
3380 	kvfree(iter->batch);
3381 	iter->batch = new_batch;
3382 	iter->max_sk = new_batch_sz;
3383 
3384 	return 0;
3385 }
3386 
3387 static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
3388 					loff_t *pos)
3389 {
3390 	struct bpf_unix_iter_state *iter = seq->private;
3391 	unsigned int expected;
3392 	bool resized = false;
3393 	struct sock *sk;
3394 
3395 	if (iter->st_bucket_done)
3396 		*pos = set_bucket_offset(get_bucket(*pos) + 1, 1);
3397 
3398 again:
3399 	/* Get a new batch */
3400 	iter->cur_sk = 0;
3401 	iter->end_sk = 0;
3402 
3403 	sk = unix_get_first(seq, pos);
3404 	if (!sk)
3405 		return NULL; /* Done */
3406 
3407 	expected = bpf_iter_unix_hold_batch(seq, sk);
3408 
3409 	if (iter->end_sk == expected) {
3410 		iter->st_bucket_done = true;
3411 		return sk;
3412 	}
3413 
3414 	if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) {
3415 		resized = true;
3416 		goto again;
3417 	}
3418 
3419 	return sk;
3420 }
3421 
3422 static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
3423 {
3424 	if (!*pos)
3425 		return SEQ_START_TOKEN;
3426 
3427 	/* bpf iter does not support lseek, so it always
3428 	 * continue from where it was stop()-ped.
3429 	 */
3430 	return bpf_iter_unix_batch(seq, pos);
3431 }
3432 
3433 static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3434 {
3435 	struct bpf_unix_iter_state *iter = seq->private;
3436 	struct sock *sk;
3437 
3438 	/* Whenever seq_next() is called, the iter->cur_sk is
3439 	 * done with seq_show(), so advance to the next sk in
3440 	 * the batch.
3441 	 */
3442 	if (iter->cur_sk < iter->end_sk)
3443 		sock_put(iter->batch[iter->cur_sk++]);
3444 
3445 	++*pos;
3446 
3447 	if (iter->cur_sk < iter->end_sk)
3448 		sk = iter->batch[iter->cur_sk];
3449 	else
3450 		sk = bpf_iter_unix_batch(seq, pos);
3451 
3452 	return sk;
3453 }
3454 
3455 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
3456 {
3457 	struct bpf_iter_meta meta;
3458 	struct bpf_prog *prog;
3459 	struct sock *sk = v;
3460 	uid_t uid;
3461 	bool slow;
3462 	int ret;
3463 
3464 	if (v == SEQ_START_TOKEN)
3465 		return 0;
3466 
3467 	slow = lock_sock_fast(sk);
3468 
3469 	if (unlikely(sk_unhashed(sk))) {
3470 		ret = SEQ_SKIP;
3471 		goto unlock;
3472 	}
3473 
3474 	uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
3475 	meta.seq = seq;
3476 	prog = bpf_iter_get_info(&meta, false);
3477 	ret = unix_prog_seq_show(prog, &meta, v, uid);
3478 unlock:
3479 	unlock_sock_fast(sk, slow);
3480 	return ret;
3481 }
3482 
3483 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
3484 {
3485 	struct bpf_unix_iter_state *iter = seq->private;
3486 	struct bpf_iter_meta meta;
3487 	struct bpf_prog *prog;
3488 
3489 	if (!v) {
3490 		meta.seq = seq;
3491 		prog = bpf_iter_get_info(&meta, true);
3492 		if (prog)
3493 			(void)unix_prog_seq_show(prog, &meta, v, 0);
3494 	}
3495 
3496 	if (iter->cur_sk < iter->end_sk)
3497 		bpf_iter_unix_put_batch(iter);
3498 }
3499 
3500 static const struct seq_operations bpf_iter_unix_seq_ops = {
3501 	.start	= bpf_iter_unix_seq_start,
3502 	.next	= bpf_iter_unix_seq_next,
3503 	.stop	= bpf_iter_unix_seq_stop,
3504 	.show	= bpf_iter_unix_seq_show,
3505 };
3506 #endif
3507 #endif
3508 
3509 static const struct net_proto_family unix_family_ops = {
3510 	.family = PF_UNIX,
3511 	.create = unix_create,
3512 	.owner	= THIS_MODULE,
3513 };
3514 
3515 
3516 static int __net_init unix_net_init(struct net *net)
3517 {
3518 	int i;
3519 
3520 	net->unx.sysctl_max_dgram_qlen = 10;
3521 	if (unix_sysctl_register(net))
3522 		goto out;
3523 
3524 #ifdef CONFIG_PROC_FS
3525 	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
3526 			     sizeof(struct seq_net_private)))
3527 		goto err_sysctl;
3528 #endif
3529 
3530 	net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE,
3531 					      sizeof(spinlock_t), GFP_KERNEL);
3532 	if (!net->unx.table.locks)
3533 		goto err_proc;
3534 
3535 	net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE,
3536 						sizeof(struct hlist_head),
3537 						GFP_KERNEL);
3538 	if (!net->unx.table.buckets)
3539 		goto free_locks;
3540 
3541 	for (i = 0; i < UNIX_HASH_SIZE; i++) {
3542 		spin_lock_init(&net->unx.table.locks[i]);
3543 		INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
3544 	}
3545 
3546 	return 0;
3547 
3548 free_locks:
3549 	kvfree(net->unx.table.locks);
3550 err_proc:
3551 #ifdef CONFIG_PROC_FS
3552 	remove_proc_entry("unix", net->proc_net);
3553 err_sysctl:
3554 #endif
3555 	unix_sysctl_unregister(net);
3556 out:
3557 	return -ENOMEM;
3558 }
3559 
3560 static void __net_exit unix_net_exit(struct net *net)
3561 {
3562 	kvfree(net->unx.table.buckets);
3563 	kvfree(net->unx.table.locks);
3564 	unix_sysctl_unregister(net);
3565 	remove_proc_entry("unix", net->proc_net);
3566 }
3567 
3568 static struct pernet_operations unix_net_ops = {
3569 	.init = unix_net_init,
3570 	.exit = unix_net_exit,
3571 };
3572 
3573 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3574 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
3575 		     struct unix_sock *unix_sk, uid_t uid)
3576 
3577 #define INIT_BATCH_SZ 16
3578 
3579 static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
3580 {
3581 	struct bpf_unix_iter_state *iter = priv_data;
3582 	int err;
3583 
3584 	err = bpf_iter_init_seq_net(priv_data, aux);
3585 	if (err)
3586 		return err;
3587 
3588 	err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
3589 	if (err) {
3590 		bpf_iter_fini_seq_net(priv_data);
3591 		return err;
3592 	}
3593 
3594 	return 0;
3595 }
3596 
3597 static void bpf_iter_fini_unix(void *priv_data)
3598 {
3599 	struct bpf_unix_iter_state *iter = priv_data;
3600 
3601 	bpf_iter_fini_seq_net(priv_data);
3602 	kvfree(iter->batch);
3603 }
3604 
3605 static const struct bpf_iter_seq_info unix_seq_info = {
3606 	.seq_ops		= &bpf_iter_unix_seq_ops,
3607 	.init_seq_private	= bpf_iter_init_unix,
3608 	.fini_seq_private	= bpf_iter_fini_unix,
3609 	.seq_priv_size		= sizeof(struct bpf_unix_iter_state),
3610 };
3611 
3612 static const struct bpf_func_proto *
3613 bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
3614 			     const struct bpf_prog *prog)
3615 {
3616 	switch (func_id) {
3617 	case BPF_FUNC_setsockopt:
3618 		return &bpf_sk_setsockopt_proto;
3619 	case BPF_FUNC_getsockopt:
3620 		return &bpf_sk_getsockopt_proto;
3621 	default:
3622 		return NULL;
3623 	}
3624 }
3625 
3626 static struct bpf_iter_reg unix_reg_info = {
3627 	.target			= "unix",
3628 	.ctx_arg_info_size	= 1,
3629 	.ctx_arg_info		= {
3630 		{ offsetof(struct bpf_iter__unix, unix_sk),
3631 		  PTR_TO_BTF_ID_OR_NULL },
3632 	},
3633 	.get_func_proto         = bpf_iter_unix_get_func_proto,
3634 	.seq_info		= &unix_seq_info,
3635 };
3636 
3637 static void __init bpf_iter_register(void)
3638 {
3639 	unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
3640 	if (bpf_iter_reg_target(&unix_reg_info))
3641 		pr_warn("Warning: could not register bpf iterator unix\n");
3642 }
3643 #endif
3644 
3645 static int __init af_unix_init(void)
3646 {
3647 	int i, rc = -1;
3648 
3649 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
3650 
3651 	for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
3652 		spin_lock_init(&bsd_socket_locks[i]);
3653 		INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
3654 	}
3655 
3656 	rc = proto_register(&unix_dgram_proto, 1);
3657 	if (rc != 0) {
3658 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3659 		goto out;
3660 	}
3661 
3662 	rc = proto_register(&unix_stream_proto, 1);
3663 	if (rc != 0) {
3664 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3665 		proto_unregister(&unix_dgram_proto);
3666 		goto out;
3667 	}
3668 
3669 	sock_register(&unix_family_ops);
3670 	register_pernet_subsys(&unix_net_ops);
3671 	unix_bpf_build_proto();
3672 
3673 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3674 	bpf_iter_register();
3675 #endif
3676 
3677 out:
3678 	return rc;
3679 }
3680 
3681 static void __exit af_unix_exit(void)
3682 {
3683 	sock_unregister(PF_UNIX);
3684 	proto_unregister(&unix_dgram_proto);
3685 	proto_unregister(&unix_stream_proto);
3686 	unregister_pernet_subsys(&unix_net_ops);
3687 }
3688 
3689 /* Earlier than device_initcall() so that other drivers invoking
3690    request_module() don't end up in a loop when modprobe tries
3691    to use a UNIX socket. But later than subsys_initcall() because
3692    we depend on stuff initialised there */
3693 fs_initcall(af_unix_init);
3694 module_exit(af_unix_exit);
3695 
3696 MODULE_LICENSE("GPL");
3697 MODULE_ALIAS_NETPROTO(PF_UNIX);
3698