xref: /openbmc/linux/net/unix/af_unix.c (revision 47327e198d42c77322dbe175817499d2d7ddc26a)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * NET4:	Implementation of BSD Unix domain sockets.
4  *
5  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
6  *
7  * Fixes:
8  *		Linus Torvalds	:	Assorted bug cures.
9  *		Niibe Yutaka	:	async I/O support.
10  *		Carsten Paeth	:	PF_UNIX check, address fixes.
11  *		Alan Cox	:	Limit size of allocated blocks.
12  *		Alan Cox	:	Fixed the stupid socketpair bug.
13  *		Alan Cox	:	BSD compatibility fine tuning.
14  *		Alan Cox	:	Fixed a bug in connect when interrupted.
15  *		Alan Cox	:	Sorted out a proper draft version of
16  *					file descriptor passing hacked up from
17  *					Mike Shaver's work.
18  *		Marty Leisner	:	Fixes to fd passing
19  *		Nick Nevin	:	recvmsg bugfix.
20  *		Alan Cox	:	Started proper garbage collector
21  *		Heiko Eißfeldt	:	Missing verify_area check
22  *		Alan Cox	:	Started POSIXisms
23  *		Andreas Schwab	:	Replace inode by dentry for proper
24  *					reference counting
25  *		Kirk Petersen	:	Made this a module
26  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
27  *					Lots of bug fixes.
28  *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
29  *					by the above two patches.
30  *	     Andrea Arcangeli	:	If possible we block in connect(2)
31  *					if the max backlog of the listen socket
32  *					has been reached. This won't break
33  *					old apps and it will avoid a huge amount
34  *					of hashed socks (this is for unix_gc()
35  *					performance reasons).
36  *					Security fix that limits the max
37  *					number of socks to 2*max_files and
38  *					the number of skb queueable in the
39  *					dgram receiver.
40  *		Artur Skawina   :	Hash function optimizations
41  *	     Alexey Kuznetsov   :	Full scale SMP. Lots of bugs are introduced 8)
42  *	      Malcolm Beattie   :	Set peercred for socketpair
43  *	     Michal Ostrowski   :       Module initialization cleanup.
44  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
45  *	     				the core infrastructure is doing that
46  *	     				for all net proto families now (2.5.69+)
47  *
48  * Known differences from reference BSD that was tested:
49  *
50  *	[TO FIX]
51  *	ECONNREFUSED is not returned from one end of a connected() socket to the
52  *		other the moment one end closes.
53  *	fstat() doesn't return st_dev=0, and gives the blksize as the high water
54  *		mark and a fake inode identifier (nor the BSD first socket fstat twice bug).
55  *	[NOT TO FIX]
56  *	accept() returns a path name even if the connecting socket has closed
57  *		in the meantime (BSD loses the path and gives up).
58  *	accept() returns a 0 length path for an unbound connector. BSD returns 16
59  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
61  *	BSD af_unix's connect apparently forgets to block properly.
62  *		(need to check this with the POSIX spec in detail)
63  *
64  * Differences from 2.0.0-11-... (ANK)
65  *	Bug fixes and improvements.
66  *		- client shutdown killed server socket.
67  *		- removed all useless cli/sti pairs.
68  *
69  *	Semantic changes/extensions.
70  *		- generic control message passing.
71  *		- SCM_CREDENTIALS control message.
72  *		- "Abstract" (not FS based) socket bindings.
73  *		  Abstract names are sequences of bytes (not zero terminated)
74  *		  starting with a 0 byte, so that this name space does not
75  *		  intersect with BSD names.
76  */
77 
78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79 
80 #include <linux/module.h>
81 #include <linux/kernel.h>
82 #include <linux/signal.h>
83 #include <linux/sched/signal.h>
84 #include <linux/errno.h>
85 #include <linux/string.h>
86 #include <linux/stat.h>
87 #include <linux/dcache.h>
88 #include <linux/namei.h>
89 #include <linux/socket.h>
90 #include <linux/un.h>
91 #include <linux/fcntl.h>
92 #include <linux/termios.h>
93 #include <linux/sockios.h>
94 #include <linux/net.h>
95 #include <linux/in.h>
96 #include <linux/fs.h>
97 #include <linux/slab.h>
98 #include <linux/uaccess.h>
99 #include <linux/skbuff.h>
100 #include <linux/netdevice.h>
101 #include <net/net_namespace.h>
102 #include <net/sock.h>
103 #include <net/tcp_states.h>
104 #include <net/af_unix.h>
105 #include <linux/proc_fs.h>
106 #include <linux/seq_file.h>
107 #include <net/scm.h>
108 #include <linux/init.h>
109 #include <linux/poll.h>
110 #include <linux/rtnetlink.h>
111 #include <linux/mount.h>
112 #include <net/checksum.h>
113 #include <linux/security.h>
114 #include <linux/freezer.h>
115 #include <linux/file.h>
116 #include <linux/btf_ids.h>
117 
118 #include "scm.h"
119 
120 spinlock_t unix_table_locks[2 * UNIX_HASH_SIZE];
121 EXPORT_SYMBOL_GPL(unix_table_locks);
122 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
123 EXPORT_SYMBOL_GPL(unix_socket_table);
124 static atomic_long_t unix_nr_socks;
125 
126 /* SMP locking strategy:
127  *    the hash table is protected by the unix_table_locks spinlocks,
128  *    each socket's state is protected by a separate spin lock.
129  */
130 
131 static unsigned int unix_unbound_hash(struct sock *sk)
132 {
133 	unsigned long hash = (unsigned long)sk;
134 
135 	hash ^= hash >> 16;
136 	hash ^= hash >> 8;
137 	hash ^= sk->sk_type;
138 
139 	return UNIX_HASH_SIZE + (hash & (UNIX_HASH_SIZE - 1));
140 }
141 
142 static unsigned int unix_bsd_hash(struct inode *i)
143 {
144 	return i->i_ino & (UNIX_HASH_SIZE - 1);
145 }
146 
147 static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
148 				       int addr_len, int type)
149 {
150 	__wsum csum = csum_partial(sunaddr, addr_len, 0);
151 	unsigned int hash;
152 
153 	hash = (__force unsigned int)csum_fold(csum);
154 	hash ^= hash >> 8;
155 	hash ^= type;
156 
157 	return hash & (UNIX_HASH_SIZE - 1);
158 }
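/* Illustrative helper (not in the kernel source): the hash space is split
 * in two halves.  Sockets bound to a pathname or abstract name hash into
 * [0, UNIX_HASH_SIZE), unbound sockets into
 * [UNIX_HASH_SIZE, 2 * UNIX_HASH_SIZE).  Binding therefore always moves
 * a socket between the halves, which is why unix_table_double_lock()
 * below may assume hash1 != hash2.
 */
static inline bool unix_hash_is_unbound(unsigned int hash)
{
	return hash >= UNIX_HASH_SIZE;
}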
159 
160 static void unix_table_double_lock(unsigned int hash1, unsigned int hash2)
161 {
162 	/* hash1 and hash2 are never the same because
163 	 * one is between 0 and UNIX_HASH_SIZE - 1, and
164 	 * the other is between UNIX_HASH_SIZE and UNIX_HASH_SIZE * 2 - 1.
165 	 */
166 	if (hash1 > hash2)
167 		swap(hash1, hash2);
168 
169 	spin_lock(&unix_table_locks[hash1]);
170 	spin_lock_nested(&unix_table_locks[hash2], SINGLE_DEPTH_NESTING);
171 }
172 
173 static void unix_table_double_unlock(unsigned int hash1, unsigned int hash2)
174 {
175 	spin_unlock(&unix_table_locks[hash1]);
176 	spin_unlock(&unix_table_locks[hash2]);
177 }
178 
179 #ifdef CONFIG_SECURITY_NETWORK
180 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
181 {
182 	UNIXCB(skb).secid = scm->secid;
183 }
184 
185 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
186 {
187 	scm->secid = UNIXCB(skb).secid;
188 }
189 
190 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
191 {
192 	return (scm->secid == UNIXCB(skb).secid);
193 }
194 #else
195 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
196 { }
197 
198 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
199 { }
200 
201 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
202 {
203 	return true;
204 }
205 #endif /* CONFIG_SECURITY_NETWORK */
206 
207 #define unix_peer(sk) (unix_sk(sk)->peer)
208 
209 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
210 {
211 	return unix_peer(osk) == sk;
212 }
213 
214 static inline int unix_may_send(struct sock *sk, struct sock *osk)
215 {
216 	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
217 }
218 
219 static inline int unix_recvq_full(const struct sock *sk)
220 {
221 	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
222 }
223 
224 static inline int unix_recvq_full_lockless(const struct sock *sk)
225 {
226 	return skb_queue_len_lockless(&sk->sk_receive_queue) >
227 		READ_ONCE(sk->sk_max_ack_backlog);
228 }
229 
230 struct sock *unix_peer_get(struct sock *s)
231 {
232 	struct sock *peer;
233 
234 	unix_state_lock(s);
235 	peer = unix_peer(s);
236 	if (peer)
237 		sock_hold(peer);
238 	unix_state_unlock(s);
239 	return peer;
240 }
241 EXPORT_SYMBOL_GPL(unix_peer_get);
242 
243 static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
244 					     int addr_len)
245 {
246 	struct unix_address *addr;
247 
248 	addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
249 	if (!addr)
250 		return NULL;
251 
252 	refcount_set(&addr->refcnt, 1);
253 	addr->len = addr_len;
254 	memcpy(addr->name, sunaddr, addr_len);
255 
256 	return addr;
257 }
258 
259 static inline void unix_release_addr(struct unix_address *addr)
260 {
261 	if (refcount_dec_and_test(&addr->refcnt))
262 		kfree(addr);
263 }
264 
265 /*
266  *	Check unix socket name:
267  *		- it should not be zero length.
268  *		- if it doesn't start with a zero byte, it should be NUL-terminated (FS object)
269  *		- if it starts with a zero byte, it is an abstract name.
270  */
271 
272 static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
273 {
274 	if (addr_len <= offsetof(struct sockaddr_un, sun_path) ||
275 	    addr_len > sizeof(*sunaddr))
276 		return -EINVAL;
277 
278 	if (sunaddr->sun_family != AF_UNIX)
279 		return -EINVAL;
280 
281 	return 0;
282 }
283 
284 static void unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
285 {
286 	/* This may look like an off by one error but it is a bit more
287 	 * subtle.  108 is the longest valid AF_UNIX path for a binding.
288 	 * sun_path[108] doesn't as such exist.  However in kernel space
289 	 * we are guaranteed that it is a valid memory location in our
290 	 * kernel address buffer because syscall functions always pass
291 	 * a pointer of struct sockaddr_storage which has a bigger buffer
292 	 * than 108.
293 	 */
294 	((char *)sunaddr)[addr_len] = 0;
295 }
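/* Concrete effect (illustrative): if user space passes "/tmp/x" with no
 * trailing NUL and addr_len covering exactly those bytes, the store above
 * yields a NUL-terminated path for the VFS lookups below:
 *
 *	before:	{ AF_UNIX, '/', 't', 'm', 'p', '/', 'x', <garbage>, ... }
 *	after:	{ AF_UNIX, '/', 't', 'm', 'p', '/', 'x', '\0', ... }
 *
 * The kernel-side copy lives in a struct sockaddr_storage, so index
 * addr_len (at most sizeof(struct sockaddr_un)) is always in bounds.
 */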
296 
297 static void __unix_remove_socket(struct sock *sk)
298 {
299 	sk_del_node_init(sk);
300 }
301 
302 static void __unix_insert_socket(struct sock *sk)
303 {
304 	WARN_ON(!sk_unhashed(sk));
305 	sk_add_node(sk, &unix_socket_table[sk->sk_hash]);
306 }
307 
308 static void __unix_set_addr_hash(struct sock *sk, struct unix_address *addr,
309 				 unsigned int hash)
310 {
311 	__unix_remove_socket(sk);
312 	smp_store_release(&unix_sk(sk)->addr, addr);
313 
314 	sk->sk_hash = hash;
315 	__unix_insert_socket(sk);
316 }
317 
318 static void unix_remove_socket(struct sock *sk)
319 {
320 	spin_lock(&unix_table_locks[sk->sk_hash]);
321 	__unix_remove_socket(sk);
322 	spin_unlock(&unix_table_locks[sk->sk_hash]);
323 }
324 
325 static void unix_insert_unbound_socket(struct sock *sk)
326 {
327 	spin_lock(&unix_table_locks[sk->sk_hash]);
328 	__unix_insert_socket(sk);
329 	spin_unlock(&unix_table_locks[sk->sk_hash]);
330 }
331 
332 static struct sock *__unix_find_socket_byname(struct net *net,
333 					      struct sockaddr_un *sunname,
334 					      int len, unsigned int hash)
335 {
336 	struct sock *s;
337 
338 	sk_for_each(s, &unix_socket_table[hash]) {
339 		struct unix_sock *u = unix_sk(s);
340 
341 		if (!net_eq(sock_net(s), net))
342 			continue;
343 
344 		if (u->addr->len == len &&
345 		    !memcmp(u->addr->name, sunname, len))
346 			return s;
347 	}
348 	return NULL;
349 }
350 
351 static inline struct sock *unix_find_socket_byname(struct net *net,
352 						   struct sockaddr_un *sunname,
353 						   int len, unsigned int hash)
354 {
355 	struct sock *s;
356 
357 	spin_lock(&unix_table_locks[hash]);
358 	s = __unix_find_socket_byname(net, sunname, len, hash);
359 	if (s)
360 		sock_hold(s);
361 	spin_unlock(&unix_table_locks[hash]);
362 	return s;
363 }
364 
365 static struct sock *unix_find_socket_byinode(struct inode *i)
366 {
367 	unsigned int hash = unix_bsd_hash(i);
368 	struct sock *s;
369 
370 	spin_lock(&unix_table_locks[hash]);
371 	sk_for_each(s, &unix_socket_table[hash]) {
372 		struct dentry *dentry = unix_sk(s)->path.dentry;
373 
374 		if (dentry && d_backing_inode(dentry) == i) {
375 			sock_hold(s);
376 			spin_unlock(&unix_table_locks[hash]);
377 			return s;
378 		}
379 	}
380 	spin_unlock(&unix_table_locks[hash]);
381 	return NULL;
382 }
383 
384 /* Support code for asymmetrically connected dgram sockets
385  *
386  * If a datagram socket is connected to a socket not itself connected
387  * to the first socket (eg, /dev/log), clients may only enqueue more
388  * messages if the present receive queue of the server socket is not
389  * "too large". This means there's a second writeability condition
390  * poll and sendmsg need to test. The dgram recv code will do a wake
391  * up on the peer_wait wait queue of a socket upon reception of a
392  * datagram; this wake-up needs to be propagated to sleeping would-be
393  * writers since these might not have sent anything so far. This can't
394  * be accomplished via poll_wait because the lifetime of the server
395  * socket might be less than that of its clients if these break their
396  * association with it or if the server socket is closed while clients
397  * are still connected to it, and there's no way to inform "a polling
398  * implementation" that it should let go of a certain wait queue.
399  *
400  * In order to propagate a wake up, a wait_queue_entry_t of the client
401  * socket is enqueued on the peer_wait queue of the server socket
402  * whose wake function does a wake_up on the ordinary client socket
403  * wait queue. This connection is established whenever a write (or
404  * poll for write) hits the flow control condition and is broken when the
405  * association to the server socket is dissolved or after a wake up
406  * was relayed.
407  */
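/* User-space consequence (illustrative sketch): a datagram client whose
 * server's receive queue is full blocks in poll() until the server
 * dequeues, at which point the relay below wakes it:
 *
 *	connect(c, (struct sockaddr *)&srv, srv_len);
 *	struct pollfd pfd = { .fd = c, .events = POLLOUT };
 *	poll(&pfd, 1, -1);	// sleeps while the server queue is full
 */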
408 
409 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
410 				      void *key)
411 {
412 	struct unix_sock *u;
413 	wait_queue_head_t *u_sleep;
414 
415 	u = container_of(q, struct unix_sock, peer_wake);
416 
417 	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
418 			    q);
419 	u->peer_wake.private = NULL;
420 
421 	/* relaying can only happen while the wq still exists */
422 	u_sleep = sk_sleep(&u->sk);
423 	if (u_sleep)
424 		wake_up_interruptible_poll(u_sleep, key_to_poll(key));
425 
426 	return 0;
427 }
428 
429 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
430 {
431 	struct unix_sock *u, *u_other;
432 	int rc;
433 
434 	u = unix_sk(sk);
435 	u_other = unix_sk(other);
436 	rc = 0;
437 	spin_lock(&u_other->peer_wait.lock);
438 
439 	if (!u->peer_wake.private) {
440 		u->peer_wake.private = other;
441 		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
442 
443 		rc = 1;
444 	}
445 
446 	spin_unlock(&u_other->peer_wait.lock);
447 	return rc;
448 }
449 
450 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
451 					    struct sock *other)
452 {
453 	struct unix_sock *u, *u_other;
454 
455 	u = unix_sk(sk);
456 	u_other = unix_sk(other);
457 	spin_lock(&u_other->peer_wait.lock);
458 
459 	if (u->peer_wake.private == other) {
460 		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
461 		u->peer_wake.private = NULL;
462 	}
463 
464 	spin_unlock(&u_other->peer_wait.lock);
465 }
466 
467 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
468 						   struct sock *other)
469 {
470 	unix_dgram_peer_wake_disconnect(sk, other);
471 	wake_up_interruptible_poll(sk_sleep(sk),
472 				   EPOLLOUT |
473 				   EPOLLWRNORM |
474 				   EPOLLWRBAND);
475 }
476 
477 /* preconditions:
478  *	- unix_peer(sk) == other
479  *	- association is stable
480  */
481 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
482 {
483 	int connected;
484 
485 	connected = unix_dgram_peer_wake_connect(sk, other);
486 
487 	/* If other is SOCK_DEAD, we want to make sure we signal
488 	 * POLLOUT, such that a subsequent write() can get a
489 	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
490 	 * to other and it's full, we will hang waiting for POLLOUT.
491 	 */
492 	if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
493 		return 1;
494 
495 	if (connected)
496 		unix_dgram_peer_wake_disconnect(sk, other);
497 
498 	return 0;
499 }
500 
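/* A socket counts as writable while queued skbs consume no more than a
 * quarter of sk_sndbuf (wmem_alloc * 4 <= sndbuf), and never while
 * listening.
 */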
501 static int unix_writable(const struct sock *sk)
502 {
503 	return sk->sk_state != TCP_LISTEN &&
504 	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
505 }
506 
507 static void unix_write_space(struct sock *sk)
508 {
509 	struct socket_wq *wq;
510 
511 	rcu_read_lock();
512 	if (unix_writable(sk)) {
513 		wq = rcu_dereference(sk->sk_wq);
514 		if (skwq_has_sleeper(wq))
515 			wake_up_interruptible_sync_poll(&wq->wait,
516 				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
517 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
518 	}
519 	rcu_read_unlock();
520 }
521 
522 /* When a dgram socket disconnects (or changes its peer), we clear its
523  * receive queue of packets that arrived from the previous peer. First, this
524  * allows flow control based only on wmem_alloc; second, an sk connected
525  * to a peer may receive messages only from that peer. */
526 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
527 {
528 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
529 		skb_queue_purge(&sk->sk_receive_queue);
530 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
531 
532 		/* If one link of a bidirectional dgram pipe is disconnected,
533 		 * we signal an error. Messages are lost. Do not do this
534 		 * when the peer was not connected to us.
535 		 */
536 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
537 			other->sk_err = ECONNRESET;
538 			sk_error_report(other);
539 		}
540 	}
541 	other->sk_state = TCP_CLOSE;
542 }
543 
544 static void unix_sock_destructor(struct sock *sk)
545 {
546 	struct unix_sock *u = unix_sk(sk);
547 
548 	skb_queue_purge(&sk->sk_receive_queue);
549 
550 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
551 	if (u->oob_skb) {
552 		kfree_skb(u->oob_skb);
553 		u->oob_skb = NULL;
554 	}
555 #endif
556 	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
557 	WARN_ON(!sk_unhashed(sk));
558 	WARN_ON(sk->sk_socket);
559 	if (!sock_flag(sk, SOCK_DEAD)) {
560 		pr_info("Attempt to release alive unix socket: %p\n", sk);
561 		return;
562 	}
563 
564 	if (u->addr)
565 		unix_release_addr(u->addr);
566 
567 	atomic_long_dec(&unix_nr_socks);
568 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
569 #ifdef UNIX_REFCNT_DEBUG
570 	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
571 		atomic_long_read(&unix_nr_socks));
572 #endif
573 }
574 
575 static void unix_release_sock(struct sock *sk, int embrion)
576 {
577 	struct unix_sock *u = unix_sk(sk);
578 	struct path path;
579 	struct sock *skpair;
580 	struct sk_buff *skb;
581 	int state;
582 
583 	unix_remove_socket(sk);
584 
585 	/* Clear state */
586 	unix_state_lock(sk);
587 	sock_orphan(sk);
588 	sk->sk_shutdown = SHUTDOWN_MASK;
589 	path	     = u->path;
590 	u->path.dentry = NULL;
591 	u->path.mnt = NULL;
592 	state = sk->sk_state;
593 	sk->sk_state = TCP_CLOSE;
594 
595 	skpair = unix_peer(sk);
596 	unix_peer(sk) = NULL;
597 
598 	unix_state_unlock(sk);
599 
600 	wake_up_interruptible_all(&u->peer_wait);
601 
602 	if (skpair != NULL) {
603 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
604 			unix_state_lock(skpair);
605 			/* No more writes */
606 			skpair->sk_shutdown = SHUTDOWN_MASK;
607 			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
608 				skpair->sk_err = ECONNRESET;
609 			unix_state_unlock(skpair);
610 			skpair->sk_state_change(skpair);
611 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
612 		}
613 
614 		unix_dgram_peer_wake_disconnect(sk, skpair);
615 		sock_put(skpair); /* It may now die */
616 	}
617 
618 	/* Try to flush out this socket. Throw out buffers at least */
619 
620 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
621 		if (state == TCP_LISTEN)
622 			unix_release_sock(skb->sk, 1);
623 		/* passed fds are erased in the kfree_skb hook	      */
624 		UNIXCB(skb).consumed = skb->len;
625 		kfree_skb(skb);
626 	}
627 
628 	if (path.dentry)
629 		path_put(&path);
630 
631 	sock_put(sk);
632 
633 	/* ---- Socket is dead now and most probably destroyed ---- */
634 
635 	/*
636 	 * Fixme: BSD difference: In BSD all sockets connected to us get
637 	 *	  ECONNRESET and we die on the spot. In Linux we behave
638 	 *	  like files and pipes do and wait for the last
639 	 *	  dereference.
640 	 *
641 	 * Can't we simply set sock->err?
642 	 *
643 	 *	  What does the above comment talk about? --ANK(980817)
644 	 */
645 
646 	if (unix_tot_inflight)
647 		unix_gc();		/* Garbage collect fds */
648 }
649 
650 static void init_peercred(struct sock *sk)
651 {
652 	const struct cred *old_cred;
653 	struct pid *old_pid;
654 
655 	spin_lock(&sk->sk_peer_lock);
656 	old_pid = sk->sk_peer_pid;
657 	old_cred = sk->sk_peer_cred;
658 	sk->sk_peer_pid  = get_pid(task_tgid(current));
659 	sk->sk_peer_cred = get_current_cred();
660 	spin_unlock(&sk->sk_peer_lock);
661 
662 	put_pid(old_pid);
663 	put_cred(old_cred);
664 }
665 
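/* The two peer locks below are taken in pointer order (lower address
 * first) so that concurrent copy_peercred() calls on the same pair of
 * sockets cannot deadlock, the same trick unix_state_double_lock()
 * uses further down.
 */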
666 static void copy_peercred(struct sock *sk, struct sock *peersk)
667 {
668 	const struct cred *old_cred;
669 	struct pid *old_pid;
670 
671 	if (sk < peersk) {
672 		spin_lock(&sk->sk_peer_lock);
673 		spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
674 	} else {
675 		spin_lock(&peersk->sk_peer_lock);
676 		spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
677 	}
678 	old_pid = sk->sk_peer_pid;
679 	old_cred = sk->sk_peer_cred;
680 	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
681 	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
682 
683 	spin_unlock(&sk->sk_peer_lock);
684 	spin_unlock(&peersk->sk_peer_lock);
685 
686 	put_pid(old_pid);
687 	put_cred(old_cred);
688 }
689 
690 static int unix_listen(struct socket *sock, int backlog)
691 {
692 	int err;
693 	struct sock *sk = sock->sk;
694 	struct unix_sock *u = unix_sk(sk);
695 
696 	err = -EOPNOTSUPP;
697 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
698 		goto out;	/* Only stream/seqpacket sockets accept */
699 	err = -EINVAL;
700 	if (!u->addr)
701 		goto out;	/* No listens on an unbound socket */
702 	unix_state_lock(sk);
703 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
704 		goto out_unlock;
705 	if (backlog > sk->sk_max_ack_backlog)
706 		wake_up_interruptible_all(&u->peer_wait);
707 	sk->sk_max_ack_backlog	= backlog;
708 	sk->sk_state		= TCP_LISTEN;
709 	/* set credentials so connect can copy them */
710 	init_peercred(sk);
711 	err = 0;
712 
713 out_unlock:
714 	unix_state_unlock(sk);
715 out:
716 	return err;
717 }
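/* Typical user-space sequence (illustrative) that reaches unix_listen():
 *
 *	int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *	struct sockaddr_un sun = { .sun_family = AF_UNIX };
 *	strcpy(sun.sun_path, "/tmp/demo.sock");
 *	bind(fd, (struct sockaddr *)&sun, sizeof(sun));	// sets u->addr
 *	listen(fd, 128);	// sk_state becomes TCP_LISTEN
 *
 * Per the checks above, listen() on a datagram socket fails with
 * -EOPNOTSUPP and on an unbound socket with -EINVAL.
 */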
718 
719 static int unix_release(struct socket *);
720 static int unix_bind(struct socket *, struct sockaddr *, int);
721 static int unix_stream_connect(struct socket *, struct sockaddr *,
722 			       int addr_len, int flags);
723 static int unix_socketpair(struct socket *, struct socket *);
724 static int unix_accept(struct socket *, struct socket *, int, bool);
725 static int unix_getname(struct socket *, struct sockaddr *, int);
726 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
727 static __poll_t unix_dgram_poll(struct file *, struct socket *,
728 				    poll_table *);
729 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
730 #ifdef CONFIG_COMPAT
731 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
732 #endif
733 static int unix_shutdown(struct socket *, int);
734 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
735 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
736 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
737 				    size_t size, int flags);
738 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
739 				       struct pipe_inode_info *, size_t size,
740 				       unsigned int flags);
741 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
742 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
743 static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
744 			  sk_read_actor_t recv_actor);
745 static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
746 				 sk_read_actor_t recv_actor);
747 static int unix_dgram_connect(struct socket *, struct sockaddr *,
748 			      int, int);
749 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
750 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
751 				  int);
752 
753 static int unix_set_peek_off(struct sock *sk, int val)
754 {
755 	struct unix_sock *u = unix_sk(sk);
756 
757 	if (mutex_lock_interruptible(&u->iolock))
758 		return -EINTR;
759 
760 	sk->sk_peek_off = val;
761 	mutex_unlock(&u->iolock);
762 
763 	return 0;
764 }
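/* Illustrative user-space use of the hook above:
 *
 *	int off = 0;
 *	setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off));
 *	recv(fd, buf, sizeof(buf), MSG_PEEK);	// advances sk_peek_off
 *
 * Taking iolock orders the update against readers that are concurrently
 * peeking or consuming data.
 */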
765 
766 #ifdef CONFIG_PROC_FS
767 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
768 {
769 	struct sock *sk = sock->sk;
770 	struct unix_sock *u;
771 
772 	if (sk) {
773 		u = unix_sk(sock->sk);
774 		seq_printf(m, "scm_fds: %u\n",
775 			   atomic_read(&u->scm_stat.nr_fds));
776 	}
777 }
778 #else
779 #define unix_show_fdinfo NULL
780 #endif
781 
782 static const struct proto_ops unix_stream_ops = {
783 	.family =	PF_UNIX,
784 	.owner =	THIS_MODULE,
785 	.release =	unix_release,
786 	.bind =		unix_bind,
787 	.connect =	unix_stream_connect,
788 	.socketpair =	unix_socketpair,
789 	.accept =	unix_accept,
790 	.getname =	unix_getname,
791 	.poll =		unix_poll,
792 	.ioctl =	unix_ioctl,
793 #ifdef CONFIG_COMPAT
794 	.compat_ioctl =	unix_compat_ioctl,
795 #endif
796 	.listen =	unix_listen,
797 	.shutdown =	unix_shutdown,
798 	.sendmsg =	unix_stream_sendmsg,
799 	.recvmsg =	unix_stream_recvmsg,
800 	.read_sock =	unix_stream_read_sock,
801 	.mmap =		sock_no_mmap,
802 	.sendpage =	unix_stream_sendpage,
803 	.splice_read =	unix_stream_splice_read,
804 	.set_peek_off =	unix_set_peek_off,
805 	.show_fdinfo =	unix_show_fdinfo,
806 };
807 
808 static const struct proto_ops unix_dgram_ops = {
809 	.family =	PF_UNIX,
810 	.owner =	THIS_MODULE,
811 	.release =	unix_release,
812 	.bind =		unix_bind,
813 	.connect =	unix_dgram_connect,
814 	.socketpair =	unix_socketpair,
815 	.accept =	sock_no_accept,
816 	.getname =	unix_getname,
817 	.poll =		unix_dgram_poll,
818 	.ioctl =	unix_ioctl,
819 #ifdef CONFIG_COMPAT
820 	.compat_ioctl =	unix_compat_ioctl,
821 #endif
822 	.listen =	sock_no_listen,
823 	.shutdown =	unix_shutdown,
824 	.sendmsg =	unix_dgram_sendmsg,
825 	.read_sock =	unix_read_sock,
826 	.recvmsg =	unix_dgram_recvmsg,
827 	.mmap =		sock_no_mmap,
828 	.sendpage =	sock_no_sendpage,
829 	.set_peek_off =	unix_set_peek_off,
830 	.show_fdinfo =	unix_show_fdinfo,
831 };
832 
833 static const struct proto_ops unix_seqpacket_ops = {
834 	.family =	PF_UNIX,
835 	.owner =	THIS_MODULE,
836 	.release =	unix_release,
837 	.bind =		unix_bind,
838 	.connect =	unix_stream_connect,
839 	.socketpair =	unix_socketpair,
840 	.accept =	unix_accept,
841 	.getname =	unix_getname,
842 	.poll =		unix_dgram_poll,
843 	.ioctl =	unix_ioctl,
844 #ifdef CONFIG_COMPAT
845 	.compat_ioctl =	unix_compat_ioctl,
846 #endif
847 	.listen =	unix_listen,
848 	.shutdown =	unix_shutdown,
849 	.sendmsg =	unix_seqpacket_sendmsg,
850 	.recvmsg =	unix_seqpacket_recvmsg,
851 	.mmap =		sock_no_mmap,
852 	.sendpage =	sock_no_sendpage,
853 	.set_peek_off =	unix_set_peek_off,
854 	.show_fdinfo =	unix_show_fdinfo,
855 };
856 
857 static void unix_close(struct sock *sk, long timeout)
858 {
859 	/* Nothing to do here, unix socket does not need a ->close().
860 	 * This is merely for sockmap.
861 	 */
862 }
863 
864 static void unix_unhash(struct sock *sk)
865 {
866 	/* Nothing to do here, unix socket does not need a ->unhash().
867 	 * This is merely for sockmap.
868 	 */
869 }
870 
871 struct proto unix_dgram_proto = {
872 	.name			= "UNIX",
873 	.owner			= THIS_MODULE,
874 	.obj_size		= sizeof(struct unix_sock),
875 	.close			= unix_close,
876 #ifdef CONFIG_BPF_SYSCALL
877 	.psock_update_sk_prot	= unix_dgram_bpf_update_proto,
878 #endif
879 };
880 
881 struct proto unix_stream_proto = {
882 	.name			= "UNIX-STREAM",
883 	.owner			= THIS_MODULE,
884 	.obj_size		= sizeof(struct unix_sock),
885 	.close			= unix_close,
886 	.unhash			= unix_unhash,
887 #ifdef CONFIG_BPF_SYSCALL
888 	.psock_update_sk_prot	= unix_stream_bpf_update_proto,
889 #endif
890 };
891 
892 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
893 {
894 	struct unix_sock *u;
895 	struct sock *sk;
896 	int err;
897 
898 	atomic_long_inc(&unix_nr_socks);
899 	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
900 		err = -ENFILE;
901 		goto err;
902 	}
903 
904 	if (type == SOCK_STREAM)
905 		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
906 	else /* dgram and seqpacket */
907 		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
908 
909 	if (!sk) {
910 		err = -ENOMEM;
911 		goto err;
912 	}
913 
914 	sock_init_data(sock, sk);
915 
916 	sk->sk_hash		= unix_unbound_hash(sk);
917 	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
918 	sk->sk_write_space	= unix_write_space;
919 	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
920 	sk->sk_destruct		= unix_sock_destructor;
921 	u	  = unix_sk(sk);
922 	u->path.dentry = NULL;
923 	u->path.mnt = NULL;
924 	spin_lock_init(&u->lock);
925 	atomic_long_set(&u->inflight, 0);
926 	INIT_LIST_HEAD(&u->link);
927 	mutex_init(&u->iolock); /* single task reading lock */
928 	mutex_init(&u->bindlock); /* single task binding lock */
929 	init_waitqueue_head(&u->peer_wait);
930 	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
931 	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
932 	unix_insert_unbound_socket(sk);
933 
934 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
935 
936 	return sk;
937 
938 err:
939 	atomic_long_dec(&unix_nr_socks);
940 	return ERR_PTR(err);
941 }
942 
943 static int unix_create(struct net *net, struct socket *sock, int protocol,
944 		       int kern)
945 {
946 	struct sock *sk;
947 
948 	if (protocol && protocol != PF_UNIX)
949 		return -EPROTONOSUPPORT;
950 
951 	sock->state = SS_UNCONNECTED;
952 
953 	switch (sock->type) {
954 	case SOCK_STREAM:
955 		sock->ops = &unix_stream_ops;
956 		break;
957 		/*
958 		 *	Believe it or not, BSD has AF_UNIX, SOCK_RAW though
959 		 *	nothing uses it.
960 		 */
961 	case SOCK_RAW:
962 		sock->type = SOCK_DGRAM;
963 		fallthrough;
964 	case SOCK_DGRAM:
965 		sock->ops = &unix_dgram_ops;
966 		break;
967 	case SOCK_SEQPACKET:
968 		sock->ops = &unix_seqpacket_ops;
969 		break;
970 	default:
971 		return -ESOCKTNOSUPPORT;
972 	}
973 
974 	sk = unix_create1(net, sock, kern, sock->type);
975 	if (IS_ERR(sk))
976 		return PTR_ERR(sk);
977 
978 	return 0;
979 }
980 
981 static int unix_release(struct socket *sock)
982 {
983 	struct sock *sk = sock->sk;
984 
985 	if (!sk)
986 		return 0;
987 
988 	sk->sk_prot->close(sk, 0);
989 	unix_release_sock(sk, 0);
990 	sock->sk = NULL;
991 
992 	return 0;
993 }
994 
995 static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr,
996 				  int addr_len, int type)
997 {
998 	struct inode *inode;
999 	struct path path;
1000 	struct sock *sk;
1001 	int err;
1002 
1003 	unix_mkname_bsd(sunaddr, addr_len);
1004 	err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
1005 	if (err)
1006 		goto fail;
1007 
1008 	err = path_permission(&path, MAY_WRITE);
1009 	if (err)
1010 		goto path_put;
1011 
1012 	err = -ECONNREFUSED;
1013 	inode = d_backing_inode(path.dentry);
1014 	if (!S_ISSOCK(inode->i_mode))
1015 		goto path_put;
1016 
1017 	sk = unix_find_socket_byinode(inode);
1018 	if (!sk)
1019 		goto path_put;
1020 
1021 	err = -EPROTOTYPE;
1022 	if (sk->sk_type == type)
1023 		touch_atime(&path);
1024 	else
1025 		goto sock_put;
1026 
1027 	path_put(&path);
1028 
1029 	return sk;
1030 
1031 sock_put:
1032 	sock_put(sk);
1033 path_put:
1034 	path_put(&path);
1035 fail:
1036 	return ERR_PTR(err);
1037 }
1038 
1039 static struct sock *unix_find_abstract(struct net *net,
1040 				       struct sockaddr_un *sunaddr,
1041 				       int addr_len, int type)
1042 {
1043 	unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type);
1044 	struct dentry *dentry;
1045 	struct sock *sk;
1046 
1047 	sk = unix_find_socket_byname(net, sunaddr, addr_len, hash);
1048 	if (!sk)
1049 		return ERR_PTR(-ECONNREFUSED);
1050 
1051 	dentry = unix_sk(sk)->path.dentry;
1052 	if (dentry)
1053 		touch_atime(&unix_sk(sk)->path);
1054 
1055 	return sk;
1056 }
1057 
1058 static struct sock *unix_find_other(struct net *net,
1059 				    struct sockaddr_un *sunaddr,
1060 				    int addr_len, int type)
1061 {
1062 	struct sock *sk;
1063 
1064 	if (sunaddr->sun_path[0])
1065 		sk = unix_find_bsd(net, sunaddr, addr_len, type);
1066 	else
1067 		sk = unix_find_abstract(net, sunaddr, addr_len, type);
1068 
1069 	return sk;
1070 }
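/* The two addressing schemes in user-space terms (illustrative):
 *
 *	// pathname: resolved through the filesystem (unix_find_bsd)
 *	strcpy(sun.sun_path, "/run/demo.sock");
 *	addr_len = offsetof(struct sockaddr_un, sun_path) +
 *		   strlen(sun.sun_path);
 *
 *	// abstract: sun_path starts with a NUL byte (unix_find_abstract);
 *	// the name is the whole byte sequence, not NUL-terminated
 *	memcpy(sun.sun_path, "\0demo", 5);
 *	addr_len = offsetof(struct sockaddr_un, sun_path) + 5;
 */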
1071 
1072 static int unix_autobind(struct sock *sk)
1073 {
1074 	unsigned int new_hash, old_hash = sk->sk_hash;
1075 	struct unix_sock *u = unix_sk(sk);
1076 	struct unix_address *addr;
1077 	u32 lastnum, ordernum;
1078 	int err;
1079 
1080 	err = mutex_lock_interruptible(&u->bindlock);
1081 	if (err)
1082 		return err;
1083 
1084 	if (u->addr)
1085 		goto out;
1086 
1087 	err = -ENOMEM;
1088 	addr = kzalloc(sizeof(*addr) +
1089 		       offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
1090 	if (!addr)
1091 		goto out;
1092 
1093 	addr->len = offsetof(struct sockaddr_un, sun_path) + 6;
1094 	addr->name->sun_family = AF_UNIX;
1095 	refcount_set(&addr->refcnt, 1);
1096 
1097 	ordernum = prandom_u32();
1098 	lastnum = ordernum & 0xFFFFF;
1099 retry:
1100 	ordernum = (ordernum + 1) & 0xFFFFF;
1101 	sprintf(addr->name->sun_path + 1, "%05x", ordernum);
1102 
1103 	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1104 	unix_table_double_lock(old_hash, new_hash);
1105 
1106 	if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len,
1107 				      new_hash)) {
1108 		unix_table_double_unlock(old_hash, new_hash);
1109 
1110 		/* __unix_find_socket_byname() may take a long time if many names
1111 		 * are already in use.
1112 		 */
1113 		cond_resched();
1114 
1115 		if (ordernum == lastnum) {
1116 			/* Give up if all names seem to be in use. */
1117 			err = -ENOSPC;
1118 			unix_release_addr(addr);
1119 			goto out;
1120 		}
1121 
1122 		goto retry;
1123 	}
1124 
1125 	__unix_set_addr_hash(sk, addr, new_hash);
1126 	unix_table_double_unlock(old_hash, new_hash);
1127 	err = 0;
1128 
1129 out:	mutex_unlock(&u->bindlock);
1130 	return err;
1131 }
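/* Autobound names are abstract: a leading NUL followed by five hex
 * digits, shown as e.g. "@00a3f" in /proc/net/unix.  All 2^20 candidate
 * names are probed at most once before giving up with -ENOSPC.
 */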
1132 
1133 static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
1134 			 int addr_len)
1135 {
1136 	umode_t mode = S_IFSOCK |
1137 	       (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
1138 	unsigned int new_hash, old_hash = sk->sk_hash;
1139 	struct unix_sock *u = unix_sk(sk);
1140 	struct user_namespace *ns; // barf...
1141 	struct unix_address *addr;
1142 	struct dentry *dentry;
1143 	struct path parent;
1144 	int err;
1145 
1146 	unix_mkname_bsd(sunaddr, addr_len);
1147 	addr_len = strlen(sunaddr->sun_path) +
1148 		offsetof(struct sockaddr_un, sun_path) + 1;
1149 
1150 	addr = unix_create_addr(sunaddr, addr_len);
1151 	if (!addr)
1152 		return -ENOMEM;
1153 
1154 	/*
1155 	 * Get the parent directory, calculate the hash for the last
1156 	 * component.
1157 	 */
1158 	dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
1159 	if (IS_ERR(dentry)) {
1160 		err = PTR_ERR(dentry);
1161 		goto out;
1162 	}
1163 
1164 	/*
1165 	 * All right, let's create it.
1166 	 */
1167 	ns = mnt_user_ns(parent.mnt);
1168 	err = security_path_mknod(&parent, dentry, mode, 0);
1169 	if (!err)
1170 		err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0);
1171 	if (err)
1172 		goto out_path;
1173 	err = mutex_lock_interruptible(&u->bindlock);
1174 	if (err)
1175 		goto out_unlink;
1176 	if (u->addr)
1177 		goto out_unlock;
1178 
1179 	new_hash = unix_bsd_hash(d_backing_inode(dentry));
1180 	unix_table_double_lock(old_hash, new_hash);
1181 	u->path.mnt = mntget(parent.mnt);
1182 	u->path.dentry = dget(dentry);
1183 	__unix_set_addr_hash(sk, addr, new_hash);
1184 	unix_table_double_unlock(old_hash, new_hash);
1185 	mutex_unlock(&u->bindlock);
1186 	done_path_create(&parent, dentry);
1187 	return 0;
1188 
1189 out_unlock:
1190 	mutex_unlock(&u->bindlock);
1191 	err = -EINVAL;
1192 out_unlink:
1193 	/* failed after successful mknod?  unlink what we'd created... */
1194 	vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL);
1195 out_path:
1196 	done_path_create(&parent, dentry);
1197 out:
1198 	unix_release_addr(addr);
1199 	return err == -EEXIST ? -EADDRINUSE : err;
1200 }
1201 
1202 static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
1203 			      int addr_len)
1204 {
1205 	unsigned int new_hash, old_hash = sk->sk_hash;
1206 	struct unix_sock *u = unix_sk(sk);
1207 	struct unix_address *addr;
1208 	int err;
1209 
1210 	addr = unix_create_addr(sunaddr, addr_len);
1211 	if (!addr)
1212 		return -ENOMEM;
1213 
1214 	err = mutex_lock_interruptible(&u->bindlock);
1215 	if (err)
1216 		goto out;
1217 
1218 	if (u->addr) {
1219 		err = -EINVAL;
1220 		goto out_mutex;
1221 	}
1222 
1223 	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1224 	unix_table_double_lock(old_hash, new_hash);
1225 
1226 	if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len,
1227 				      new_hash))
1228 		goto out_spin;
1229 
1230 	__unix_set_addr_hash(sk, addr, new_hash);
1231 	unix_table_double_unlock(old_hash, new_hash);
1232 	mutex_unlock(&u->bindlock);
1233 	return 0;
1234 
1235 out_spin:
1236 	unix_table_double_unlock(old_hash, new_hash);
1237 	err = -EADDRINUSE;
1238 out_mutex:
1239 	mutex_unlock(&u->bindlock);
1240 out:
1241 	unix_release_addr(addr);
1242 	return err;
1243 }
1244 
1245 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1246 {
1247 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1248 	struct sock *sk = sock->sk;
1249 	int err;
1250 
1251 	if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
1252 	    sunaddr->sun_family == AF_UNIX)
1253 		return unix_autobind(sk);
1254 
1255 	err = unix_validate_addr(sunaddr, addr_len);
1256 	if (err)
1257 		return err;
1258 
1259 	if (sunaddr->sun_path[0])
1260 		err = unix_bind_bsd(sk, sunaddr, addr_len);
1261 	else
1262 		err = unix_bind_abstract(sk, sunaddr, addr_len);
1263 
1264 	return err;
1265 }
1266 
1267 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1268 {
1269 	if (unlikely(sk1 == sk2) || !sk2) {
1270 		unix_state_lock(sk1);
1271 		return;
1272 	}
1273 	if (sk1 < sk2) {
1274 		unix_state_lock(sk1);
1275 		unix_state_lock_nested(sk2);
1276 	} else {
1277 		unix_state_lock(sk2);
1278 		unix_state_lock_nested(sk1);
1279 	}
1280 }
1281 
1282 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1283 {
1284 	if (unlikely(sk1 == sk2) || !sk2) {
1285 		unix_state_unlock(sk1);
1286 		return;
1287 	}
1288 	unix_state_unlock(sk1);
1289 	unix_state_unlock(sk2);
1290 }
1291 
1292 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1293 			      int alen, int flags)
1294 {
1295 	struct sock *sk = sock->sk;
1296 	struct net *net = sock_net(sk);
1297 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1298 	struct sock *other;
1299 	int err;
1300 
1301 	err = -EINVAL;
1302 	if (alen < offsetofend(struct sockaddr, sa_family))
1303 		goto out;
1304 
1305 	if (addr->sa_family != AF_UNSPEC) {
1306 		err = unix_validate_addr(sunaddr, alen);
1307 		if (err)
1308 			goto out;
1309 
1310 		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1311 		    !unix_sk(sk)->addr) {
1312 			err = unix_autobind(sk);
1313 			if (err)
1314 				goto out;
1315 		}
1316 
1317 restart:
1318 		other = unix_find_other(net, sunaddr, alen, sock->type);
1319 		if (IS_ERR(other)) {
1320 			err = PTR_ERR(other);
1321 			goto out;
1322 		}
1323 
1324 		unix_state_double_lock(sk, other);
1325 
1326 		/* Apparently VFS overslept socket death. Retry. */
1327 		if (sock_flag(other, SOCK_DEAD)) {
1328 			unix_state_double_unlock(sk, other);
1329 			sock_put(other);
1330 			goto restart;
1331 		}
1332 
1333 		err = -EPERM;
1334 		if (!unix_may_send(sk, other))
1335 			goto out_unlock;
1336 
1337 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1338 		if (err)
1339 			goto out_unlock;
1340 
1341 		sk->sk_state = other->sk_state = TCP_ESTABLISHED;
1342 	} else {
1343 		/*
1344 		 *	1003.1g breaking connected state with AF_UNSPEC
1345 		 */
1346 		other = NULL;
1347 		unix_state_double_lock(sk, other);
1348 	}
1349 
1350 	/*
1351 	 * If it was connected, reconnect.
1352 	 */
1353 	if (unix_peer(sk)) {
1354 		struct sock *old_peer = unix_peer(sk);
1355 
1356 		unix_peer(sk) = other;
1357 		if (!other)
1358 			sk->sk_state = TCP_CLOSE;
1359 		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1360 
1361 		unix_state_double_unlock(sk, other);
1362 
1363 		if (other != old_peer)
1364 			unix_dgram_disconnected(sk, old_peer);
1365 		sock_put(old_peer);
1366 	} else {
1367 		unix_peer(sk) = other;
1368 		unix_state_double_unlock(sk, other);
1369 	}
1370 
1371 	return 0;
1372 
1373 out_unlock:
1374 	unix_state_double_unlock(sk, other);
1375 	sock_put(other);
1376 out:
1377 	return err;
1378 }
1379 
1380 static long unix_wait_for_peer(struct sock *other, long timeo)
1381 	__releases(&unix_sk(other)->lock)
1382 {
1383 	struct unix_sock *u = unix_sk(other);
1384 	int sched;
1385 	DEFINE_WAIT(wait);
1386 
1387 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1388 
1389 	sched = !sock_flag(other, SOCK_DEAD) &&
1390 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1391 		unix_recvq_full(other);
1392 
1393 	unix_state_unlock(other);
1394 
1395 	if (sched)
1396 		timeo = schedule_timeout(timeo);
1397 
1398 	finish_wait(&u->peer_wait, &wait);
1399 	return timeo;
1400 }
1401 
1402 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1403 			       int addr_len, int flags)
1404 {
1405 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1406 	struct sock *sk = sock->sk;
1407 	struct net *net = sock_net(sk);
1408 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1409 	struct sock *newsk = NULL;
1410 	struct sock *other = NULL;
1411 	struct sk_buff *skb = NULL;
1412 	int st;
1413 	int err;
1414 	long timeo;
1415 
1416 	err = unix_validate_addr(sunaddr, addr_len);
1417 	if (err)
1418 		goto out;
1419 
1420 	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
1421 		err = unix_autobind(sk);
1422 		if (err)
1423 			goto out;
1424 	}
1425 
1426 	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1427 
1428 	/* First of all allocate resources.
1429 	   If we allocate them after the state is locked,
1430 	   we will have to recheck everything again in any case.
1431 	 */
1432 
1433 	/* create new sock for complete connection */
1434 	newsk = unix_create1(sock_net(sk), NULL, 0, sock->type);
1435 	if (IS_ERR(newsk)) {
1436 		err = PTR_ERR(newsk);
1437 		newsk = NULL;
1438 		goto out;
1439 	}
1440 
1441 	err = -ENOMEM;
1442 
1443 	/* Allocate skb for sending to listening sock */
1444 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1445 	if (skb == NULL)
1446 		goto out;
1447 
1448 restart:
1449 	/*  Find listening sock. */
1450 	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type);
1451 	if (IS_ERR(other)) {
1452 		err = PTR_ERR(other);
1453 		other = NULL;
1454 		goto out;
1455 	}
1456 
1457 	/* Latch state of peer */
1458 	unix_state_lock(other);
1459 
1460 	/* Apparently VFS overslept socket death. Retry. */
1461 	if (sock_flag(other, SOCK_DEAD)) {
1462 		unix_state_unlock(other);
1463 		sock_put(other);
1464 		goto restart;
1465 	}
1466 
1467 	err = -ECONNREFUSED;
1468 	if (other->sk_state != TCP_LISTEN)
1469 		goto out_unlock;
1470 	if (other->sk_shutdown & RCV_SHUTDOWN)
1471 		goto out_unlock;
1472 
1473 	if (unix_recvq_full(other)) {
1474 		err = -EAGAIN;
1475 		if (!timeo)
1476 			goto out_unlock;
1477 
1478 		timeo = unix_wait_for_peer(other, timeo);
1479 
1480 		err = sock_intr_errno(timeo);
1481 		if (signal_pending(current))
1482 			goto out;
1483 		sock_put(other);
1484 		goto restart;
1485 	}
1486 
1487 	/* Latch our state.
1488 
1489 	   This is a tricky place. We need to grab our state lock and cannot
1490 	   drop the lock on the peer. It is dangerous because a deadlock is
1491 	   possible. The connect-to-self case and simultaneous
1492 	   attempts to connect are eliminated by checking the socket
1493 	   state. other is TCP_LISTEN; if sk is TCP_LISTEN we
1494 	   check this before attempting to grab the lock.
1495 
1496 	   Well, and we have to recheck the state after the socket is locked.
1497 	 */
1498 	st = sk->sk_state;
1499 
1500 	switch (st) {
1501 	case TCP_CLOSE:
1502 		/* This is ok... continue with connect */
1503 		break;
1504 	case TCP_ESTABLISHED:
1505 		/* Socket is already connected */
1506 		err = -EISCONN;
1507 		goto out_unlock;
1508 	default:
1509 		err = -EINVAL;
1510 		goto out_unlock;
1511 	}
1512 
1513 	unix_state_lock_nested(sk);
1514 
1515 	if (sk->sk_state != st) {
1516 		unix_state_unlock(sk);
1517 		unix_state_unlock(other);
1518 		sock_put(other);
1519 		goto restart;
1520 	}
1521 
1522 	err = security_unix_stream_connect(sk, other, newsk);
1523 	if (err) {
1524 		unix_state_unlock(sk);
1525 		goto out_unlock;
1526 	}
1527 
1528 	/* The way is open! Quickly set all the necessary fields... */
1529 
1530 	sock_hold(sk);
1531 	unix_peer(newsk)	= sk;
1532 	newsk->sk_state		= TCP_ESTABLISHED;
1533 	newsk->sk_type		= sk->sk_type;
1534 	init_peercred(newsk);
1535 	newu = unix_sk(newsk);
1536 	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1537 	otheru = unix_sk(other);
1538 
1539 	/* copy address information from listening to new sock
1540 	 *
1541 	 * The contents of *(otheru->addr) and otheru->path
1542 	 * are seen fully set up here, since we have found
1543 	 * otheru in hash under unix_table_locks.  Insertion
1544 	 * into the hash chain we'd found it in had been done
1545 	 * in an earlier critical section protected by unix_table_locks,
1546 	 * the same one where we'd set *(otheru->addr) contents,
1547 	 * as well as otheru->path and otheru->addr itself.
1548 	 *
1549 	 * Using smp_store_release() here to set newu->addr
1550 	 * is enough to make those stores, as well as stores
1551 	 * to newu->path visible to anyone who gets newu->addr
1552 	 * by smp_load_acquire().  IOW, the same guarantees
1553 	 * as for unix_sock instances bound in unix_bind() or
1554 	 * in unix_autobind().
1555 	 */
1556 	if (otheru->path.dentry) {
1557 		path_get(&otheru->path);
1558 		newu->path = otheru->path;
1559 	}
1560 	refcount_inc(&otheru->addr->refcnt);
1561 	smp_store_release(&newu->addr, otheru->addr);
1562 
1563 	/* Set credentials */
1564 	copy_peercred(sk, other);
1565 
1566 	sock->state	= SS_CONNECTED;
1567 	sk->sk_state	= TCP_ESTABLISHED;
1568 	sock_hold(newsk);
1569 
1570 	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
1571 	unix_peer(sk)	= newsk;
1572 
1573 	unix_state_unlock(sk);
1574 
1575 	/* take it and send info to the listening sock */
1576 	spin_lock(&other->sk_receive_queue.lock);
1577 	__skb_queue_tail(&other->sk_receive_queue, skb);
1578 	spin_unlock(&other->sk_receive_queue.lock);
1579 	unix_state_unlock(other);
1580 	other->sk_data_ready(other);
1581 	sock_put(other);
1582 	return 0;
1583 
1584 out_unlock:
1585 	if (other)
1586 		unix_state_unlock(other);
1587 
1588 out:
1589 	kfree_skb(skb);
1590 	if (newsk)
1591 		unix_release_sock(newsk, 0);
1592 	if (other)
1593 		sock_put(other);
1594 	return err;
1595 }
1596 
1597 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1598 {
1599 	struct sock *ska = socka->sk, *skb = sockb->sk;
1600 
1601 	/* Join our sockets back to back */
1602 	sock_hold(ska);
1603 	sock_hold(skb);
1604 	unix_peer(ska) = skb;
1605 	unix_peer(skb) = ska;
1606 	init_peercred(ska);
1607 	init_peercred(skb);
1608 
1609 	ska->sk_state = TCP_ESTABLISHED;
1610 	skb->sk_state = TCP_ESTABLISHED;
1611 	socka->state  = SS_CONNECTED;
1612 	sockb->state  = SS_CONNECTED;
1613 	return 0;
1614 }
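/* User-space counterpart (illustrative):
 *
 *	int sv[2];
 *	socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
 *
 * Both ends come back already TCP_ESTABLISHED with each other as peer,
 * and SO_PEERCRED on either end reports the creating process.
 */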
1615 
1616 static void unix_sock_inherit_flags(const struct socket *old,
1617 				    struct socket *new)
1618 {
1619 	if (test_bit(SOCK_PASSCRED, &old->flags))
1620 		set_bit(SOCK_PASSCRED, &new->flags);
1621 	if (test_bit(SOCK_PASSSEC, &old->flags))
1622 		set_bit(SOCK_PASSSEC, &new->flags);
1623 }
1624 
1625 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1626 		       bool kern)
1627 {
1628 	struct sock *sk = sock->sk;
1629 	struct sock *tsk;
1630 	struct sk_buff *skb;
1631 	int err;
1632 
1633 	err = -EOPNOTSUPP;
1634 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1635 		goto out;
1636 
1637 	err = -EINVAL;
1638 	if (sk->sk_state != TCP_LISTEN)
1639 		goto out;
1640 
1641 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1642 	 * so that no locks are necessary.
1643 	 */
1644 
1645 	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1646 	if (!skb) {
1647 		/* This means receive shutdown. */
1648 		if (err == 0)
1649 			err = -EINVAL;
1650 		goto out;
1651 	}
1652 
1653 	tsk = skb->sk;
1654 	skb_free_datagram(sk, skb);
1655 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1656 
1657 	/* attach accepted sock to socket */
1658 	unix_state_lock(tsk);
1659 	newsock->state = SS_CONNECTED;
1660 	unix_sock_inherit_flags(sock, newsock);
1661 	sock_graft(tsk, newsock);
1662 	unix_state_unlock(tsk);
1663 	return 0;
1664 
1665 out:
1666 	return err;
1667 }
1668 
1669 
1670 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1671 {
1672 	struct sock *sk = sock->sk;
1673 	struct unix_address *addr;
1674 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1675 	int err = 0;
1676 
1677 	if (peer) {
1678 		sk = unix_peer_get(sk);
1679 
1680 		err = -ENOTCONN;
1681 		if (!sk)
1682 			goto out;
1683 		err = 0;
1684 	} else {
1685 		sock_hold(sk);
1686 	}
1687 
1688 	addr = smp_load_acquire(&unix_sk(sk)->addr);
1689 	if (!addr) {
1690 		sunaddr->sun_family = AF_UNIX;
1691 		sunaddr->sun_path[0] = 0;
1692 		err = offsetof(struct sockaddr_un, sun_path);
1693 	} else {
1694 		err = addr->len;
1695 		memcpy(sunaddr, addr->name, addr->len);
1696 	}
1697 	sock_put(sk);
1698 out:
1699 	return err;
1700 }
1701 
1702 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1703 {
1704 	scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1705 
1706 	/*
1707 	 * Garbage collection of unix sockets starts by selecting a set of
1708 	 * candidate sockets which have a reference only from being in flight
1709 	 * (total_refs == inflight_refs).  This condition is checked once during
1710 	 * the candidate collection phase, and candidates are marked as such, so
1711 	 * that non-candidates can later be ignored.  While inflight_refs is
1712 	 * protected by unix_gc_lock, total_refs (file count) is not, hence this
1713 	 * is an instantaneous decision.
1714 	 *
1715 	 * Once a candidate, however, the socket must not be reinstalled into a
1716 	 * file descriptor while the garbage collection is in progress.
1717 	 *
1718 	 * If the above conditions are met, then the directed graph of
1719 	 * candidates (*) does not change while unix_gc_lock is held.
1720 	 *
1721 	 * Any operation that changes the file count through file descriptors
1722 	 * (dup, close, sendmsg) does not change the graph since candidates are
1723 	 * not installed in fds.
1724 	 *
1725 	 * Dequeuing a candidate via recvmsg would install it into an fd, but
1726 	 * that takes unix_gc_lock to decrement the inflight count, so it's
1727 	 * serialized with garbage collection.
1728 	 *
1729 	 * MSG_PEEK is special in that it does not change the inflight count,
1730 	 * yet does install the socket into an fd.  The following lock/unlock
1731 	 * pair is to ensure serialization with garbage collection.  It must be
1732 	 * done between incrementing the file count and installing the file into
1733 	 * an fd.
1734 	 *
1735 	 * If garbage collection starts after the barrier provided by the
1736 	 * lock/unlock, then it will see the elevated refcount and not mark this
1737 	 * as a candidate.  If a garbage collection is already in progress
1738 	 * before the file count was incremented, then the lock/unlock pair will
1739 	 * ensure that garbage collection is finished before progressing to
1740 	 * installing the fd.
1741 	 *
1742 	 * (*) A -> B where B is on the queue of A or B is on the queue of C
1743 	 * which is on the queue of listening socket A.
1744 	 */
1745 	spin_lock(&unix_gc_lock);
1746 	spin_unlock(&unix_gc_lock);
1747 }
1748 
1749 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1750 {
1751 	int err = 0;
1752 
1753 	UNIXCB(skb).pid  = get_pid(scm->pid);
1754 	UNIXCB(skb).uid = scm->creds.uid;
1755 	UNIXCB(skb).gid = scm->creds.gid;
1756 	UNIXCB(skb).fp = NULL;
1757 	unix_get_secdata(scm, skb);
1758 	if (scm->fp && send_fds)
1759 		err = unix_attach_fds(scm, skb);
1760 
1761 	skb->destructor = unix_destruct_scm;
1762 	return err;
1763 }
1764 
1765 static bool unix_passcred_enabled(const struct socket *sock,
1766 				  const struct sock *other)
1767 {
1768 	return test_bit(SOCK_PASSCRED, &sock->flags) ||
1769 	       !other->sk_socket ||
1770 	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1771 }
1772 
1773 /*
1774  * Some apps rely on write() giving SCM_CREDENTIALS.
1775  * We include credentials if the source or destination socket
1776  * asserted SOCK_PASSCRED.
1777  */
1778 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1779 			    const struct sock *other)
1780 {
1781 	if (UNIXCB(skb).pid)
1782 		return;
1783 	if (unix_passcred_enabled(sock, other)) {
1784 		UNIXCB(skb).pid  = get_pid(task_tgid(current));
1785 		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1786 	}
1787 }
1788 
1789 static int maybe_init_creds(struct scm_cookie *scm,
1790 			    struct socket *socket,
1791 			    const struct sock *other)
1792 {
1793 	int err;
1794 	struct msghdr msg = { .msg_controllen = 0 };
1795 
1796 	err = scm_send(socket, &msg, scm, false);
1797 	if (err)
1798 		return err;
1799 
1800 	if (unix_passcred_enabled(socket, other)) {
1801 		scm->pid = get_pid(task_tgid(current));
1802 		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1803 	}
1804 	return err;
1805 }
1806 
1807 static bool unix_skb_scm_eq(struct sk_buff *skb,
1808 			    struct scm_cookie *scm)
1809 {
1810 	const struct unix_skb_parms *u = &UNIXCB(skb);
1811 
1812 	return u->pid == scm->pid &&
1813 	       uid_eq(u->uid, scm->creds.uid) &&
1814 	       gid_eq(u->gid, scm->creds.gid) &&
1815 	       unix_secdata_eq(scm, skb);
1816 }
1817 
1818 static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1819 {
1820 	struct scm_fp_list *fp = UNIXCB(skb).fp;
1821 	struct unix_sock *u = unix_sk(sk);
1822 
1823 	if (unlikely(fp && fp->count))
1824 		atomic_add(fp->count, &u->scm_stat.nr_fds);
1825 }
1826 
1827 static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1828 {
1829 	struct scm_fp_list *fp = UNIXCB(skb).fp;
1830 	struct unix_sock *u = unix_sk(sk);
1831 
1832 	if (unlikely(fp && fp->count))
1833 		atomic_sub(fp->count, &u->scm_stat.nr_fds);
1834 }
1835 
1836 /*
1837  *	Send AF_UNIX data.
1838  */
1839 
1840 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1841 			      size_t len)
1842 {
1843 	struct sock *sk = sock->sk;
1844 	struct net *net = sock_net(sk);
1845 	struct unix_sock *u = unix_sk(sk);
1846 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1847 	struct sock *other = NULL;
1848 	int err;
1849 	struct sk_buff *skb;
1850 	long timeo;
1851 	struct scm_cookie scm;
1852 	int data_len = 0;
1853 	int sk_locked;
1854 
1855 	wait_for_unix_gc();
1856 	err = scm_send(sock, msg, &scm, false);
1857 	if (err < 0)
1858 		return err;
1859 
1860 	err = -EOPNOTSUPP;
1861 	if (msg->msg_flags&MSG_OOB)
1862 		goto out;
1863 
1864 	if (msg->msg_namelen) {
1865 		err = unix_validate_addr(sunaddr, msg->msg_namelen);
1866 		if (err)
1867 			goto out;
1868 	} else {
1869 		sunaddr = NULL;
1870 		err = -ENOTCONN;
1871 		other = unix_peer_get(sk);
1872 		if (!other)
1873 			goto out;
1874 	}
1875 
1876 	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
1877 		err = unix_autobind(sk);
1878 		if (err)
1879 			goto out;
1880 	}
1881 
1882 	err = -EMSGSIZE;
1883 	if (len > sk->sk_sndbuf - 32)
1884 		goto out;
1885 
1886 	if (len > SKB_MAX_ALLOC) {
1887 		data_len = min_t(size_t,
1888 				 len - SKB_MAX_ALLOC,
1889 				 MAX_SKB_FRAGS * PAGE_SIZE);
1890 		data_len = PAGE_ALIGN(data_len);
1891 
1892 		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1893 	}
1894 
1895 	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1896 				   msg->msg_flags & MSG_DONTWAIT, &err,
1897 				   PAGE_ALLOC_COSTLY_ORDER);
1898 	if (skb == NULL)
1899 		goto out;
1900 
1901 	err = unix_scm_to_skb(&scm, skb, true);
1902 	if (err < 0)
1903 		goto out_free;
1904 
1905 	skb_put(skb, len - data_len);
1906 	skb->data_len = data_len;
1907 	skb->len = len;
1908 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1909 	if (err)
1910 		goto out_free;
1911 
1912 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1913 
1914 restart:
1915 	if (!other) {
1916 		err = -ECONNRESET;
1917 		if (sunaddr == NULL)
1918 			goto out_free;
1919 
1920 		other = unix_find_other(net, sunaddr, msg->msg_namelen,
1921 					sk->sk_type);
1922 		if (IS_ERR(other)) {
1923 			err = PTR_ERR(other);
1924 			other = NULL;
1925 			goto out_free;
1926 		}
1927 	}
1928 
1929 	if (sk_filter(other, skb) < 0) {
1930 		/* Toss the packet but do not return any error to the sender */
1931 		err = len;
1932 		goto out_free;
1933 	}
1934 
1935 	sk_locked = 0;
1936 	unix_state_lock(other);
1937 restart_locked:
1938 	err = -EPERM;
1939 	if (!unix_may_send(sk, other))
1940 		goto out_unlock;
1941 
1942 	if (unlikely(sock_flag(other, SOCK_DEAD))) {
1943 		/*
1944 		 *	Check with POSIX 1003.1g - what should a
1945 		 *	datagram error do here?
1946 		 */
1947 		unix_state_unlock(other);
1948 		sock_put(other);
1949 
1950 		if (!sk_locked)
1951 			unix_state_lock(sk);
1952 
1953 		err = 0;
1954 		if (unix_peer(sk) == other) {
1955 			unix_peer(sk) = NULL;
1956 			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1957 
1958 			unix_state_unlock(sk);
1959 
1960 			sk->sk_state = TCP_CLOSE;
1961 			unix_dgram_disconnected(sk, other);
1962 			sock_put(other);
1963 			err = -ECONNREFUSED;
1964 		} else {
1965 			unix_state_unlock(sk);
1966 		}
1967 
1968 		other = NULL;
1969 		if (err)
1970 			goto out_free;
1971 		goto restart;
1972 	}
1973 
1974 	err = -EPIPE;
1975 	if (other->sk_shutdown & RCV_SHUTDOWN)
1976 		goto out_unlock;
1977 
1978 	if (sk->sk_type != SOCK_SEQPACKET) {
1979 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1980 		if (err)
1981 			goto out_unlock;
1982 	}
1983 
1984 	/* other == sk && unix_peer(other) != sk if
1985 	 * - unix_peer(sk) == NULL, destination address bound to sk
1986 	 * - unix_peer(sk) == sk by time of get but disconnected before lock
1987 	 */
1988 	if (other != sk &&
1989 	    unlikely(unix_peer(other) != sk &&
1990 	    unix_recvq_full_lockless(other))) {
1991 		if (timeo) {
1992 			timeo = unix_wait_for_peer(other, timeo);
1993 
1994 			err = sock_intr_errno(timeo);
1995 			if (signal_pending(current))
1996 				goto out_free;
1997 
1998 			goto restart;
1999 		}
2000 
2001 		if (!sk_locked) {
2002 			unix_state_unlock(other);
2003 			unix_state_double_lock(sk, other);
2004 		}
2005 
2006 		if (unix_peer(sk) != other ||
2007 		    unix_dgram_peer_wake_me(sk, other)) {
2008 			err = -EAGAIN;
2009 			sk_locked = 1;
2010 			goto out_unlock;
2011 		}
2012 
2013 		if (!sk_locked) {
2014 			sk_locked = 1;
2015 			goto restart_locked;
2016 		}
2017 	}
2018 
2019 	if (unlikely(sk_locked))
2020 		unix_state_unlock(sk);
2021 
2022 	if (sock_flag(other, SOCK_RCVTSTAMP))
2023 		__net_timestamp(skb);
2024 	maybe_add_creds(skb, sock, other);
2025 	scm_stat_add(other, skb);
2026 	skb_queue_tail(&other->sk_receive_queue, skb);
2027 	unix_state_unlock(other);
2028 	other->sk_data_ready(other);
2029 	sock_put(other);
2030 	scm_destroy(&scm);
2031 	return len;
2032 
2033 out_unlock:
2034 	if (sk_locked)
2035 		unix_state_unlock(sk);
2036 	unix_state_unlock(other);
2037 out_free:
2038 	kfree_skb(skb);
2039 out:
2040 	if (other)
2041 		sock_put(other);
2042 	scm_destroy(&scm);
2043 	return err;
2044 }
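/*
 * Illustrative userspace sketch of the datagram path above (hypothetical
 * path "/tmp/echo.sock"; error handling elided). Note that a single
 * datagram is capped slightly below the send buffer: len > sk_sndbuf - 32
 * fails with -EMSGSIZE.
 *
 *	int fd = socket(AF_UNIX, SOCK_DGRAM, 0);
 *	struct sockaddr_un sun = { .sun_family = AF_UNIX };
 *
 *	strcpy(sun.sun_path, "/tmp/echo.sock");
 *	sendto(fd, "ping", 4, 0, (struct sockaddr *)&sun, sizeof(sun));
 */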
2045 
2046 /* We use paged skbs for stream sockets, limiting occupancy to 32768
2047  * bytes, with a minimum of a full page.
2048  */
2049 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
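/* For example: with 4 KiB pages get_order(32768) == 3, so the limit is
 * 32 KiB; with 64 KiB pages get_order(32768) == 0 and the limit is a
 * single 64 KiB page.
 */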
2050 
2051 #if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
2052 static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other)
2053 {
2054 	struct unix_sock *ousk = unix_sk(other);
2055 	struct sk_buff *skb;
2056 	int err = 0;
2057 
2058 	skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
2059 
2060 	if (!skb)
2061 		return err;
2062 
2063 	skb_put(skb, 1);
2064 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
2065 
2066 	if (err) {
2067 		kfree_skb(skb);
2068 		return err;
2069 	}
2070 
2071 	unix_state_lock(other);
2072 
2073 	if (sock_flag(other, SOCK_DEAD) ||
2074 	    (other->sk_shutdown & RCV_SHUTDOWN)) {
2075 		unix_state_unlock(other);
2076 		kfree_skb(skb);
2077 		return -EPIPE;
2078 	}
2079 
2080 	maybe_add_creds(skb, sock, other);
2081 	skb_get(skb);
2082 
2083 	if (ousk->oob_skb)
2084 		consume_skb(ousk->oob_skb);
2085 
2086 	ousk->oob_skb = skb;
2087 
2088 	scm_stat_add(other, skb);
2089 	skb_queue_tail(&other->sk_receive_queue, skb);
2090 	sk_send_sigurg(other);
2091 	unix_state_unlock(other);
2092 	other->sk_data_ready(other);
2093 
2094 	return err;
2095 }
2096 #endif
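/*
 * Illustrative userspace sketch of the out-of-band byte handled by
 * queue_oob() (requires CONFIG_AF_UNIX_OOB; hypothetical connected
 * stream pair sv[0]/sv[1]; error handling elided):
 *
 *	char c;
 *
 *	send(sv[0], "ab", 2, 0);
 *	send(sv[0], "#", 1, MSG_OOB);	// '#' becomes oob_skb on the peer
 *	recv(sv[1], &c, 1, MSG_OOB);	// fetches '#' out of band
 */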
2097 
2098 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
2099 			       size_t len)
2100 {
2101 	struct sock *sk = sock->sk;
2102 	struct sock *other = NULL;
2103 	int err, size;
2104 	struct sk_buff *skb;
2105 	int sent = 0;
2106 	struct scm_cookie scm;
2107 	bool fds_sent = false;
2108 	int data_len;
2109 
2110 	wait_for_unix_gc();
2111 	err = scm_send(sock, msg, &scm, false);
2112 	if (err < 0)
2113 		return err;
2114 
2115 	err = -EOPNOTSUPP;
2116 	if (msg->msg_flags & MSG_OOB) {
2117 #if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
2118 		if (len)
2119 			len--;
2120 		else
2121 #endif
2122 			goto out_err;
2123 	}
2124 
2125 	if (msg->msg_namelen) {
2126 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
2127 		goto out_err;
2128 	} else {
2129 		err = -ENOTCONN;
2130 		other = unix_peer(sk);
2131 		if (!other)
2132 			goto out_err;
2133 	}
2134 
2135 	if (sk->sk_shutdown & SEND_SHUTDOWN)
2136 		goto pipe_err;
2137 
2138 	while (sent < len) {
2139 		size = len - sent;
2140 
2141 		/* Keep two messages in the pipe so it schedules better */
2142 		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
2143 
2144 		/* allow fallback to order-0 allocations */
2145 		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
2146 
2147 		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
2148 
2149 		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
2150 
2151 		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
2152 					   msg->msg_flags & MSG_DONTWAIT, &err,
2153 					   get_order(UNIX_SKB_FRAGS_SZ));
2154 		if (!skb)
2155 			goto out_err;
2156 
2157 		/* Only send the fds in the first buffer */
2158 		err = unix_scm_to_skb(&scm, skb, !fds_sent);
2159 		if (err < 0) {
2160 			kfree_skb(skb);
2161 			goto out_err;
2162 		}
2163 		fds_sent = true;
2164 
2165 		skb_put(skb, size - data_len);
2166 		skb->data_len = data_len;
2167 		skb->len = size;
2168 		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
2169 		if (err) {
2170 			kfree_skb(skb);
2171 			goto out_err;
2172 		}
2173 
2174 		unix_state_lock(other);
2175 
2176 		if (sock_flag(other, SOCK_DEAD) ||
2177 		    (other->sk_shutdown & RCV_SHUTDOWN))
2178 			goto pipe_err_free;
2179 
2180 		maybe_add_creds(skb, sock, other);
2181 		scm_stat_add(other, skb);
2182 		skb_queue_tail(&other->sk_receive_queue, skb);
2183 		unix_state_unlock(other);
2184 		other->sk_data_ready(other);
2185 		sent += size;
2186 	}
2187 
2188 #if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
2189 	if (msg->msg_flags & MSG_OOB) {
2190 		err = queue_oob(sock, msg, other);
2191 		if (err)
2192 			goto out_err;
2193 		sent++;
2194 	}
2195 #endif
2196 
2197 	scm_destroy(&scm);
2198 
2199 	return sent;
2200 
2201 pipe_err_free:
2202 	unix_state_unlock(other);
2203 	kfree_skb(skb);
2204 pipe_err:
2205 	if (sent == 0 && !(msg->msg_flags & MSG_NOSIGNAL))
2206 		send_sig(SIGPIPE, current, 0);
2207 	err = -EPIPE;
2208 out_err:
2209 	scm_destroy(&scm);
2210 	return sent ? : err;
2211 }
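/*
 * Because unix_stream_sendmsg() can return a short count ("return
 * sent ?: err" above), a careful writer retries. Illustrative userspace
 * sketch (hypothetical "fd", "buf" and "len"; <errno.h> assumed):
 *
 *	size_t off = 0;
 *	ssize_t n;
 *
 *	while (off < len) {
 *		n = send(fd, buf + off, len - off, MSG_NOSIGNAL);
 *		if (n < 0) {
 *			if (errno == EINTR)
 *				continue;
 *			break;	// e.g. EPIPE after the peer shut down
 *		}
 *		off += n;
 *	}
 */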
2212 
2213 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
2214 				    int offset, size_t size, int flags)
2215 {
2216 	int err;
2217 	bool send_sigpipe = false;
2218 	bool init_scm = true;
2219 	struct scm_cookie scm;
2220 	struct sock *other, *sk = socket->sk;
2221 	struct sk_buff *skb, *newskb = NULL, *tail = NULL;
2222 
2223 	if (flags & MSG_OOB)
2224 		return -EOPNOTSUPP;
2225 
2226 	other = unix_peer(sk);
2227 	if (!other || sk->sk_state != TCP_ESTABLISHED)
2228 		return -ENOTCONN;
2229 
2230 	if (false) {
2231 alloc_skb:
2232 		unix_state_unlock(other);
2233 		mutex_unlock(&unix_sk(other)->iolock);
2234 		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
2235 					      &err, 0);
2236 		if (!newskb)
2237 			goto err;
2238 	}
2239 
2240 	/* We must acquire iolock as we modify skbs already present
2241 	 * in the sk_receive_queue and mess with skb->len.
2242 	 */
2243 	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
2244 	if (err) {
2245 		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
2246 		goto err;
2247 	}
2248 
2249 	if (sk->sk_shutdown & SEND_SHUTDOWN) {
2250 		err = -EPIPE;
2251 		send_sigpipe = true;
2252 		goto err_unlock;
2253 	}
2254 
2255 	unix_state_lock(other);
2256 
2257 	if (sock_flag(other, SOCK_DEAD) ||
2258 	    other->sk_shutdown & RCV_SHUTDOWN) {
2259 		err = -EPIPE;
2260 		send_sigpipe = true;
2261 		goto err_state_unlock;
2262 	}
2263 
2264 	if (init_scm) {
2265 		err = maybe_init_creds(&scm, socket, other);
2266 		if (err)
2267 			goto err_state_unlock;
2268 		init_scm = false;
2269 	}
2270 
2271 	skb = skb_peek_tail(&other->sk_receive_queue);
2272 	if (tail && tail == skb) {
2273 		skb = newskb;
2274 	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2275 		if (newskb) {
2276 			skb = newskb;
2277 		} else {
2278 			tail = skb;
2279 			goto alloc_skb;
2280 		}
2281 	} else if (newskb) {
2282 		/* This is the fast path: the freshly allocated skb is
2283 		 * not needed after all, so drop it. consume_skb() would
2284 		 * do no harm even with newskb == NULL.
2285 		 */
2286 		consume_skb(newskb);
2287 		newskb = NULL;
2288 	}
2289 
2290 	if (skb_append_pagefrags(skb, page, offset, size)) {
2291 		tail = skb;
2292 		goto alloc_skb;
2293 	}
2294 
2295 	skb->len += size;
2296 	skb->data_len += size;
2297 	skb->truesize += size;
2298 	refcount_add(size, &sk->sk_wmem_alloc);
2299 
2300 	if (newskb) {
2301 		err = unix_scm_to_skb(&scm, skb, false);
2302 		if (err)
2303 			goto err_state_unlock;
2304 		spin_lock(&other->sk_receive_queue.lock);
2305 		__skb_queue_tail(&other->sk_receive_queue, newskb);
2306 		spin_unlock(&other->sk_receive_queue.lock);
2307 	}
2308 
2309 	unix_state_unlock(other);
2310 	mutex_unlock(&unix_sk(other)->iolock);
2311 
2312 	other->sk_data_ready(other);
2313 	scm_destroy(&scm);
2314 	return size;
2315 
2316 err_state_unlock:
2317 	unix_state_unlock(other);
2318 err_unlock:
2319 	mutex_unlock(&unix_sk(other)->iolock);
2320 err:
2321 	kfree_skb(newskb);
2322 	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2323 		send_sig(SIGPIPE, current, 0);
2324 	if (!init_scm)
2325 		scm_destroy(&scm);
2326 	return err;
2327 }
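/*
 * unix_stream_sendpage() is reached through zero-copy interfaces such
 * as sendfile(2) and splice(2) rather than plain send(2). Illustrative
 * userspace sketch (hypothetical open file "filefd" and connected
 * stream socket "sockfd"; error handling elided):
 *
 *	off_t off = 0;
 *
 *	sendfile(sockfd, filefd, &off, 65536);
 */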
2328 
2329 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2330 				  size_t len)
2331 {
2332 	int err;
2333 	struct sock *sk = sock->sk;
2334 
2335 	err = sock_error(sk);
2336 	if (err)
2337 		return err;
2338 
2339 	if (sk->sk_state != TCP_ESTABLISHED)
2340 		return -ENOTCONN;
2341 
2342 	if (msg->msg_namelen)
2343 		msg->msg_namelen = 0;
2344 
2345 	return unix_dgram_sendmsg(sock, msg, len);
2346 }
2347 
2348 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2349 				  size_t size, int flags)
2350 {
2351 	struct sock *sk = sock->sk;
2352 
2353 	if (sk->sk_state != TCP_ESTABLISHED)
2354 		return -ENOTCONN;
2355 
2356 	return unix_dgram_recvmsg(sock, msg, size, flags);
2357 }
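/*
 * SOCK_SEQPACKET reuses the datagram helpers above: connection
 * oriented like a stream, but record boundaries are preserved.
 * Illustrative userspace sketch (error handling elided):
 *
 *	int sv[2];
 *	char buf[16];
 *
 *	socketpair(AF_UNIX, SOCK_SEQPACKET, 0, sv);
 *	send(sv[0], "one", 3, 0);
 *	send(sv[0], "two", 3, 0);
 *	recv(sv[1], buf, sizeof(buf), 0);	// returns 3 ("one"), not 6
 */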
2358 
2359 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2360 {
2361 	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2362 
2363 	if (addr) {
2364 		msg->msg_namelen = addr->len;
2365 		memcpy(msg->msg_name, addr->name, addr->len);
2366 	}
2367 }
2368 
2369 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
2370 			 int flags)
2371 {
2372 	struct scm_cookie scm;
2373 	struct socket *sock = sk->sk_socket;
2374 	struct unix_sock *u = unix_sk(sk);
2375 	struct sk_buff *skb, *last;
2376 	long timeo;
2377 	int skip;
2378 	int err;
2379 
2380 	err = -EOPNOTSUPP;
2381 	if (flags & MSG_OOB)
2382 		goto out;
2383 
2384 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2385 
2386 	do {
2387 		mutex_lock(&u->iolock);
2388 
2389 		skip = sk_peek_offset(sk, flags);
2390 		skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2391 					      &skip, &err, &last);
2392 		if (skb) {
2393 			if (!(flags & MSG_PEEK))
2394 				scm_stat_del(sk, skb);
2395 			break;
2396 		}
2397 
2398 		mutex_unlock(&u->iolock);
2399 
2400 		if (err != -EAGAIN)
2401 			break;
2402 	} while (timeo &&
2403 		 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2404 					      &err, &timeo, last));
2405 
2406 	if (!skb) { /* implies iolock unlocked */
2407 		unix_state_lock(sk);
2408 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2409 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2410 		    (sk->sk_shutdown & RCV_SHUTDOWN))
2411 			err = 0;
2412 		unix_state_unlock(sk);
2413 		goto out;
2414 	}
2415 
2416 	if (wq_has_sleeper(&u->peer_wait))
2417 		wake_up_interruptible_sync_poll(&u->peer_wait,
2418 						EPOLLOUT | EPOLLWRNORM |
2419 						EPOLLWRBAND);
2420 
2421 	if (msg->msg_name)
2422 		unix_copy_addr(msg, skb->sk);
2423 
2424 	if (size > skb->len - skip)
2425 		size = skb->len - skip;
2426 	else if (size < skb->len - skip)
2427 		msg->msg_flags |= MSG_TRUNC;
2428 
2429 	err = skb_copy_datagram_msg(skb, skip, msg, size);
2430 	if (err)
2431 		goto out_free;
2432 
2433 	if (sock_flag(sk, SOCK_RCVTSTAMP))
2434 		__sock_recv_timestamp(msg, sk, skb);
2435 
2436 	memset(&scm, 0, sizeof(scm));
2437 
2438 	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2439 	unix_set_secdata(&scm, skb);
2440 
2441 	if (!(flags & MSG_PEEK)) {
2442 		if (UNIXCB(skb).fp)
2443 			unix_detach_fds(&scm, skb);
2444 
2445 		sk_peek_offset_bwd(sk, skb->len);
2446 	} else {
2447 		/* It is questionable: on PEEK we could:
2448 		   - not return fds - good, but too simple 8)
2449 		   - return fds, and not return them on read (old strategy,
2450 		     apparently wrong)
2451 		   - clone fds (I chose this for now, it is the most universal
2452 		     solution)
2453 
2454 		   POSIX 1003.1g does not actually define this clearly
2455 		   at all. POSIX 1003.1g doesn't define a lot of things
2456 		   clearly, however!
2457 
2458 		*/
2459 
2460 		sk_peek_offset_fwd(sk, size);
2461 
2462 		if (UNIXCB(skb).fp)
2463 			unix_peek_fds(&scm, skb);
2464 	}
2465 	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2466 
2467 	scm_recv(sock, msg, &scm, flags);
2468 
2469 out_free:
2470 	skb_free_datagram(sk, skb);
2471 	mutex_unlock(&u->iolock);
2472 out:
2473 	return err;
2474 }
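/*
 * The MSG_TRUNC handling above lets userspace learn a queued datagram's
 * full size without consuming it. Illustrative sketch (hypothetical
 * datagram socket "fd"):
 *
 *	char tiny;
 *	ssize_t full = recv(fd, &tiny, 1, MSG_PEEK | MSG_TRUNC);
 *
 *	// "full" is the whole datagram length; the datagram is still
 *	// queued and can now be re-read into a large enough buffer.
 */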
2475 
2476 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2477 			      int flags)
2478 {
2479 	struct sock *sk = sock->sk;
2480 
2481 #ifdef CONFIG_BPF_SYSCALL
2482 	const struct proto *prot = READ_ONCE(sk->sk_prot);
2483 
2484 	if (prot != &unix_dgram_proto)
2485 		return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
2486 					    flags & ~MSG_DONTWAIT, NULL);
2487 #endif
2488 	return __unix_dgram_recvmsg(sk, msg, size, flags);
2489 }
2490 
2491 static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
2492 			  sk_read_actor_t recv_actor)
2493 {
2494 	int copied = 0;
2495 
2496 	while (1) {
2497 		struct unix_sock *u = unix_sk(sk);
2498 		struct sk_buff *skb;
2499 		int used, err;
2500 
2501 		mutex_lock(&u->iolock);
2502 		skb = skb_recv_datagram(sk, 0, 1, &err);
2503 		mutex_unlock(&u->iolock);
2504 		if (!skb)
2505 			return err;
2506 
2507 		used = recv_actor(desc, skb, 0, skb->len);
2508 		if (used <= 0) {
2509 			if (!copied)
2510 				copied = used;
2511 			kfree_skb(skb);
2512 			break;
2513 		} else if (used <= skb->len) {
2514 			copied += used;
2515 		}
2516 
2517 		kfree_skb(skb);
2518 		if (!desc->count)
2519 			break;
2520 	}
2521 
2522 	return copied;
2523 }
2524 
2525 /*
2526  *	Sleep until more data has arrived. But check for races.
2527  */
2528 static long unix_stream_data_wait(struct sock *sk, long timeo,
2529 				  struct sk_buff *last, unsigned int last_len,
2530 				  bool freezable)
2531 {
2532 	struct sk_buff *tail;
2533 	DEFINE_WAIT(wait);
2534 
2535 	unix_state_lock(sk);
2536 
2537 	for (;;) {
2538 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2539 
2540 		tail = skb_peek_tail(&sk->sk_receive_queue);
2541 		if (tail != last ||
2542 		    (tail && tail->len != last_len) ||
2543 		    sk->sk_err ||
2544 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2545 		    signal_pending(current) ||
2546 		    !timeo)
2547 			break;
2548 
2549 		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2550 		unix_state_unlock(sk);
2551 		if (freezable)
2552 			timeo = freezable_schedule_timeout(timeo);
2553 		else
2554 			timeo = schedule_timeout(timeo);
2555 		unix_state_lock(sk);
2556 
2557 		if (sock_flag(sk, SOCK_DEAD))
2558 			break;
2559 
2560 		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2561 	}
2562 
2563 	finish_wait(sk_sleep(sk), &wait);
2564 	unix_state_unlock(sk);
2565 	return timeo;
2566 }
2567 
2568 static unsigned int unix_skb_len(const struct sk_buff *skb)
2569 {
2570 	return skb->len - UNIXCB(skb).consumed;
2571 }
2572 
2573 struct unix_stream_read_state {
2574 	int (*recv_actor)(struct sk_buff *, int, int,
2575 			  struct unix_stream_read_state *);
2576 	struct socket *socket;
2577 	struct msghdr *msg;
2578 	struct pipe_inode_info *pipe;
2579 	size_t size;
2580 	int flags;
2581 	unsigned int splice_flags;
2582 };
2583 
2584 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2585 static int unix_stream_recv_urg(struct unix_stream_read_state *state)
2586 {
2587 	struct socket *sock = state->socket;
2588 	struct sock *sk = sock->sk;
2589 	struct unix_sock *u = unix_sk(sk);
2590 	int chunk = 1;
2591 	struct sk_buff *oob_skb;
2592 
2593 	mutex_lock(&u->iolock);
2594 	unix_state_lock(sk);
2595 
2596 	if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
2597 		unix_state_unlock(sk);
2598 		mutex_unlock(&u->iolock);
2599 		return -EINVAL;
2600 	}
2601 
2602 	oob_skb = u->oob_skb;
2603 
2604 	if (!(state->flags & MSG_PEEK))
2605 		u->oob_skb = NULL;
2607 
2608 	unix_state_unlock(sk);
2609 
2610 	chunk = state->recv_actor(oob_skb, 0, chunk, state);
2611 
2612 	if (!(state->flags & MSG_PEEK)) {
2613 		UNIXCB(oob_skb).consumed += 1;
2614 		kfree_skb(oob_skb);
2615 	}
2616 
2617 	mutex_unlock(&u->iolock);
2618 
2619 	if (chunk < 0)
2620 		return -EFAULT;
2621 
2622 	state->msg->msg_flags |= MSG_OOB;
2623 	return 1;
2624 }
2625 
2626 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
2627 				  int flags, int copied)
2628 {
2629 	struct unix_sock *u = unix_sk(sk);
2630 
2631 	if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
2632 		skb_unlink(skb, &sk->sk_receive_queue);
2633 		consume_skb(skb);
2634 		skb = NULL;
2635 	} else {
2636 		if (skb == u->oob_skb) {
2637 			if (copied) {
2638 				skb = NULL;
2639 			} else if (sock_flag(sk, SOCK_URGINLINE)) {
2640 				if (!(flags & MSG_PEEK)) {
2641 					u->oob_skb = NULL;
2642 					consume_skb(skb);
2643 				}
2644 			} else if (!(flags & MSG_PEEK)) {
2645 				skb_unlink(skb, &sk->sk_receive_queue);
2646 				consume_skb(skb);
2647 				skb = skb_peek(&sk->sk_receive_queue);
2648 			}
2649 		}
2650 	}
2651 	return skb;
2652 }
2653 #endif
2654 
2655 static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
2656 				 sk_read_actor_t recv_actor)
2657 {
2658 	if (unlikely(sk->sk_state != TCP_ESTABLISHED))
2659 		return -ENOTCONN;
2660 
2661 	return unix_read_sock(sk, desc, recv_actor);
2662 }
2663 
2664 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2665 				    bool freezable)
2666 {
2667 	struct scm_cookie scm;
2668 	struct socket *sock = state->socket;
2669 	struct sock *sk = sock->sk;
2670 	struct unix_sock *u = unix_sk(sk);
2671 	int copied = 0;
2672 	int flags = state->flags;
2673 	int noblock = flags & MSG_DONTWAIT;
2674 	bool check_creds = false;
2675 	int target;
2676 	int err = 0;
2677 	long timeo;
2678 	int skip;
2679 	size_t size = state->size;
2680 	unsigned int last_len;
2681 
2682 	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2683 		err = -EINVAL;
2684 		goto out;
2685 	}
2686 
2687 	if (unlikely(flags & MSG_OOB)) {
2688 		err = -EOPNOTSUPP;
2689 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2690 		err = unix_stream_recv_urg(state);
2691 #endif
2692 		goto out;
2693 	}
2694 
2695 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2696 	timeo = sock_rcvtimeo(sk, noblock);
2697 
2698 	memset(&scm, 0, sizeof(scm));
2699 
2700 	/* Lock the socket to prevent queue disordering
2701 	 * while we sleep in memcpy_to_msg().
2702 	 */
2703 	mutex_lock(&u->iolock);
2704 
2705 	skip = max(sk_peek_offset(sk, flags), 0);
2706 
2707 	do {
2708 		int chunk;
2709 		bool drop_skb;
2710 		struct sk_buff *skb, *last;
2711 
2712 redo:
2713 		unix_state_lock(sk);
2714 		if (sock_flag(sk, SOCK_DEAD)) {
2715 			err = -ECONNRESET;
2716 			goto unlock;
2717 		}
2718 		last = skb = skb_peek(&sk->sk_receive_queue);
2719 		last_len = last ? last->len : 0;
2720 
2721 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2722 		if (skb) {
2723 			skb = manage_oob(skb, sk, flags, copied);
2724 			if (!skb) {
2725 				unix_state_unlock(sk);
2726 				if (copied)
2727 					break;
2728 				goto redo;
2729 			}
2730 		}
2731 #endif
2732 again:
2733 		if (skb == NULL) {
2734 			if (copied >= target)
2735 				goto unlock;
2736 
2737 			/*
2738 			 *	POSIX 1003.1g mandates this order.
2739 			 */
2740 
2741 			err = sock_error(sk);
2742 			if (err)
2743 				goto unlock;
2744 			if (sk->sk_shutdown & RCV_SHUTDOWN)
2745 				goto unlock;
2746 
2747 			unix_state_unlock(sk);
2748 			if (!timeo) {
2749 				err = -EAGAIN;
2750 				break;
2751 			}
2752 
2753 			mutex_unlock(&u->iolock);
2754 
2755 			timeo = unix_stream_data_wait(sk, timeo, last,
2756 						      last_len, freezable);
2757 
2758 			if (signal_pending(current)) {
2759 				err = sock_intr_errno(timeo);
2760 				scm_destroy(&scm);
2761 				goto out;
2762 			}
2763 
2764 			mutex_lock(&u->iolock);
2765 			goto redo;
2766 unlock:
2767 			unix_state_unlock(sk);
2768 			break;
2769 		}
2770 
2771 		while (skip >= unix_skb_len(skb)) {
2772 			skip -= unix_skb_len(skb);
2773 			last = skb;
2774 			last_len = skb->len;
2775 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2776 			if (!skb)
2777 				goto again;
2778 		}
2779 
2780 		unix_state_unlock(sk);
2781 
2782 		if (check_creds) {
2783 			/* Never glue messages from different writers */
2784 			if (!unix_skb_scm_eq(skb, &scm))
2785 				break;
2786 		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2787 			/* Copy credentials */
2788 			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2789 			unix_set_secdata(&scm, skb);
2790 			check_creds = true;
2791 		}
2792 
2793 		/* Copy address just once */
2794 		if (state->msg && state->msg->msg_name) {
2795 			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2796 					 state->msg->msg_name);
2797 			unix_copy_addr(state->msg, skb->sk);
2798 			sunaddr = NULL;
2799 		}
2800 
2801 		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2802 		skb_get(skb);
2803 		chunk = state->recv_actor(skb, skip, chunk, state);
2804 		drop_skb = !unix_skb_len(skb);
2805 		/* skb is only safe to use if !drop_skb */
2806 		consume_skb(skb);
2807 		if (chunk < 0) {
2808 			if (copied == 0)
2809 				copied = -EFAULT;
2810 			break;
2811 		}
2812 		copied += chunk;
2813 		size -= chunk;
2814 
2815 		if (drop_skb) {
2816 			/* The skb was touched by a concurrent reader;
2817 			 * we should not expect anything from it anymore
2818 			 * and must assume it invalid - we can be sure
2819 			 * it was dropped from the socket queue.
2820 			 *
2821 			 * Report a short read.
2822 			 */
2823 			err = 0;
2824 			break;
2825 		}
2826 
2827 		/* Mark read part of skb as used */
2828 		if (!(flags & MSG_PEEK)) {
2829 			UNIXCB(skb).consumed += chunk;
2830 
2831 			sk_peek_offset_bwd(sk, chunk);
2832 
2833 			if (UNIXCB(skb).fp) {
2834 				scm_stat_del(sk, skb);
2835 				unix_detach_fds(&scm, skb);
2836 			}
2837 
2838 			if (unix_skb_len(skb))
2839 				break;
2840 
2841 			skb_unlink(skb, &sk->sk_receive_queue);
2842 			consume_skb(skb);
2843 
2844 			if (scm.fp)
2845 				break;
2846 		} else {
2847 			/* It is questionable, see note in __unix_dgram_recvmsg.
2848 			 */
2849 			if (UNIXCB(skb).fp)
2850 				unix_peek_fds(&scm, skb);
2851 
2852 			sk_peek_offset_fwd(sk, chunk);
2853 
2854 			if (UNIXCB(skb).fp)
2855 				break;
2856 
2857 			skip = 0;
2858 			last = skb;
2859 			last_len = skb->len;
2860 			unix_state_lock(sk);
2861 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2862 			if (skb)
2863 				goto again;
2864 			unix_state_unlock(sk);
2865 			break;
2866 		}
2867 	} while (size);
2868 
2869 	mutex_unlock(&u->iolock);
2870 	if (state->msg)
2871 		scm_recv(sock, state->msg, &scm, flags);
2872 	else
2873 		scm_destroy(&scm);
2874 out:
2875 	return copied ? : err;
2876 }
2877 
2878 static int unix_stream_read_actor(struct sk_buff *skb,
2879 				  int skip, int chunk,
2880 				  struct unix_stream_read_state *state)
2881 {
2882 	int ret;
2883 
2884 	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2885 				    state->msg, chunk);
2886 	return ret ?: chunk;
2887 }
2888 
2889 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
2890 			  size_t size, int flags)
2891 {
2892 	struct unix_stream_read_state state = {
2893 		.recv_actor = unix_stream_read_actor,
2894 		.socket = sk->sk_socket,
2895 		.msg = msg,
2896 		.size = size,
2897 		.flags = flags
2898 	};
2899 
2900 	return unix_stream_read_generic(&state, true);
2901 }
2902 
2903 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2904 			       size_t size, int flags)
2905 {
2906 	struct unix_stream_read_state state = {
2907 		.recv_actor = unix_stream_read_actor,
2908 		.socket = sock,
2909 		.msg = msg,
2910 		.size = size,
2911 		.flags = flags
2912 	};
2913 
2914 #ifdef CONFIG_BPF_SYSCALL
2915 	struct sock *sk = sock->sk;
2916 	const struct proto *prot = READ_ONCE(sk->sk_prot);
2917 
2918 	if (prot != &unix_stream_proto)
2919 		return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
2920 					    flags & ~MSG_DONTWAIT, NULL);
2921 #endif
2922 	return unix_stream_read_generic(&state, true);
2923 }
2924 
2925 static int unix_stream_splice_actor(struct sk_buff *skb,
2926 				    int skip, int chunk,
2927 				    struct unix_stream_read_state *state)
2928 {
2929 	return skb_splice_bits(skb, state->socket->sk,
2930 			       UNIXCB(skb).consumed + skip,
2931 			       state->pipe, chunk, state->splice_flags);
2932 }
2933 
2934 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2935 				       struct pipe_inode_info *pipe,
2936 				       size_t size, unsigned int flags)
2937 {
2938 	struct unix_stream_read_state state = {
2939 		.recv_actor = unix_stream_splice_actor,
2940 		.socket = sock,
2941 		.pipe = pipe,
2942 		.size = size,
2943 		.splice_flags = flags,
2944 	};
2945 
2946 	if (unlikely(*ppos))
2947 		return -ESPIPE;
2948 
2949 	if (sock->file->f_flags & O_NONBLOCK ||
2950 	    flags & SPLICE_F_NONBLOCK)
2951 		state.flags = MSG_DONTWAIT;
2952 
2953 	return unix_stream_read_generic(&state, false);
2954 }
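/*
 * Illustrative userspace sketch of the splice path above, moving stream
 * data into a pipe without copying through userspace (hypothetical
 * connected stream socket "fd"; error handling elided):
 *
 *	int p[2];
 *
 *	pipe(p);
 *	splice(fd, NULL, p[1], NULL, 4096, SPLICE_F_NONBLOCK);
 */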
2955 
2956 static int unix_shutdown(struct socket *sock, int mode)
2957 {
2958 	struct sock *sk = sock->sk;
2959 	struct sock *other;
2960 
2961 	if (mode < SHUT_RD || mode > SHUT_RDWR)
2962 		return -EINVAL;
2963 	/* This maps:
2964 	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2965 	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2966 	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2967 	 */
2968 	++mode;
2969 
2970 	unix_state_lock(sk);
2971 	sk->sk_shutdown |= mode;
2972 	other = unix_peer(sk);
2973 	if (other)
2974 		sock_hold(other);
2975 	unix_state_unlock(sk);
2976 	sk->sk_state_change(sk);
2977 
2978 	if (other &&
2979 		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2980 
2981 		int peer_mode = 0;
2982 		const struct proto *prot = READ_ONCE(other->sk_prot);
2983 
2984 		if (prot->unhash)
2985 			prot->unhash(other);
2986 		if (mode & RCV_SHUTDOWN)
2987 			peer_mode |= SEND_SHUTDOWN;
2988 		if (mode & SEND_SHUTDOWN)
2989 			peer_mode |= RCV_SHUTDOWN;
2990 		unix_state_lock(other);
2991 		other->sk_shutdown |= peer_mode;
2992 		unix_state_unlock(other);
2993 		other->sk_state_change(other);
2994 		if (peer_mode == SHUTDOWN_MASK)
2995 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2996 		else if (peer_mode & RCV_SHUTDOWN)
2997 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2998 	}
2999 	if (other)
3000 		sock_put(other);
3001 
3002 	return 0;
3003 }
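/*
 * Illustrative userspace sketch of the half-close mapping above
 * (hypothetical connected stream pair sv[0]/sv[1]): after SHUT_WR on
 * one end the peer sees EOF but may still send data back.
 *
 *	char buf[8];
 *
 *	shutdown(sv[0], SHUT_WR);
 *	recv(sv[1], buf, sizeof(buf), 0);	// returns 0 (EOF)
 *	send(sv[1], "bye", 3, 0);		// still succeeds
 */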
3004 
3005 long unix_inq_len(struct sock *sk)
3006 {
3007 	struct sk_buff *skb;
3008 	long amount = 0;
3009 
3010 	if (sk->sk_state == TCP_LISTEN)
3011 		return -EINVAL;
3012 
3013 	spin_lock(&sk->sk_receive_queue.lock);
3014 	if (sk->sk_type == SOCK_STREAM ||
3015 	    sk->sk_type == SOCK_SEQPACKET) {
3016 		skb_queue_walk(&sk->sk_receive_queue, skb)
3017 			amount += unix_skb_len(skb);
3018 	} else {
3019 		skb = skb_peek(&sk->sk_receive_queue);
3020 		if (skb)
3021 			amount = skb->len;
3022 	}
3023 	spin_unlock(&sk->sk_receive_queue.lock);
3024 
3025 	return amount;
3026 }
3027 EXPORT_SYMBOL_GPL(unix_inq_len);
3028 
3029 long unix_outq_len(struct sock *sk)
3030 {
3031 	return sk_wmem_alloc_get(sk);
3032 }
3033 EXPORT_SYMBOL_GPL(unix_outq_len);
3034 
3035 static int unix_open_file(struct sock *sk)
3036 {
3037 	struct path path;
3038 	struct file *f;
3039 	int fd;
3040 
3041 	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
3042 		return -EPERM;
3043 
3044 	if (!smp_load_acquire(&unix_sk(sk)->addr))
3045 		return -ENOENT;
3046 
3047 	path = unix_sk(sk)->path;
3048 	if (!path.dentry)
3049 		return -ENOENT;
3050 
3051 	path_get(&path);
3052 
3053 	fd = get_unused_fd_flags(O_CLOEXEC);
3054 	if (fd < 0)
3055 		goto out;
3056 
3057 	f = dentry_open(&path, O_PATH, current_cred());
3058 	if (IS_ERR(f)) {
3059 		put_unused_fd(fd);
3060 		fd = PTR_ERR(f);
3061 		goto out;
3062 	}
3063 
3064 	fd_install(fd, f);
3065 out:
3066 	path_put(&path);
3067 
3068 	return fd;
3069 }
3070 
3071 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3072 {
3073 	struct sock *sk = sock->sk;
3074 	long amount = 0;
3075 	int err;
3076 
3077 	switch (cmd) {
3078 	case SIOCOUTQ:
3079 		amount = unix_outq_len(sk);
3080 		err = put_user(amount, (int __user *)arg);
3081 		break;
3082 	case SIOCINQ:
3083 		amount = unix_inq_len(sk);
3084 		if (amount < 0)
3085 			err = amount;
3086 		else
3087 			err = put_user(amount, (int __user *)arg);
3088 		break;
3089 	case SIOCUNIXFILE:
3090 		err = unix_open_file(sk);
3091 		break;
3092 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3093 	case SIOCATMARK:
3094 		{
3095 			struct sk_buff *skb;
3096 			struct unix_sock *u = unix_sk(sk);
3097 			int answ = 0;
3098 
3099 			skb = skb_peek(&sk->sk_receive_queue);
3100 			if (skb && skb == u->oob_skb)
3101 				answ = 1;
3102 			err = put_user(answ, (int __user *)arg);
3103 		}
3104 		break;
3105 #endif
3106 	default:
3107 		err = -ENOIOCTLCMD;
3108 		break;
3109 	}
3110 	return err;
3111 }
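/*
 * Illustrative userspace sketch of the ioctls above (hypothetical
 * AF_UNIX socket "fd"; SIOCATMARK needs CONFIG_AF_UNIX_OOB, and
 * SIOCUNIXFILE, not shown, needs CAP_NET_ADMIN):
 *
 *	int pending, unsent, at_mark;
 *
 *	ioctl(fd, SIOCINQ, &pending);	// bytes queued for reading
 *	ioctl(fd, SIOCOUTQ, &unsent);	// bytes not yet read by the peer
 *	ioctl(fd, SIOCATMARK, &at_mark);	// next read is the OOB byte?
 */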
3112 
3113 #ifdef CONFIG_COMPAT
3114 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3115 {
3116 	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
3117 }
3118 #endif
3119 
3120 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
3121 {
3122 	struct sock *sk = sock->sk;
3123 	__poll_t mask;
3124 
3125 	sock_poll_wait(file, sock, wait);
3126 	mask = 0;
3127 
3128 	/* exceptional events? */
3129 	if (sk->sk_err)
3130 		mask |= EPOLLERR;
3131 	if (sk->sk_shutdown == SHUTDOWN_MASK)
3132 		mask |= EPOLLHUP;
3133 	if (sk->sk_shutdown & RCV_SHUTDOWN)
3134 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3135 
3136 	/* readable? */
3137 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3138 		mask |= EPOLLIN | EPOLLRDNORM;
3139 	if (sk_is_readable(sk))
3140 		mask |= EPOLLIN | EPOLLRDNORM;
3141 
3142 	/* Connection-based sockets need to check for termination and startup */
3143 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
3144 	    sk->sk_state == TCP_CLOSE)
3145 		mask |= EPOLLHUP;
3146 
3147 	/*
3148 	 * We set writable also when the other side has shut down the
3149 	 * connection. This prevents stuck sockets.
3150 	 */
3151 	if (unix_writable(sk))
3152 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3153 
3154 	return mask;
3155 }
3156 
3157 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
3158 				    poll_table *wait)
3159 {
3160 	struct sock *sk = sock->sk, *other;
3161 	unsigned int writable;
3162 	__poll_t mask;
3163 
3164 	sock_poll_wait(file, sock, wait);
3165 	mask = 0;
3166 
3167 	/* exceptional events? */
3168 	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
3169 		mask |= EPOLLERR |
3170 			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
3171 
3172 	if (sk->sk_shutdown & RCV_SHUTDOWN)
3173 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3174 	if (sk->sk_shutdown == SHUTDOWN_MASK)
3175 		mask |= EPOLLHUP;
3176 
3177 	/* readable? */
3178 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3179 		mask |= EPOLLIN | EPOLLRDNORM;
3180 	if (sk_is_readable(sk))
3181 		mask |= EPOLLIN | EPOLLRDNORM;
3182 
3183 	/* Connection-based sockets need to check for termination and startup */
3184 	if (sk->sk_type == SOCK_SEQPACKET) {
3185 		if (sk->sk_state == TCP_CLOSE)
3186 			mask |= EPOLLHUP;
3187 		/* connection hasn't started yet? */
3188 		if (sk->sk_state == TCP_SYN_SENT)
3189 			return mask;
3190 	}
3191 
3192 	/* No write status requested, avoid expensive OUT tests. */
3193 	if (!(poll_requested_events(wait) & (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT)))
3194 		return mask;
3195 
3196 	writable = unix_writable(sk);
3197 	if (writable) {
3198 		unix_state_lock(sk);
3199 
3200 		other = unix_peer(sk);
3201 		if (other && unix_peer(other) != sk &&
3202 		    unix_recvq_full_lockless(other) &&
3203 		    unix_dgram_peer_wake_me(sk, other))
3204 			writable = 0;
3205 
3206 		unix_state_unlock(sk);
3207 	}
3208 
3209 	if (writable)
3210 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3211 	else
3212 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
3213 
3214 	return mask;
3215 }
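/*
 * Illustrative userspace sketch of the writability check above: waiting
 * for a datagram socket to become writable once the receiver's queue
 * drains (hypothetical connected SOCK_DGRAM socket "fd"):
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLOUT };
 *
 *	poll(&pfd, 1, -1);
 */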
3216 
3217 #ifdef CONFIG_PROC_FS
3218 
3219 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
3220 
3221 #define get_bucket(x) ((x) >> BUCKET_SPACE)
3222 #define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1))
3223 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
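/* Worked example (assuming UNIX_HASH_BITS == 8 on a 64-bit build):
 * BUCKET_SPACE = 64 - 9 - 1 = 54, so the bucket index occupies the high
 * bits of *pos and the in-bucket offset the low 54 bits, e.g.
 * set_bucket_offset(2, 5) == (2UL << 54) | 5.
 */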
3224 
3225 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
3226 {
3227 	unsigned long offset = get_offset(*pos);
3228 	unsigned long bucket = get_bucket(*pos);
3229 	struct sock *sk;
3230 	unsigned long count = 0;
3231 
3232 	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
3233 		if (sock_net(sk) != seq_file_net(seq))
3234 			continue;
3235 		if (++count == offset)
3236 			break;
3237 	}
3238 
3239 	return sk;
3240 }
3241 
3242 static struct sock *unix_next_socket(struct seq_file *seq,
3243 				     struct sock *sk,
3244 				     loff_t *pos)
3245 {
3246 	unsigned long bucket = get_bucket(*pos);
3247 
3248 	while (sk > (struct sock *)SEQ_START_TOKEN) {
3249 		sk = sk_next(sk);
3250 		if (!sk)
3251 			goto next_bucket;
3252 		if (sock_net(sk) == seq_file_net(seq))
3253 			return sk;
3254 	}
3255 
3256 	do {
3257 		spin_lock(&unix_table_locks[bucket]);
3258 		sk = unix_from_bucket(seq, pos);
3259 		if (sk)
3260 			return sk;
3261 
3262 next_bucket:
3263 		spin_unlock(&unix_table_locks[bucket++]);
3264 		*pos = set_bucket_offset(bucket, 1);
3265 	} while (bucket < ARRAY_SIZE(unix_socket_table));
3266 
3267 	return NULL;
3268 }
3269 
3270 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
3271 {
3272 	if (!*pos)
3273 		return SEQ_START_TOKEN;
3274 
3275 	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
3276 		return NULL;
3277 
3278 	return unix_next_socket(seq, NULL, pos);
3279 }
3280 
3281 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3282 {
3283 	++*pos;
3284 	return unix_next_socket(seq, v, pos);
3285 }
3286 
3287 static void unix_seq_stop(struct seq_file *seq, void *v)
3288 {
3289 	struct sock *sk = v;
3290 
3291 	if (sk)
3292 		spin_unlock(&unix_table_locks[sk->sk_hash]);
3293 }
3294 
3295 static int unix_seq_show(struct seq_file *seq, void *v)
3296 {
3298 	if (v == SEQ_START_TOKEN)
3299 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
3300 			 "Inode Path\n");
3301 	else {
3302 		struct sock *s = v;
3303 		struct unix_sock *u = unix_sk(s);
3304 		unix_state_lock(s);
3305 
3306 		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
3307 			s,
3308 			refcount_read(&s->sk_refcnt),
3309 			0,
3310 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
3311 			s->sk_type,
3312 			s->sk_socket ?
3313 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
3314 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
3315 			sock_i_ino(s));
3316 
3317 		if (u->addr) {	/* under unix_table_locks here */
3318 			int i, len;
3319 			seq_putc(seq, ' ');
3320 
3321 			i = 0;
3322 			len = u->addr->len -
3323 				offsetof(struct sockaddr_un, sun_path);
3324 			if (u->addr->name->sun_path[0]) {
3325 				len--;
3326 			} else {
3327 				seq_putc(seq, '@');
3328 				i++;
3329 			}
3330 			for ( ; i < len; i++)
3331 				seq_putc(seq, u->addr->name->sun_path[i] ?:
3332 					 '@');
3333 		}
3334 		unix_state_unlock(s);
3335 		seq_putc(seq, '\n');
3336 	}
3337 
3338 	return 0;
3339 }
3340 
3341 static const struct seq_operations unix_seq_ops = {
3342 	.start  = unix_seq_start,
3343 	.next   = unix_seq_next,
3344 	.stop   = unix_seq_stop,
3345 	.show   = unix_seq_show,
3346 };
3347 
3348 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
3349 struct bpf_iter__unix {
3350 	__bpf_md_ptr(struct bpf_iter_meta *, meta);
3351 	__bpf_md_ptr(struct unix_sock *, unix_sk);
3352 	uid_t uid __aligned(8);
3353 };
3354 
3355 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
3356 			      struct unix_sock *unix_sk, uid_t uid)
3357 {
3358 	struct bpf_iter__unix ctx;
3359 
3360 	meta->seq_num--;  /* skip SEQ_START_TOKEN */
3361 	ctx.meta = meta;
3362 	ctx.unix_sk = unix_sk;
3363 	ctx.uid = uid;
3364 	return bpf_iter_run_prog(prog, &ctx);
3365 }
3366 
3367 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
3368 {
3369 	struct bpf_iter_meta meta;
3370 	struct bpf_prog *prog;
3371 	struct sock *sk = v;
3372 	uid_t uid;
3373 
3374 	if (v == SEQ_START_TOKEN)
3375 		return 0;
3376 
3377 	uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
3378 	meta.seq = seq;
3379 	prog = bpf_iter_get_info(&meta, false);
3380 	return unix_prog_seq_show(prog, &meta, v, uid);
3381 }
3382 
3383 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
3384 {
3385 	struct bpf_iter_meta meta;
3386 	struct bpf_prog *prog;
3387 
3388 	if (!v) {
3389 		meta.seq = seq;
3390 		prog = bpf_iter_get_info(&meta, true);
3391 		if (prog)
3392 			(void)unix_prog_seq_show(prog, &meta, v, 0);
3393 	}
3394 
3395 	unix_seq_stop(seq, v);
3396 }
3397 
3398 static const struct seq_operations bpf_iter_unix_seq_ops = {
3399 	.start	= unix_seq_start,
3400 	.next	= unix_seq_next,
3401 	.stop	= bpf_iter_unix_seq_stop,
3402 	.show	= bpf_iter_unix_seq_show,
3403 };
3404 #endif
3405 #endif
3406 
3407 static const struct net_proto_family unix_family_ops = {
3408 	.family = PF_UNIX,
3409 	.create = unix_create,
3410 	.owner	= THIS_MODULE,
3411 };
3412 
3413 
3414 static int __net_init unix_net_init(struct net *net)
3415 {
3416 	int error = -ENOMEM;
3417 
3418 	net->unx.sysctl_max_dgram_qlen = 10;
3419 	if (unix_sysctl_register(net))
3420 		goto out;
3421 
3422 #ifdef CONFIG_PROC_FS
3423 	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
3424 			sizeof(struct seq_net_private))) {
3425 		unix_sysctl_unregister(net);
3426 		goto out;
3427 	}
3428 #endif
3429 	error = 0;
3430 out:
3431 	return error;
3432 }
3433 
3434 static void __net_exit unix_net_exit(struct net *net)
3435 {
3436 	unix_sysctl_unregister(net);
3437 	remove_proc_entry("unix", net->proc_net);
3438 }
3439 
3440 static struct pernet_operations unix_net_ops = {
3441 	.init = unix_net_init,
3442 	.exit = unix_net_exit,
3443 };
3444 
3445 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3446 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
3447 		     struct unix_sock *unix_sk, uid_t uid)
3448 
3449 static const struct bpf_iter_seq_info unix_seq_info = {
3450 	.seq_ops		= &bpf_iter_unix_seq_ops,
3451 	.init_seq_private	= bpf_iter_init_seq_net,
3452 	.fini_seq_private	= bpf_iter_fini_seq_net,
3453 	.seq_priv_size		= sizeof(struct seq_net_private),
3454 };
3455 
3456 static struct bpf_iter_reg unix_reg_info = {
3457 	.target			= "unix",
3458 	.ctx_arg_info_size	= 1,
3459 	.ctx_arg_info		= {
3460 		{ offsetof(struct bpf_iter__unix, unix_sk),
3461 		  PTR_TO_BTF_ID_OR_NULL },
3462 	},
3463 	.seq_info		= &unix_seq_info,
3464 };
3465 
3466 static void __init bpf_iter_register(void)
3467 {
3468 	unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
3469 	if (bpf_iter_reg_target(&unix_reg_info))
3470 		pr_warn("Warning: could not register bpf iterator unix\n");
3471 }
3472 #endif
3473 
3474 static int __init af_unix_init(void)
3475 {
3476 	int i, rc = -1;
3477 
3478 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
3479 
3480 	for (i = 0; i < 2 * UNIX_HASH_SIZE; i++)
3481 		spin_lock_init(&unix_table_locks[i]);
3482 
3483 	rc = proto_register(&unix_dgram_proto, 1);
3484 	if (rc != 0) {
3485 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3486 		goto out;
3487 	}
3488 
3489 	rc = proto_register(&unix_stream_proto, 1);
3490 	if (rc != 0) {
3491 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3492 		goto out;
3493 	}
3494 
3495 	sock_register(&unix_family_ops);
3496 	register_pernet_subsys(&unix_net_ops);
3497 	unix_bpf_build_proto();
3498 
3499 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3500 	bpf_iter_register();
3501 #endif
3502 
3503 out:
3504 	return rc;
3505 }
3506 
3507 static void __exit af_unix_exit(void)
3508 {
3509 	sock_unregister(PF_UNIX);
3510 	proto_unregister(&unix_dgram_proto);
3511 	proto_unregister(&unix_stream_proto);
3512 	unregister_pernet_subsys(&unix_net_ops);
3513 }
3514 
3515 /* Earlier than device_initcall() so that other drivers invoking
3516    request_module() don't end up in a loop when modprobe tries
3517    to use a UNIX socket. But later than subsys_initcall() because
3518    we depend on stuff initialised there. */
3519 fs_initcall(af_unix_init);
3520 module_exit(af_unix_exit);
3521 
3522 MODULE_LICENSE("GPL");
3523 MODULE_ALIAS_NETPROTO(PF_UNIX);
3524