xref: /openbmc/linux/net/unix/af_unix.c (revision 16f6ccde74a6f8538c62f127f17207c75f4dba7a)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 *		Linus Torvalds	:	Assorted bug cures.
 *		Niibe Yutaka	:	async I/O support.
 *		Carsten Paeth	:	PF_UNIX check, address fixes.
 *		Alan Cox	:	Limit size of allocated blocks.
 *		Alan Cox	:	Fixed the stupid socketpair bug.
 *		Alan Cox	:	BSD compatibility fine tuning.
 *		Alan Cox	:	Fixed a bug in connect when interrupted.
 *		Alan Cox	:	Sorted out a proper draft version of
 *					file descriptor passing hacked up from
 *					Mike Shaver's work.
 *		Marty Leisner	:	Fixes to fd passing
 *		Nick Nevin	:	recvmsg bugfix.
 *		Alan Cox	:	Started proper garbage collector
 *		Heiko EiBfeldt	:	Missing verify_area check
 *		Alan Cox	:	Started POSIXisms
 *		Andreas Schwab	:	Replace inode by dentry for proper
 *					reference counting
 *		Kirk Petersen	:	Made this a module
 *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
 *					Lots of bug fixes.
 *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by above two patches.
 *	     Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid a huge number
 *					of socks being hashed (for unix_gc()
 *					performance reasons).
 *					Security fix that limits the max
 *					number of socks to 2*max_files and
 *					the number of skb queueable in the
 *					dgram receiver.
 *		Artur Skawina   :	Hash function optimizations
 *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
 *	      Malcolm Beattie   :	Set peercred for socketpair
 *	     Michal Ostrowski   :       Module initialization cleanup.
 *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
 *					the core infrastructure is doing that
 *					for all net proto families now (2.5.69+)
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  started by 0, so that this name space does not intersect
 *		  with BSD names.
 */
771da177e4SLinus Torvalds 
785cc208beSwangweidong #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
795cc208beSwangweidong 
801da177e4SLinus Torvalds #include <linux/module.h>
811da177e4SLinus Torvalds #include <linux/kernel.h>
821da177e4SLinus Torvalds #include <linux/signal.h>
833f07c014SIngo Molnar #include <linux/sched/signal.h>
841da177e4SLinus Torvalds #include <linux/errno.h>
851da177e4SLinus Torvalds #include <linux/string.h>
861da177e4SLinus Torvalds #include <linux/stat.h>
871da177e4SLinus Torvalds #include <linux/dcache.h>
881da177e4SLinus Torvalds #include <linux/namei.h>
891da177e4SLinus Torvalds #include <linux/socket.h>
901da177e4SLinus Torvalds #include <linux/un.h>
911da177e4SLinus Torvalds #include <linux/fcntl.h>
92b6459415SJakub Kicinski #include <linux/filter.h>
931da177e4SLinus Torvalds #include <linux/termios.h>
941da177e4SLinus Torvalds #include <linux/sockios.h>
951da177e4SLinus Torvalds #include <linux/net.h>
961da177e4SLinus Torvalds #include <linux/in.h>
971da177e4SLinus Torvalds #include <linux/fs.h>
981da177e4SLinus Torvalds #include <linux/slab.h>
997c0f6ba6SLinus Torvalds #include <linux/uaccess.h>
1001da177e4SLinus Torvalds #include <linux/skbuff.h>
1011da177e4SLinus Torvalds #include <linux/netdevice.h>
102457c4cbcSEric W. Biederman #include <net/net_namespace.h>
1031da177e4SLinus Torvalds #include <net/sock.h>
104c752f073SArnaldo Carvalho de Melo #include <net/tcp_states.h>
1051da177e4SLinus Torvalds #include <net/af_unix.h>
1061da177e4SLinus Torvalds #include <linux/proc_fs.h>
1071da177e4SLinus Torvalds #include <linux/seq_file.h>
1081da177e4SLinus Torvalds #include <net/scm.h>
1091da177e4SLinus Torvalds #include <linux/init.h>
1101da177e4SLinus Torvalds #include <linux/poll.h>
1111da177e4SLinus Torvalds #include <linux/rtnetlink.h>
1121da177e4SLinus Torvalds #include <linux/mount.h>
1131da177e4SLinus Torvalds #include <net/checksum.h>
1141da177e4SLinus Torvalds #include <linux/security.h>
115509f15b9SJakub Kicinski #include <linux/splice.h>
1162b15af6fSColin Cross #include <linux/freezer.h>
117ba94f308SAndrey Vagin #include <linux/file.h>
1182c860a43SKuniyuki Iwashima #include <linux/btf_ids.h>
1191da177e4SLinus Torvalds 
120f4e65870SJens Axboe #include "scm.h"
121f4e65870SJens Axboe 
122518de9b3SEric Dumazet static atomic_long_t unix_nr_socks;
12351bae889SKuniyuki Iwashima static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
12451bae889SKuniyuki Iwashima static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
1251da177e4SLinus Torvalds 
/* SMP locking strategy:
 *    each hash table bucket is protected by its own spinlock.
 *    each socket's state is protected by a separate per-socket spinlock.
 */
1301da177e4SLinus Torvalds 
unix_unbound_hash(struct sock * sk)131f452be49SKuniyuki Iwashima static unsigned int unix_unbound_hash(struct sock *sk)
1327123aaa3SEric Dumazet {
133f452be49SKuniyuki Iwashima 	unsigned long hash = (unsigned long)sk;
1347123aaa3SEric Dumazet 
1357123aaa3SEric Dumazet 	hash ^= hash >> 16;
1367123aaa3SEric Dumazet 	hash ^= hash >> 8;
137f452be49SKuniyuki Iwashima 	hash ^= sk->sk_type;
138f452be49SKuniyuki Iwashima 
139cf21b355SKuniyuki Iwashima 	return hash & UNIX_HASH_MOD;
140f452be49SKuniyuki Iwashima }
141f452be49SKuniyuki Iwashima 
unix_bsd_hash(struct inode * i)142f452be49SKuniyuki Iwashima static unsigned int unix_bsd_hash(struct inode *i)
143f452be49SKuniyuki Iwashima {
144f302d180SKuniyuki Iwashima 	return i->i_ino & UNIX_HASH_MOD;
145f452be49SKuniyuki Iwashima }
146f452be49SKuniyuki Iwashima 
unix_abstract_hash(struct sockaddr_un * sunaddr,int addr_len,int type)147f452be49SKuniyuki Iwashima static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
148f452be49SKuniyuki Iwashima 				       int addr_len, int type)
149f452be49SKuniyuki Iwashima {
150f452be49SKuniyuki Iwashima 	__wsum csum = csum_partial(sunaddr, addr_len, 0);
151f452be49SKuniyuki Iwashima 	unsigned int hash;
152f452be49SKuniyuki Iwashima 
153f452be49SKuniyuki Iwashima 	hash = (__force unsigned int)csum_fold(csum);
154f452be49SKuniyuki Iwashima 	hash ^= hash >> 8;
155f452be49SKuniyuki Iwashima 	hash ^= type;
156f452be49SKuniyuki Iwashima 
157cf21b355SKuniyuki Iwashima 	return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
1587123aaa3SEric Dumazet }
1597123aaa3SEric Dumazet 
unix_table_double_lock(struct net * net,unsigned int hash1,unsigned int hash2)16079b05beaSKuniyuki Iwashima static void unix_table_double_lock(struct net *net,
16179b05beaSKuniyuki Iwashima 				   unsigned int hash1, unsigned int hash2)
162afd20b92SKuniyuki Iwashima {
163cf21b355SKuniyuki Iwashima 	if (hash1 == hash2) {
164cf21b355SKuniyuki Iwashima 		spin_lock(&net->unx.table.locks[hash1]);
165cf21b355SKuniyuki Iwashima 		return;
166cf21b355SKuniyuki Iwashima 	}
167cf21b355SKuniyuki Iwashima 
168afd20b92SKuniyuki Iwashima 	if (hash1 > hash2)
169afd20b92SKuniyuki Iwashima 		swap(hash1, hash2);
170afd20b92SKuniyuki Iwashima 
17179b05beaSKuniyuki Iwashima 	spin_lock(&net->unx.table.locks[hash1]);
17279b05beaSKuniyuki Iwashima 	spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
173afd20b92SKuniyuki Iwashima }
174afd20b92SKuniyuki Iwashima 
unix_table_double_unlock(struct net * net,unsigned int hash1,unsigned int hash2)17579b05beaSKuniyuki Iwashima static void unix_table_double_unlock(struct net *net,
17679b05beaSKuniyuki Iwashima 				     unsigned int hash1, unsigned int hash2)
177afd20b92SKuniyuki Iwashima {
178cf21b355SKuniyuki Iwashima 	if (hash1 == hash2) {
179cf21b355SKuniyuki Iwashima 		spin_unlock(&net->unx.table.locks[hash1]);
180cf21b355SKuniyuki Iwashima 		return;
181cf21b355SKuniyuki Iwashima 	}
182cf21b355SKuniyuki Iwashima 
18379b05beaSKuniyuki Iwashima 	spin_unlock(&net->unx.table.locks[hash1]);
18479b05beaSKuniyuki Iwashima 	spin_unlock(&net->unx.table.locks[hash2]);
185afd20b92SKuniyuki Iwashima }
186afd20b92SKuniyuki Iwashima 
187877ce7c1SCatherine Zhang #ifdef CONFIG_SECURITY_NETWORK
unix_get_secdata(struct scm_cookie * scm,struct sk_buff * skb)188dc49c1f9SCatherine Zhang static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
189877ce7c1SCatherine Zhang {
19037a9a8dfSStephen Smalley 	UNIXCB(skb).secid = scm->secid;
191877ce7c1SCatherine Zhang }
192877ce7c1SCatherine Zhang 
unix_set_secdata(struct scm_cookie * scm,struct sk_buff * skb)193877ce7c1SCatherine Zhang static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
194877ce7c1SCatherine Zhang {
19537a9a8dfSStephen Smalley 	scm->secid = UNIXCB(skb).secid;
19637a9a8dfSStephen Smalley }
19737a9a8dfSStephen Smalley 
unix_secdata_eq(struct scm_cookie * scm,struct sk_buff * skb)19837a9a8dfSStephen Smalley static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
19937a9a8dfSStephen Smalley {
20037a9a8dfSStephen Smalley 	return (scm->secid == UNIXCB(skb).secid);
201877ce7c1SCatherine Zhang }
202877ce7c1SCatherine Zhang #else
unix_get_secdata(struct scm_cookie * scm,struct sk_buff * skb)203dc49c1f9SCatherine Zhang static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
204877ce7c1SCatherine Zhang { }
205877ce7c1SCatherine Zhang 
unix_set_secdata(struct scm_cookie * scm,struct sk_buff * skb)206877ce7c1SCatherine Zhang static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
207877ce7c1SCatherine Zhang { }
20837a9a8dfSStephen Smalley 
unix_secdata_eq(struct scm_cookie * scm,struct sk_buff * skb)20937a9a8dfSStephen Smalley static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
21037a9a8dfSStephen Smalley {
21137a9a8dfSStephen Smalley 	return true;
21237a9a8dfSStephen Smalley }
213877ce7c1SCatherine Zhang #endif /* CONFIG_SECURITY_NETWORK */
214877ce7c1SCatherine Zhang 
/* Non-zero when @osk's peer is @sk. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return sk == unix_peer(osk);
}
2191da177e4SLinus Torvalds 
unix_may_send(struct sock * sk,struct sock * osk)2201da177e4SLinus Torvalds static inline int unix_may_send(struct sock *sk, struct sock *osk)
2211da177e4SLinus Torvalds {
2226eba6a37SEric Dumazet 	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
2231da177e4SLinus Torvalds }
2241da177e4SLinus Torvalds 
unix_recvq_full_lockless(const struct sock * sk)22586b18aaaSQian Cai static inline int unix_recvq_full_lockless(const struct sock *sk)
22686b18aaaSQian Cai {
227f1683d07SKuniyuki Iwashima 	return skb_queue_len_lockless(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
22886b18aaaSQian Cai }
22986b18aaaSQian Cai 
unix_peer_get(struct sock * s)230fa7ff56fSPavel Emelyanov struct sock *unix_peer_get(struct sock *s)
2311da177e4SLinus Torvalds {
2321da177e4SLinus Torvalds 	struct sock *peer;
2331da177e4SLinus Torvalds 
2341c92b4e5SDavid S. Miller 	unix_state_lock(s);
2351da177e4SLinus Torvalds 	peer = unix_peer(s);
2361da177e4SLinus Torvalds 	if (peer)
2371da177e4SLinus Torvalds 		sock_hold(peer);
2381c92b4e5SDavid S. Miller 	unix_state_unlock(s);
2391da177e4SLinus Torvalds 	return peer;
2401da177e4SLinus Torvalds }
241fa7ff56fSPavel Emelyanov EXPORT_SYMBOL_GPL(unix_peer_get);
2421da177e4SLinus Torvalds 
unix_create_addr(struct sockaddr_un * sunaddr,int addr_len)24312f21c49SKuniyuki Iwashima static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
24412f21c49SKuniyuki Iwashima 					     int addr_len)
24512f21c49SKuniyuki Iwashima {
24612f21c49SKuniyuki Iwashima 	struct unix_address *addr;
24712f21c49SKuniyuki Iwashima 
24812f21c49SKuniyuki Iwashima 	addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
24912f21c49SKuniyuki Iwashima 	if (!addr)
25012f21c49SKuniyuki Iwashima 		return NULL;
25112f21c49SKuniyuki Iwashima 
25212f21c49SKuniyuki Iwashima 	refcount_set(&addr->refcnt, 1);
25312f21c49SKuniyuki Iwashima 	addr->len = addr_len;
25412f21c49SKuniyuki Iwashima 	memcpy(addr->name, sunaddr, addr_len);
25512f21c49SKuniyuki Iwashima 
25612f21c49SKuniyuki Iwashima 	return addr;
25712f21c49SKuniyuki Iwashima }
25812f21c49SKuniyuki Iwashima 
unix_release_addr(struct unix_address * addr)2591da177e4SLinus Torvalds static inline void unix_release_addr(struct unix_address *addr)
2601da177e4SLinus Torvalds {
2618c9814b9SReshetova, Elena 	if (refcount_dec_and_test(&addr->refcnt))
2621da177e4SLinus Torvalds 		kfree(addr);
2631da177e4SLinus Torvalds }
2641da177e4SLinus Torvalds 
/*
 *	Check unix socket name:
 *		- it must not be zero length.
 *		- if it starts with a non-zero byte, it should be
 *		  NUL-terminated (FS object).
 *		- if it starts with a zero byte, it is an abstract name.
 */
2711da177e4SLinus Torvalds 
/* Basic sanity check of a user-supplied AF_UNIX address.
 *
 * @addr_len must cover at least one byte of sun_path and must not exceed
 * sizeof(struct sockaddr_un); the address family must be AF_UNIX.
 *
 * Returns 0 when the address is acceptable, -EINVAL otherwise.
 */
static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
{
	/* Too short: nothing beyond the family field. */
	if (addr_len <= offsetof(struct sockaddr_un, sun_path))
		return -EINVAL;

	/* Too long: larger than the structure itself. */
	if (addr_len > sizeof(*sunaddr))
		return -EINVAL;

	return sunaddr->sun_family == AF_UNIX ? 0 : -EINVAL;
}
283b8a58aa6SKuniyuki Iwashima 
unix_mkname_bsd(struct sockaddr_un * sunaddr,int addr_len)284ecb4534bSKuniyuki Iwashima static int unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
285d2d8c9fdSKuniyuki Iwashima {
286ecb4534bSKuniyuki Iwashima 	struct sockaddr_storage *addr = (struct sockaddr_storage *)sunaddr;
287ecb4534bSKuniyuki Iwashima 	short offset = offsetof(struct sockaddr_storage, __data);
288ecb4534bSKuniyuki Iwashima 
289ecb4534bSKuniyuki Iwashima 	BUILD_BUG_ON(offset != offsetof(struct sockaddr_un, sun_path));
290ecb4534bSKuniyuki Iwashima 
291d2d8c9fdSKuniyuki Iwashima 	/* This may look like an off by one error but it is a bit more
292d2d8c9fdSKuniyuki Iwashima 	 * subtle.  108 is the longest valid AF_UNIX path for a binding.
293d2d8c9fdSKuniyuki Iwashima 	 * sun_path[108] doesn't as such exist.  However in kernel space
294d2d8c9fdSKuniyuki Iwashima 	 * we are guaranteed that it is a valid memory location in our
295d2d8c9fdSKuniyuki Iwashima 	 * kernel address buffer because syscall functions always pass
296d2d8c9fdSKuniyuki Iwashima 	 * a pointer of struct sockaddr_storage which has a bigger buffer
297ecb4534bSKuniyuki Iwashima 	 * than 108.  Also, we must terminate sun_path for strlen() in
298ecb4534bSKuniyuki Iwashima 	 * getname_kernel().
299d2d8c9fdSKuniyuki Iwashima 	 */
300ecb4534bSKuniyuki Iwashima 	addr->__data[addr_len - offset] = 0;
301ecb4534bSKuniyuki Iwashima 
302ecb4534bSKuniyuki Iwashima 	/* Don't pass sunaddr->sun_path to strlen().  Otherwise, 108 will
303ecb4534bSKuniyuki Iwashima 	 * cause panic if CONFIG_FORTIFY_SOURCE=y.  Let __fortify_strlen()
304ecb4534bSKuniyuki Iwashima 	 * know the actual buffer.
305ecb4534bSKuniyuki Iwashima 	 */
306ecb4534bSKuniyuki Iwashima 	return strlen(addr->__data) + offset + 1;
307d2d8c9fdSKuniyuki Iwashima }
308d2d8c9fdSKuniyuki Iwashima 
/* Unlink @sk from its hash chain via sk_del_node_init().  No lock is
 * taken here.
 */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
3131da177e4SLinus Torvalds 
__unix_insert_socket(struct net * net,struct sock * sk)314cf2f225eSKuniyuki Iwashima static void __unix_insert_socket(struct net *net, struct sock *sk)
3151da177e4SLinus Torvalds {
316dd29c67dSEric Dumazet 	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
317cf2f225eSKuniyuki Iwashima 	sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
3181da177e4SLinus Torvalds }
3191da177e4SLinus Torvalds 
__unix_set_addr_hash(struct net * net,struct sock * sk,struct unix_address * addr,unsigned int hash)320cf2f225eSKuniyuki Iwashima static void __unix_set_addr_hash(struct net *net, struct sock *sk,
321cf2f225eSKuniyuki Iwashima 				 struct unix_address *addr, unsigned int hash)
322185ab886SAl Viro {
323185ab886SAl Viro 	__unix_remove_socket(sk);
324185ab886SAl Viro 	smp_store_release(&unix_sk(sk)->addr, addr);
325e6b4b873SKuniyuki Iwashima 
326e6b4b873SKuniyuki Iwashima 	sk->sk_hash = hash;
327cf2f225eSKuniyuki Iwashima 	__unix_insert_socket(net, sk);
328185ab886SAl Viro }
329185ab886SAl Viro 
unix_remove_socket(struct net * net,struct sock * sk)33079b05beaSKuniyuki Iwashima static void unix_remove_socket(struct net *net, struct sock *sk)
3311da177e4SLinus Torvalds {
33279b05beaSKuniyuki Iwashima 	spin_lock(&net->unx.table.locks[sk->sk_hash]);
3331da177e4SLinus Torvalds 	__unix_remove_socket(sk);
33479b05beaSKuniyuki Iwashima 	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
3351da177e4SLinus Torvalds }
3361da177e4SLinus Torvalds 
unix_insert_unbound_socket(struct net * net,struct sock * sk)33779b05beaSKuniyuki Iwashima static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
3381da177e4SLinus Torvalds {
33979b05beaSKuniyuki Iwashima 	spin_lock(&net->unx.table.locks[sk->sk_hash]);
340cf2f225eSKuniyuki Iwashima 	__unix_insert_socket(net, sk);
34179b05beaSKuniyuki Iwashima 	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
3421da177e4SLinus Torvalds }
3431da177e4SLinus Torvalds 
unix_insert_bsd_socket(struct sock * sk)34451bae889SKuniyuki Iwashima static void unix_insert_bsd_socket(struct sock *sk)
34551bae889SKuniyuki Iwashima {
34651bae889SKuniyuki Iwashima 	spin_lock(&bsd_socket_locks[sk->sk_hash]);
34751bae889SKuniyuki Iwashima 	sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
34851bae889SKuniyuki Iwashima 	spin_unlock(&bsd_socket_locks[sk->sk_hash]);
34951bae889SKuniyuki Iwashima }
35051bae889SKuniyuki Iwashima 
unix_remove_bsd_socket(struct sock * sk)35151bae889SKuniyuki Iwashima static void unix_remove_bsd_socket(struct sock *sk)
35251bae889SKuniyuki Iwashima {
35351bae889SKuniyuki Iwashima 	if (!hlist_unhashed(&sk->sk_bind_node)) {
35451bae889SKuniyuki Iwashima 		spin_lock(&bsd_socket_locks[sk->sk_hash]);
35551bae889SKuniyuki Iwashima 		__sk_del_bind_node(sk);
35651bae889SKuniyuki Iwashima 		spin_unlock(&bsd_socket_locks[sk->sk_hash]);
35751bae889SKuniyuki Iwashima 
35851bae889SKuniyuki Iwashima 		sk_node_init(&sk->sk_bind_node);
35951bae889SKuniyuki Iwashima 	}
36051bae889SKuniyuki Iwashima }
36151bae889SKuniyuki Iwashima 
__unix_find_socket_byname(struct net * net,struct sockaddr_un * sunname,int len,unsigned int hash)362097e66c5SDenis V. Lunev static struct sock *__unix_find_socket_byname(struct net *net,
363097e66c5SDenis V. Lunev 					      struct sockaddr_un *sunname,
364be752283SAl Viro 					      int len, unsigned int hash)
3651da177e4SLinus Torvalds {
3661da177e4SLinus Torvalds 	struct sock *s;
3671da177e4SLinus Torvalds 
368cf2f225eSKuniyuki Iwashima 	sk_for_each(s, &net->unx.table.buckets[hash]) {
3691da177e4SLinus Torvalds 		struct unix_sock *u = unix_sk(s);
3701da177e4SLinus Torvalds 
3711da177e4SLinus Torvalds 		if (u->addr->len == len &&
3721da177e4SLinus Torvalds 		    !memcmp(u->addr->name, sunname, len))
3731da177e4SLinus Torvalds 			return s;
3741da177e4SLinus Torvalds 	}
375262ce0afSVito Caputo 	return NULL;
376262ce0afSVito Caputo }
3771da177e4SLinus Torvalds 
unix_find_socket_byname(struct net * net,struct sockaddr_un * sunname,int len,unsigned int hash)378097e66c5SDenis V. Lunev static inline struct sock *unix_find_socket_byname(struct net *net,
379097e66c5SDenis V. Lunev 						   struct sockaddr_un *sunname,
380be752283SAl Viro 						   int len, unsigned int hash)
3811da177e4SLinus Torvalds {
3821da177e4SLinus Torvalds 	struct sock *s;
3831da177e4SLinus Torvalds 
38479b05beaSKuniyuki Iwashima 	spin_lock(&net->unx.table.locks[hash]);
385be752283SAl Viro 	s = __unix_find_socket_byname(net, sunname, len, hash);
3861da177e4SLinus Torvalds 	if (s)
3871da177e4SLinus Torvalds 		sock_hold(s);
38879b05beaSKuniyuki Iwashima 	spin_unlock(&net->unx.table.locks[hash]);
3891da177e4SLinus Torvalds 	return s;
3901da177e4SLinus Torvalds }
3911da177e4SLinus Torvalds 
unix_find_socket_byinode(struct inode * i)39251bae889SKuniyuki Iwashima static struct sock *unix_find_socket_byinode(struct inode *i)
3931da177e4SLinus Torvalds {
394f452be49SKuniyuki Iwashima 	unsigned int hash = unix_bsd_hash(i);
3951da177e4SLinus Torvalds 	struct sock *s;
3961da177e4SLinus Torvalds 
39751bae889SKuniyuki Iwashima 	spin_lock(&bsd_socket_locks[hash]);
39851bae889SKuniyuki Iwashima 	sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
39940ffe67dSAl Viro 		struct dentry *dentry = unix_sk(s)->path.dentry;
4001da177e4SLinus Torvalds 
401beef5121SMiklos Szeredi 		if (dentry && d_backing_inode(dentry) == i) {
4021da177e4SLinus Torvalds 			sock_hold(s);
40351bae889SKuniyuki Iwashima 			spin_unlock(&bsd_socket_locks[hash]);
4041da177e4SLinus Torvalds 			return s;
4051da177e4SLinus Torvalds 		}
406afd20b92SKuniyuki Iwashima 	}
40751bae889SKuniyuki Iwashima 	spin_unlock(&bsd_socket_locks[hash]);
408afd20b92SKuniyuki Iwashima 	return NULL;
409afd20b92SKuniyuki Iwashima }
4101da177e4SLinus Torvalds 
/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (e.g., /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writeability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far. This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue.
 *
 * In order to propagate a wake up, a wait_queue_entry_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and is broken when
 * the association to the server socket is dissolved or after a wake up
 * was relayed.
 */
4357d267278SRainer Weikusat 
/* Wake function for the peer_wake entry a client socket queues on its
 * peer's peer_wait queue.
 *
 * The entry removes itself from the peer's queue, clears its record of
 * the peer, and forwards the wake-up (with the poll key it was given) to
 * the client socket's own wait queue.  Always returns 0.
 *
 * NOTE(review): __remove_wait_queue() is used without taking
 * peer_wait.lock here; presumably the waker already holds it - confirm.
 */
static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u = container_of(q, struct unix_sock, peer_wake);
	wait_queue_head_t *wq;

	/* One-shot: detach from the peer before relaying. */
	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait, q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	wq = sk_sleep(&u->sk);
	if (wq)
		wake_up_interruptible_poll(wq, key_to_poll(key));

	return 0;
}
4557d267278SRainer Weikusat 
unix_dgram_peer_wake_connect(struct sock * sk,struct sock * other)4567d267278SRainer Weikusat static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
4577d267278SRainer Weikusat {
4587d267278SRainer Weikusat 	struct unix_sock *u, *u_other;
4597d267278SRainer Weikusat 	int rc;
4607d267278SRainer Weikusat 
4617d267278SRainer Weikusat 	u = unix_sk(sk);
4627d267278SRainer Weikusat 	u_other = unix_sk(other);
4637d267278SRainer Weikusat 	rc = 0;
4647d267278SRainer Weikusat 	spin_lock(&u_other->peer_wait.lock);
4657d267278SRainer Weikusat 
4667d267278SRainer Weikusat 	if (!u->peer_wake.private) {
4677d267278SRainer Weikusat 		u->peer_wake.private = other;
4687d267278SRainer Weikusat 		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
4697d267278SRainer Weikusat 
4707d267278SRainer Weikusat 		rc = 1;
4717d267278SRainer Weikusat 	}
4727d267278SRainer Weikusat 
4737d267278SRainer Weikusat 	spin_unlock(&u_other->peer_wait.lock);
4747d267278SRainer Weikusat 	return rc;
4757d267278SRainer Weikusat }
4767d267278SRainer Weikusat 
unix_dgram_peer_wake_disconnect(struct sock * sk,struct sock * other)4777d267278SRainer Weikusat static void unix_dgram_peer_wake_disconnect(struct sock *sk,
4787d267278SRainer Weikusat 					    struct sock *other)
4797d267278SRainer Weikusat {
4807d267278SRainer Weikusat 	struct unix_sock *u, *u_other;
4817d267278SRainer Weikusat 
4827d267278SRainer Weikusat 	u = unix_sk(sk);
4837d267278SRainer Weikusat 	u_other = unix_sk(other);
4847d267278SRainer Weikusat 	spin_lock(&u_other->peer_wait.lock);
4857d267278SRainer Weikusat 
4867d267278SRainer Weikusat 	if (u->peer_wake.private == other) {
4877d267278SRainer Weikusat 		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
4887d267278SRainer Weikusat 		u->peer_wake.private = NULL;
4897d267278SRainer Weikusat 	}
4907d267278SRainer Weikusat 
4917d267278SRainer Weikusat 	spin_unlock(&u_other->peer_wait.lock);
4927d267278SRainer Weikusat }
4937d267278SRainer Weikusat 
unix_dgram_peer_wake_disconnect_wakeup(struct sock * sk,struct sock * other)4947d267278SRainer Weikusat static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
4957d267278SRainer Weikusat 						   struct sock *other)
4967d267278SRainer Weikusat {
4977d267278SRainer Weikusat 	unix_dgram_peer_wake_disconnect(sk, other);
4987d267278SRainer Weikusat 	wake_up_interruptible_poll(sk_sleep(sk),
499a9a08845SLinus Torvalds 				   EPOLLOUT |
500a9a08845SLinus Torvalds 				   EPOLLWRNORM |
501a9a08845SLinus Torvalds 				   EPOLLWRBAND);
5027d267278SRainer Weikusat }
5037d267278SRainer Weikusat 
/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 */
unix_dgram_peer_wake_me(struct sock * sk,struct sock * other)5087d267278SRainer Weikusat static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
5097d267278SRainer Weikusat {
5107d267278SRainer Weikusat 	int connected;
5117d267278SRainer Weikusat 
5127d267278SRainer Weikusat 	connected = unix_dgram_peer_wake_connect(sk, other);
5137d267278SRainer Weikusat 
51451f7e951SJason Baron 	/* If other is SOCK_DEAD, we want to make sure we signal
51551f7e951SJason Baron 	 * POLLOUT, such that a subsequent write() can get a
51651f7e951SJason Baron 	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
51751f7e951SJason Baron 	 * to other and its full, we will hang waiting for POLLOUT.
51851f7e951SJason Baron 	 */
519662a8094SKuniyuki Iwashima 	if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
5207d267278SRainer Weikusat 		return 1;
5217d267278SRainer Weikusat 
5227d267278SRainer Weikusat 	if (connected)
5237d267278SRainer Weikusat 		unix_dgram_peer_wake_disconnect(sk, other);
5247d267278SRainer Weikusat 
5257d267278SRainer Weikusat 	return 0;
5267d267278SRainer Weikusat }
5277d267278SRainer Weikusat 
unix_writable(const struct sock * sk,unsigned char state)528484e036eSKuniyuki Iwashima static int unix_writable(const struct sock *sk, unsigned char state)
5291da177e4SLinus Torvalds {
530484e036eSKuniyuki Iwashima 	return state != TCP_LISTEN &&
531996ec22fSKuniyuki Iwashima 		(refcount_read(&sk->sk_wmem_alloc) << 2) <= READ_ONCE(sk->sk_sndbuf);
5321da177e4SLinus Torvalds }
5331da177e4SLinus Torvalds 
unix_write_space(struct sock * sk)5341da177e4SLinus Torvalds static void unix_write_space(struct sock *sk)
5351da177e4SLinus Torvalds {
53643815482SEric Dumazet 	struct socket_wq *wq;
53743815482SEric Dumazet 
53843815482SEric Dumazet 	rcu_read_lock();
539484e036eSKuniyuki Iwashima 	if (unix_writable(sk, READ_ONCE(sk->sk_state))) {
54043815482SEric Dumazet 		wq = rcu_dereference(sk->sk_wq);
5411ce0bf50SHerbert Xu 		if (skwq_has_sleeper(wq))
54267426b75SEric Dumazet 			wake_up_interruptible_sync_poll(&wq->wait,
543a9a08845SLinus Torvalds 				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
5448d8ad9d7SPavel Emelyanov 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
5451da177e4SLinus Torvalds 	}
54643815482SEric Dumazet 	rcu_read_unlock();
5471da177e4SLinus Torvalds }
5481da177e4SLinus Torvalds 
5491da177e4SLinus Torvalds /* When dgram socket disconnects (or changes its peer), we clear its receive
5501da177e4SLinus Torvalds  * queue of packets arrived from previous peer. First, it allows to do
5511da177e4SLinus Torvalds  * flow control based only on wmem_alloc; second, sk connected to peer
5521da177e4SLinus Torvalds  * may receive messages only from that peer. */
unix_dgram_disconnected(struct sock * sk,struct sock * other)5531da177e4SLinus Torvalds static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
5541da177e4SLinus Torvalds {
555b03efcfbSDavid S. Miller 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
5561da177e4SLinus Torvalds 		skb_queue_purge(&sk->sk_receive_queue);
5571da177e4SLinus Torvalds 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
5581da177e4SLinus Torvalds 
5591da177e4SLinus Torvalds 		/* If one link of bidirectional dgram pipe is disconnected,
5601da177e4SLinus Torvalds 		 * we signal error. Messages are lost. Do not make this,
5611da177e4SLinus Torvalds 		 * when peer was not connected to us.
5621da177e4SLinus Torvalds 		 */
5631da177e4SLinus Torvalds 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
564cc04410aSEric Dumazet 			WRITE_ONCE(other->sk_err, ECONNRESET);
565e3ae2365SAlexander Aring 			sk_error_report(other);
5661da177e4SLinus Torvalds 		}
5671da177e4SLinus Torvalds 	}
5681da177e4SLinus Torvalds }
5691da177e4SLinus Torvalds 
unix_sock_destructor(struct sock * sk)5701da177e4SLinus Torvalds static void unix_sock_destructor(struct sock *sk)
5711da177e4SLinus Torvalds {
5721da177e4SLinus Torvalds 	struct unix_sock *u = unix_sk(sk);
5731da177e4SLinus Torvalds 
5741da177e4SLinus Torvalds 	skb_queue_purge(&sk->sk_receive_queue);
5751da177e4SLinus Torvalds 
576dd29c67dSEric Dumazet 	DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
577dd29c67dSEric Dumazet 	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
578dd29c67dSEric Dumazet 	DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
5791da177e4SLinus Torvalds 	if (!sock_flag(sk, SOCK_DEAD)) {
5805cc208beSwangweidong 		pr_info("Attempt to release alive unix socket: %p\n", sk);
5811da177e4SLinus Torvalds 		return;
5821da177e4SLinus Torvalds 	}
5831da177e4SLinus Torvalds 
5841da177e4SLinus Torvalds 	if (u->addr)
5851da177e4SLinus Torvalds 		unix_release_addr(u->addr);
5861da177e4SLinus Torvalds 
587518de9b3SEric Dumazet 	atomic_long_dec(&unix_nr_socks);
588a8076d8dSEric Dumazet 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
5891da177e4SLinus Torvalds #ifdef UNIX_REFCNT_DEBUG
5905cc208beSwangweidong 	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
591518de9b3SEric Dumazet 		atomic_long_read(&unix_nr_socks));
5921da177e4SLinus Torvalds #endif
5931da177e4SLinus Torvalds }
5941da177e4SLinus Torvalds 
unix_release_sock(struct sock * sk,int embrion)595ded34e0fSPaul Moore static void unix_release_sock(struct sock *sk, int embrion)
5961da177e4SLinus Torvalds {
5971da177e4SLinus Torvalds 	struct unix_sock *u = unix_sk(sk);
5981da177e4SLinus Torvalds 	struct sock *skpair;
5991da177e4SLinus Torvalds 	struct sk_buff *skb;
60079b05beaSKuniyuki Iwashima 	struct path path;
6011da177e4SLinus Torvalds 	int state;
6021da177e4SLinus Torvalds 
60379b05beaSKuniyuki Iwashima 	unix_remove_socket(sock_net(sk), sk);
60451bae889SKuniyuki Iwashima 	unix_remove_bsd_socket(sk);
6051da177e4SLinus Torvalds 
6061da177e4SLinus Torvalds 	/* Clear state */
6071c92b4e5SDavid S. Miller 	unix_state_lock(sk);
6081da177e4SLinus Torvalds 	sock_orphan(sk);
609e1d09c2cSKuniyuki Iwashima 	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
61040ffe67dSAl Viro 	path	     = u->path;
61140ffe67dSAl Viro 	u->path.dentry = NULL;
61240ffe67dSAl Viro 	u->path.mnt = NULL;
6131da177e4SLinus Torvalds 	state = sk->sk_state;
61445733e98SKuniyuki Iwashima 	WRITE_ONCE(sk->sk_state, TCP_CLOSE);
615a494bd64SEric Dumazet 
616a494bd64SEric Dumazet 	skpair = unix_peer(sk);
617a494bd64SEric Dumazet 	unix_peer(sk) = NULL;
618a494bd64SEric Dumazet 
6191c92b4e5SDavid S. Miller 	unix_state_unlock(sk);
6201da177e4SLinus Torvalds 
6217a62ed61SKuniyuki Iwashima #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
6227a62ed61SKuniyuki Iwashima 	if (u->oob_skb) {
6237a62ed61SKuniyuki Iwashima 		kfree_skb(u->oob_skb);
6247a62ed61SKuniyuki Iwashima 		u->oob_skb = NULL;
6257a62ed61SKuniyuki Iwashima 	}
6267a62ed61SKuniyuki Iwashima #endif
6277a62ed61SKuniyuki Iwashima 
6281da177e4SLinus Torvalds 	wake_up_interruptible_all(&u->peer_wait);
6291da177e4SLinus Torvalds 
6301da177e4SLinus Torvalds 	if (skpair != NULL) {
6311da177e4SLinus Torvalds 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
6321c92b4e5SDavid S. Miller 			unix_state_lock(skpair);
6331da177e4SLinus Torvalds 			/* No more writes */
634e1d09c2cSKuniyuki Iwashima 			WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
635471ec7b7SKuniyuki Iwashima 			if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || embrion)
636cc04410aSEric Dumazet 				WRITE_ONCE(skpair->sk_err, ECONNRESET);
6371c92b4e5SDavid S. Miller 			unix_state_unlock(skpair);
6381da177e4SLinus Torvalds 			skpair->sk_state_change(skpair);
6398d8ad9d7SPavel Emelyanov 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
6401da177e4SLinus Torvalds 		}
6417d267278SRainer Weikusat 
6427d267278SRainer Weikusat 		unix_dgram_peer_wake_disconnect(sk, skpair);
6431da177e4SLinus Torvalds 		sock_put(skpair); /* It may now die */
6441da177e4SLinus Torvalds 	}
6451da177e4SLinus Torvalds 
6461da177e4SLinus Torvalds 	/* Try to flush out this socket. Throw out buffers at least */
6471da177e4SLinus Torvalds 
6481da177e4SLinus Torvalds 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
6491da177e4SLinus Torvalds 		if (state == TCP_LISTEN)
6501da177e4SLinus Torvalds 			unix_release_sock(skb->sk, 1);
6511da177e4SLinus Torvalds 		/* passed fds are erased in the kfree_skb hook	      */
65273ed5d25SHannes Frederic Sowa 		UNIXCB(skb).consumed = skb->len;
6531da177e4SLinus Torvalds 		kfree_skb(skb);
6541da177e4SLinus Torvalds 	}
6551da177e4SLinus Torvalds 
65640ffe67dSAl Viro 	if (path.dentry)
65740ffe67dSAl Viro 		path_put(&path);
6581da177e4SLinus Torvalds 
6591da177e4SLinus Torvalds 	sock_put(sk);
6601da177e4SLinus Torvalds 
6611da177e4SLinus Torvalds 	/* ---- Socket is dead now and most probably destroyed ---- */
6621da177e4SLinus Torvalds 
6631da177e4SLinus Torvalds 	/*
664e04dae84SAlan Cox 	 * Fixme: BSD difference: In BSD all sockets connected to us get
6651da177e4SLinus Torvalds 	 *	  ECONNRESET and we die on the spot. In Linux we behave
6661da177e4SLinus Torvalds 	 *	  like files and pipes do and wait for the last
6671da177e4SLinus Torvalds 	 *	  dereference.
6681da177e4SLinus Torvalds 	 *
6691da177e4SLinus Torvalds 	 * Can't we simply set sock->err?
6701da177e4SLinus Torvalds 	 *
6711da177e4SLinus Torvalds 	 *	  What the above comment does talk about? --ANK(980817)
6721da177e4SLinus Torvalds 	 */
6731da177e4SLinus Torvalds 
674ade32bd8SKuniyuki Iwashima 	if (READ_ONCE(unix_tot_inflight))
6751da177e4SLinus Torvalds 		unix_gc();		/* Garbage collect fds */
6761da177e4SLinus Torvalds }
6771da177e4SLinus Torvalds 
init_peercred(struct sock * sk)678109f6e39SEric W. Biederman static void init_peercred(struct sock *sk)
679109f6e39SEric W. Biederman {
68035306eb2SEric Dumazet 	const struct cred *old_cred;
68135306eb2SEric Dumazet 	struct pid *old_pid;
68235306eb2SEric Dumazet 
68335306eb2SEric Dumazet 	spin_lock(&sk->sk_peer_lock);
68435306eb2SEric Dumazet 	old_pid = sk->sk_peer_pid;
68535306eb2SEric Dumazet 	old_cred = sk->sk_peer_cred;
686109f6e39SEric W. Biederman 	sk->sk_peer_pid  = get_pid(task_tgid(current));
687109f6e39SEric W. Biederman 	sk->sk_peer_cred = get_current_cred();
68835306eb2SEric Dumazet 	spin_unlock(&sk->sk_peer_lock);
68935306eb2SEric Dumazet 
69035306eb2SEric Dumazet 	put_pid(old_pid);
69135306eb2SEric Dumazet 	put_cred(old_cred);
692109f6e39SEric W. Biederman }
693109f6e39SEric W. Biederman 
copy_peercred(struct sock * sk,struct sock * peersk)694109f6e39SEric W. Biederman static void copy_peercred(struct sock *sk, struct sock *peersk)
695109f6e39SEric W. Biederman {
69635306eb2SEric Dumazet 	if (sk < peersk) {
69735306eb2SEric Dumazet 		spin_lock(&sk->sk_peer_lock);
69835306eb2SEric Dumazet 		spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
69935306eb2SEric Dumazet 	} else {
70035306eb2SEric Dumazet 		spin_lock(&peersk->sk_peer_lock);
70135306eb2SEric Dumazet 		spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
70235306eb2SEric Dumazet 	}
7039c2450cfSKuniyuki Iwashima 
704109f6e39SEric W. Biederman 	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
705109f6e39SEric W. Biederman 	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
70635306eb2SEric Dumazet 
70735306eb2SEric Dumazet 	spin_unlock(&sk->sk_peer_lock);
70835306eb2SEric Dumazet 	spin_unlock(&peersk->sk_peer_lock);
709109f6e39SEric W. Biederman }
710109f6e39SEric W. Biederman 
unix_listen(struct socket * sock,int backlog)7111da177e4SLinus Torvalds static int unix_listen(struct socket *sock, int backlog)
7121da177e4SLinus Torvalds {
7131da177e4SLinus Torvalds 	int err;
7141da177e4SLinus Torvalds 	struct sock *sk = sock->sk;
7151da177e4SLinus Torvalds 	struct unix_sock *u = unix_sk(sk);
7161da177e4SLinus Torvalds 
7171da177e4SLinus Torvalds 	err = -EOPNOTSUPP;
7181da177e4SLinus Torvalds 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
7191da177e4SLinus Torvalds 		goto out;	/* Only stream/seqpacket sockets accept */
7201da177e4SLinus Torvalds 	err = -EINVAL;
721302fe8ddSKuniyuki Iwashima 	if (!READ_ONCE(u->addr))
7221da177e4SLinus Torvalds 		goto out;	/* No listens on an unbound socket */
7231c92b4e5SDavid S. Miller 	unix_state_lock(sk);
7241da177e4SLinus Torvalds 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
7251da177e4SLinus Torvalds 		goto out_unlock;
7261da177e4SLinus Torvalds 	if (backlog > sk->sk_max_ack_backlog)
7271da177e4SLinus Torvalds 		wake_up_interruptible_all(&u->peer_wait);
7281da177e4SLinus Torvalds 	sk->sk_max_ack_backlog	= backlog;
72945733e98SKuniyuki Iwashima 	WRITE_ONCE(sk->sk_state, TCP_LISTEN);
73045733e98SKuniyuki Iwashima 
7311da177e4SLinus Torvalds 	/* set credentials so connect can copy them */
732109f6e39SEric W. Biederman 	init_peercred(sk);
7331da177e4SLinus Torvalds 	err = 0;
7341da177e4SLinus Torvalds 
7351da177e4SLinus Torvalds out_unlock:
7361c92b4e5SDavid S. Miller 	unix_state_unlock(sk);
7371da177e4SLinus Torvalds out:
7381da177e4SLinus Torvalds 	return err;
7391da177e4SLinus Torvalds }
7401da177e4SLinus Torvalds 
7411da177e4SLinus Torvalds static int unix_release(struct socket *);
7421da177e4SLinus Torvalds static int unix_bind(struct socket *, struct sockaddr *, int);
7431da177e4SLinus Torvalds static int unix_stream_connect(struct socket *, struct sockaddr *,
7441da177e4SLinus Torvalds 			       int addr_len, int flags);
7451da177e4SLinus Torvalds static int unix_socketpair(struct socket *, struct socket *);
746cdfbabfbSDavid Howells static int unix_accept(struct socket *, struct socket *, int, bool);
7479b2c45d4SDenys Vlasenko static int unix_getname(struct socket *, struct sockaddr *, int);
748a11e1d43SLinus Torvalds static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
749a11e1d43SLinus Torvalds static __poll_t unix_dgram_poll(struct file *, struct socket *,
750a11e1d43SLinus Torvalds 				    poll_table *);
7511da177e4SLinus Torvalds static int unix_ioctl(struct socket *, unsigned int, unsigned long);
7525f6beb9eSArnd Bergmann #ifdef CONFIG_COMPAT
7535f6beb9eSArnd Bergmann static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
7545f6beb9eSArnd Bergmann #endif
7551da177e4SLinus Torvalds static int unix_shutdown(struct socket *, int);
7561b784140SYing Xue static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
7571b784140SYing Xue static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
7582b514574SHannes Frederic Sowa static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
7592b514574SHannes Frederic Sowa 				       struct pipe_inode_info *, size_t size,
7602b514574SHannes Frederic Sowa 				       unsigned int flags);
7611b784140SYing Xue static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
7621b784140SYing Xue static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
763965b57b4SCong Wang static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
764965b57b4SCong Wang static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
7651da177e4SLinus Torvalds static int unix_dgram_connect(struct socket *, struct sockaddr *,
7661da177e4SLinus Torvalds 			      int, int);
7671b784140SYing Xue static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
7681b784140SYing Xue static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
7691b784140SYing Xue 				  int);
7701da177e4SLinus Torvalds 
unix_set_peek_off(struct sock * sk,int val)77112663bfcSSasha Levin static int unix_set_peek_off(struct sock *sk, int val)
772f55bb7f9SPavel Emelyanov {
773f55bb7f9SPavel Emelyanov 	struct unix_sock *u = unix_sk(sk);
774f55bb7f9SPavel Emelyanov 
7756e1ce3c3SLinus Torvalds 	if (mutex_lock_interruptible(&u->iolock))
77612663bfcSSasha Levin 		return -EINTR;
77712663bfcSSasha Levin 
77811695c6eSEric Dumazet 	WRITE_ONCE(sk->sk_peek_off, val);
7796e1ce3c3SLinus Torvalds 	mutex_unlock(&u->iolock);
78012663bfcSSasha Levin 
78112663bfcSSasha Levin 	return 0;
782f55bb7f9SPavel Emelyanov }
783f55bb7f9SPavel Emelyanov 
7845c05a164SDavid S. Miller #ifdef CONFIG_PROC_FS
unix_count_nr_fds(struct sock * sk)785de437089SKirill Tkhai static int unix_count_nr_fds(struct sock *sk)
786de437089SKirill Tkhai {
787de437089SKirill Tkhai 	struct sk_buff *skb;
788de437089SKirill Tkhai 	struct unix_sock *u;
789de437089SKirill Tkhai 	int nr_fds = 0;
790de437089SKirill Tkhai 
791de437089SKirill Tkhai 	spin_lock(&sk->sk_receive_queue.lock);
792de437089SKirill Tkhai 	skb = skb_peek(&sk->sk_receive_queue);
793de437089SKirill Tkhai 	while (skb) {
794de437089SKirill Tkhai 		u = unix_sk(skb->sk);
795de437089SKirill Tkhai 		nr_fds += atomic_read(&u->scm_stat.nr_fds);
796de437089SKirill Tkhai 		skb = skb_peek_next(skb, &sk->sk_receive_queue);
797de437089SKirill Tkhai 	}
798de437089SKirill Tkhai 	spin_unlock(&sk->sk_receive_queue.lock);
799de437089SKirill Tkhai 
800de437089SKirill Tkhai 	return nr_fds;
801de437089SKirill Tkhai }
802de437089SKirill Tkhai 
unix_show_fdinfo(struct seq_file * m,struct socket * sock)8033c32da19SKirill Tkhai static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
8043c32da19SKirill Tkhai {
8053c32da19SKirill Tkhai 	struct sock *sk = sock->sk;
806b27401a3SKirill Tkhai 	unsigned char s_state;
8073c32da19SKirill Tkhai 	struct unix_sock *u;
808b27401a3SKirill Tkhai 	int nr_fds = 0;
8093c32da19SKirill Tkhai 
8103c32da19SKirill Tkhai 	if (sk) {
811b27401a3SKirill Tkhai 		s_state = READ_ONCE(sk->sk_state);
812de437089SKirill Tkhai 		u = unix_sk(sk);
813de437089SKirill Tkhai 
814b27401a3SKirill Tkhai 		/* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
815b27401a3SKirill Tkhai 		 * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
816b27401a3SKirill Tkhai 		 * SOCK_DGRAM is ordinary. So, no lock is needed.
817b27401a3SKirill Tkhai 		 */
818b27401a3SKirill Tkhai 		if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED)
819de437089SKirill Tkhai 			nr_fds = atomic_read(&u->scm_stat.nr_fds);
820b27401a3SKirill Tkhai 		else if (s_state == TCP_LISTEN)
821de437089SKirill Tkhai 			nr_fds = unix_count_nr_fds(sk);
822b27401a3SKirill Tkhai 
823de437089SKirill Tkhai 		seq_printf(m, "scm_fds: %u\n", nr_fds);
8243c32da19SKirill Tkhai 	}
8253c32da19SKirill Tkhai }
8263a12500eSTobias Klauser #else
8273a12500eSTobias Klauser #define unix_show_fdinfo NULL
8283a12500eSTobias Klauser #endif
829f55bb7f9SPavel Emelyanov 
83090ddc4f0SEric Dumazet static const struct proto_ops unix_stream_ops = {
8311da177e4SLinus Torvalds 	.family =	PF_UNIX,
8321da177e4SLinus Torvalds 	.owner =	THIS_MODULE,
8331da177e4SLinus Torvalds 	.release =	unix_release,
8341da177e4SLinus Torvalds 	.bind =		unix_bind,
8351da177e4SLinus Torvalds 	.connect =	unix_stream_connect,
8361da177e4SLinus Torvalds 	.socketpair =	unix_socketpair,
8371da177e4SLinus Torvalds 	.accept =	unix_accept,
8381da177e4SLinus Torvalds 	.getname =	unix_getname,
839a11e1d43SLinus Torvalds 	.poll =		unix_poll,
8401da177e4SLinus Torvalds 	.ioctl =	unix_ioctl,
8415f6beb9eSArnd Bergmann #ifdef CONFIG_COMPAT
8425f6beb9eSArnd Bergmann 	.compat_ioctl =	unix_compat_ioctl,
8435f6beb9eSArnd Bergmann #endif
8441da177e4SLinus Torvalds 	.listen =	unix_listen,
8451da177e4SLinus Torvalds 	.shutdown =	unix_shutdown,
8461da177e4SLinus Torvalds 	.sendmsg =	unix_stream_sendmsg,
8471da177e4SLinus Torvalds 	.recvmsg =	unix_stream_recvmsg,
848965b57b4SCong Wang 	.read_skb =	unix_stream_read_skb,
8491da177e4SLinus Torvalds 	.mmap =		sock_no_mmap,
8502b514574SHannes Frederic Sowa 	.splice_read =	unix_stream_splice_read,
851fc0d7536SPavel Emelyanov 	.set_peek_off =	unix_set_peek_off,
8523c32da19SKirill Tkhai 	.show_fdinfo =	unix_show_fdinfo,
8531da177e4SLinus Torvalds };
8541da177e4SLinus Torvalds 
85590ddc4f0SEric Dumazet static const struct proto_ops unix_dgram_ops = {
8561da177e4SLinus Torvalds 	.family =	PF_UNIX,
8571da177e4SLinus Torvalds 	.owner =	THIS_MODULE,
8581da177e4SLinus Torvalds 	.release =	unix_release,
8591da177e4SLinus Torvalds 	.bind =		unix_bind,
8601da177e4SLinus Torvalds 	.connect =	unix_dgram_connect,
8611da177e4SLinus Torvalds 	.socketpair =	unix_socketpair,
8621da177e4SLinus Torvalds 	.accept =	sock_no_accept,
8631da177e4SLinus Torvalds 	.getname =	unix_getname,
864a11e1d43SLinus Torvalds 	.poll =		unix_dgram_poll,
8651da177e4SLinus Torvalds 	.ioctl =	unix_ioctl,
8665f6beb9eSArnd Bergmann #ifdef CONFIG_COMPAT
8675f6beb9eSArnd Bergmann 	.compat_ioctl =	unix_compat_ioctl,
8685f6beb9eSArnd Bergmann #endif
8691da177e4SLinus Torvalds 	.listen =	sock_no_listen,
8701da177e4SLinus Torvalds 	.shutdown =	unix_shutdown,
8711da177e4SLinus Torvalds 	.sendmsg =	unix_dgram_sendmsg,
872965b57b4SCong Wang 	.read_skb =	unix_read_skb,
8731da177e4SLinus Torvalds 	.recvmsg =	unix_dgram_recvmsg,
8741da177e4SLinus Torvalds 	.mmap =		sock_no_mmap,
875f55bb7f9SPavel Emelyanov 	.set_peek_off =	unix_set_peek_off,
8763c32da19SKirill Tkhai 	.show_fdinfo =	unix_show_fdinfo,
8771da177e4SLinus Torvalds };
8781da177e4SLinus Torvalds 
87990ddc4f0SEric Dumazet static const struct proto_ops unix_seqpacket_ops = {
8801da177e4SLinus Torvalds 	.family =	PF_UNIX,
8811da177e4SLinus Torvalds 	.owner =	THIS_MODULE,
8821da177e4SLinus Torvalds 	.release =	unix_release,
8831da177e4SLinus Torvalds 	.bind =		unix_bind,
8841da177e4SLinus Torvalds 	.connect =	unix_stream_connect,
8851da177e4SLinus Torvalds 	.socketpair =	unix_socketpair,
8861da177e4SLinus Torvalds 	.accept =	unix_accept,
8871da177e4SLinus Torvalds 	.getname =	unix_getname,
888a11e1d43SLinus Torvalds 	.poll =		unix_dgram_poll,
8891da177e4SLinus Torvalds 	.ioctl =	unix_ioctl,
8905f6beb9eSArnd Bergmann #ifdef CONFIG_COMPAT
8915f6beb9eSArnd Bergmann 	.compat_ioctl =	unix_compat_ioctl,
8925f6beb9eSArnd Bergmann #endif
8931da177e4SLinus Torvalds 	.listen =	unix_listen,
8941da177e4SLinus Torvalds 	.shutdown =	unix_shutdown,
8951da177e4SLinus Torvalds 	.sendmsg =	unix_seqpacket_sendmsg,
896a05d2ad1SEric W. Biederman 	.recvmsg =	unix_seqpacket_recvmsg,
8971da177e4SLinus Torvalds 	.mmap =		sock_no_mmap,
898f55bb7f9SPavel Emelyanov 	.set_peek_off =	unix_set_peek_off,
8993c32da19SKirill Tkhai 	.show_fdinfo =	unix_show_fdinfo,
9001da177e4SLinus Torvalds };
9011da177e4SLinus Torvalds 
/* Intentionally empty ->close() hook: unix sockets have no work to do
 * here.  It exists merely for sockmap.
 */
static void unix_close(struct sock *sk, long timeout)
{
}
908c7272e15SCong Wang 
/* Intentionally empty ->unhash() hook: unix sockets have no work to do
 * here.  It exists merely for sockmap.
 */
static void unix_unhash(struct sock *sk)
{
}
91594531cfcSJiang Wang 
/* bpf_bypass_getsockopt hook: returns true only for the
 * SOL_SOCKET / SO_PEERPIDFD combination, false for everything else.
 */
static bool unix_bpf_bypass_getsockopt(int level, int optname)
{
	return level == SOL_SOCKET && optname == SO_PEERPIDFD;
}
9297b26952aSAlexander Mikhalitsyn 
93094531cfcSJiang Wang struct proto unix_dgram_proto = {
9310edf0824SStephen Boyd 	.name			= "UNIX",
9321da177e4SLinus Torvalds 	.owner			= THIS_MODULE,
9331da177e4SLinus Torvalds 	.obj_size		= sizeof(struct unix_sock),
934c7272e15SCong Wang 	.close			= unix_close,
9357b26952aSAlexander Mikhalitsyn 	.bpf_bypass_getsockopt	= unix_bpf_bypass_getsockopt,
936c6382918SCong Wang #ifdef CONFIG_BPF_SYSCALL
93794531cfcSJiang Wang 	.psock_update_sk_prot	= unix_dgram_bpf_update_proto,
938c6382918SCong Wang #endif
9391da177e4SLinus Torvalds };
9401da177e4SLinus Torvalds 
94194531cfcSJiang Wang struct proto unix_stream_proto = {
94294531cfcSJiang Wang 	.name			= "UNIX-STREAM",
94394531cfcSJiang Wang 	.owner			= THIS_MODULE,
94494531cfcSJiang Wang 	.obj_size		= sizeof(struct unix_sock),
94594531cfcSJiang Wang 	.close			= unix_close,
94694531cfcSJiang Wang 	.unhash			= unix_unhash,
9477b26952aSAlexander Mikhalitsyn 	.bpf_bypass_getsockopt	= unix_bpf_bypass_getsockopt,
94894531cfcSJiang Wang #ifdef CONFIG_BPF_SYSCALL
94994531cfcSJiang Wang 	.psock_update_sk_prot	= unix_stream_bpf_update_proto,
95094531cfcSJiang Wang #endif
95194531cfcSJiang Wang };
95294531cfcSJiang Wang 
unix_create1(struct net * net,struct socket * sock,int kern,int type)95394531cfcSJiang Wang static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
9541da177e4SLinus Torvalds {
9551da177e4SLinus Torvalds 	struct unix_sock *u;
956f4bd73b5SKuniyuki Iwashima 	struct sock *sk;
957f4bd73b5SKuniyuki Iwashima 	int err;
9581da177e4SLinus Torvalds 
959518de9b3SEric Dumazet 	atomic_long_inc(&unix_nr_socks);
960f4bd73b5SKuniyuki Iwashima 	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
961f4bd73b5SKuniyuki Iwashima 		err = -ENFILE;
962f4bd73b5SKuniyuki Iwashima 		goto err;
963f4bd73b5SKuniyuki Iwashima 	}
9641da177e4SLinus Torvalds 
96594531cfcSJiang Wang 	if (type == SOCK_STREAM)
96694531cfcSJiang Wang 		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
96794531cfcSJiang Wang 	else /*dgram and  seqpacket */
96894531cfcSJiang Wang 		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
96994531cfcSJiang Wang 
970f4bd73b5SKuniyuki Iwashima 	if (!sk) {
971f4bd73b5SKuniyuki Iwashima 		err = -ENOMEM;
972f4bd73b5SKuniyuki Iwashima 		goto err;
973f4bd73b5SKuniyuki Iwashima 	}
9741da177e4SLinus Torvalds 
9751da177e4SLinus Torvalds 	sock_init_data(sock, sk);
9761da177e4SLinus Torvalds 
977e6b4b873SKuniyuki Iwashima 	sk->sk_hash		= unix_unbound_hash(sk);
9783aa9799eSVladimir Davydov 	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
9791da177e4SLinus Torvalds 	sk->sk_write_space	= unix_write_space;
98029fce603SKuniyuki Iwashima 	sk->sk_max_ack_backlog	= READ_ONCE(net->unx.sysctl_max_dgram_qlen);
9811da177e4SLinus Torvalds 	sk->sk_destruct		= unix_sock_destructor;
9821da177e4SLinus Torvalds 	u = unix_sk(sk);
983301fdbaaSKuniyuki Iwashima 	u->inflight = 0;
98440ffe67dSAl Viro 	u->path.dentry = NULL;
98540ffe67dSAl Viro 	u->path.mnt = NULL;
986fd19f329SBenjamin LaHaise 	spin_lock_init(&u->lock);
9871fd05ba5SMiklos Szeredi 	INIT_LIST_HEAD(&u->link);
9886e1ce3c3SLinus Torvalds 	mutex_init(&u->iolock); /* single task reading lock */
9896e1ce3c3SLinus Torvalds 	mutex_init(&u->bindlock); /* single task binding lock */
9901da177e4SLinus Torvalds 	init_waitqueue_head(&u->peer_wait);
9917d267278SRainer Weikusat 	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
9923c32da19SKirill Tkhai 	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
99379b05beaSKuniyuki Iwashima 	unix_insert_unbound_socket(net, sk);
994f4bd73b5SKuniyuki Iwashima 
995340c3d33SKuniyuki Iwashima 	sock_prot_inuse_add(net, sk->sk_prot, 1);
996f4bd73b5SKuniyuki Iwashima 
9971da177e4SLinus Torvalds 	return sk;
998f4bd73b5SKuniyuki Iwashima 
999f4bd73b5SKuniyuki Iwashima err:
1000f4bd73b5SKuniyuki Iwashima 	atomic_long_dec(&unix_nr_socks);
1001f4bd73b5SKuniyuki Iwashima 	return ERR_PTR(err);
10021da177e4SLinus Torvalds }
10031da177e4SLinus Torvalds 
unix_create(struct net * net,struct socket * sock,int protocol,int kern)10043f378b68SEric Paris static int unix_create(struct net *net, struct socket *sock, int protocol,
10053f378b68SEric Paris 		       int kern)
10061da177e4SLinus Torvalds {
1007f4bd73b5SKuniyuki Iwashima 	struct sock *sk;
1008f4bd73b5SKuniyuki Iwashima 
10091da177e4SLinus Torvalds 	if (protocol && protocol != PF_UNIX)
10101da177e4SLinus Torvalds 		return -EPROTONOSUPPORT;
10111da177e4SLinus Torvalds 
10121da177e4SLinus Torvalds 	sock->state = SS_UNCONNECTED;
10131da177e4SLinus Torvalds 
10141da177e4SLinus Torvalds 	switch (sock->type) {
10151da177e4SLinus Torvalds 	case SOCK_STREAM:
10161da177e4SLinus Torvalds 		sock->ops = &unix_stream_ops;
10171da177e4SLinus Torvalds 		break;
10181da177e4SLinus Torvalds 		/*
10191da177e4SLinus Torvalds 		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
10201da177e4SLinus Torvalds 		 *	nothing uses it.
10211da177e4SLinus Torvalds 		 */
10221da177e4SLinus Torvalds 	case SOCK_RAW:
10231da177e4SLinus Torvalds 		sock->type = SOCK_DGRAM;
1024df561f66SGustavo A. R. Silva 		fallthrough;
10251da177e4SLinus Torvalds 	case SOCK_DGRAM:
10261da177e4SLinus Torvalds 		sock->ops = &unix_dgram_ops;
10271da177e4SLinus Torvalds 		break;
10281da177e4SLinus Torvalds 	case SOCK_SEQPACKET:
10291da177e4SLinus Torvalds 		sock->ops = &unix_seqpacket_ops;
10301da177e4SLinus Torvalds 		break;
10311da177e4SLinus Torvalds 	default:
10321da177e4SLinus Torvalds 		return -ESOCKTNOSUPPORT;
10331da177e4SLinus Torvalds 	}
10341da177e4SLinus Torvalds 
1035f4bd73b5SKuniyuki Iwashima 	sk = unix_create1(net, sock, kern, sock->type);
1036f4bd73b5SKuniyuki Iwashima 	if (IS_ERR(sk))
1037f4bd73b5SKuniyuki Iwashima 		return PTR_ERR(sk);
1038f4bd73b5SKuniyuki Iwashima 
1039f4bd73b5SKuniyuki Iwashima 	return 0;
10401da177e4SLinus Torvalds }
10411da177e4SLinus Torvalds 
unix_release(struct socket * sock)10421da177e4SLinus Torvalds static int unix_release(struct socket *sock)
10431da177e4SLinus Torvalds {
10441da177e4SLinus Torvalds 	struct sock *sk = sock->sk;
10451da177e4SLinus Torvalds 
10461da177e4SLinus Torvalds 	if (!sk)
10471da177e4SLinus Torvalds 		return 0;
10481da177e4SLinus Torvalds 
1049c7272e15SCong Wang 	sk->sk_prot->close(sk, 0);
1050ded34e0fSPaul Moore 	unix_release_sock(sk, 0);
10511da177e4SLinus Torvalds 	sock->sk = NULL;
10521da177e4SLinus Torvalds 
1053ded34e0fSPaul Moore 	return 0;
10541da177e4SLinus Torvalds }
10551da177e4SLinus Torvalds 
/* Look up the socket bound to the filesystem path in @sunaddr.
 *
 * Returns the socket found by unix_find_socket_byinode() after updating
 * the path's atime, or an ERR_PTR():
 *	- the kern_path() or path_permission(MAY_WRITE) error,
 *	- -ECONNREFUSED when the inode is not a socket or no socket is
 *	  found for it,
 *	- -EPROTOTYPE when the socket's sk_type differs from @type.
 */
static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
				  int type)
{
	struct inode *inode;
	struct path where;
	struct sock *sk;
	int err;

	unix_mkname_bsd(sunaddr, addr_len);

	err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &where);
	if (err)
		return ERR_PTR(err);

	err = path_permission(&where, MAY_WRITE);
	if (err)
		goto drop_path;

	err = -ECONNREFUSED;
	inode = d_backing_inode(where.dentry);
	if (!S_ISSOCK(inode->i_mode))
		goto drop_path;

	sk = unix_find_socket_byinode(inode);
	if (!sk)
		goto drop_path;

	err = -EPROTOTYPE;
	if (sk->sk_type != type)
		goto drop_sock;

	touch_atime(&where);
	path_put(&where);

	return sk;

drop_sock:
	sock_put(sk);
drop_path:
	path_put(&where);
	return ERR_PTR(err);
}
1099fa39ef0eSKuniyuki Iwashima 
/* Look up a socket in @net by name, using the hash computed by
 * unix_abstract_hash() from @sunaddr, @addr_len and @type.
 *
 * Returns the socket, or ERR_PTR(-ECONNREFUSED) when none is found.
 * If the socket found has a path dentry, that path's atime is touched.
 */
static struct sock *unix_find_abstract(struct net *net,
				       struct sockaddr_un *sunaddr,
				       int addr_len, int type)
{
	unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type);
	struct sock *sk = unix_find_socket_byname(net, sunaddr, addr_len, hash);

	if (!sk)
		return ERR_PTR(-ECONNREFUSED);

	if (unix_sk(sk)->path.dentry)
		touch_atime(&unix_sk(sk)->path);

	return sk;
}
1118fa39ef0eSKuniyuki Iwashima 
unix_find_other(struct net * net,struct sockaddr_un * sunaddr,int addr_len,int type)1119fa39ef0eSKuniyuki Iwashima static struct sock *unix_find_other(struct net *net,
1120fa39ef0eSKuniyuki Iwashima 				    struct sockaddr_un *sunaddr,
1121d2d8c9fdSKuniyuki Iwashima 				    int addr_len, int type)
1122fa39ef0eSKuniyuki Iwashima {
1123fa39ef0eSKuniyuki Iwashima 	struct sock *sk;
1124fa39ef0eSKuniyuki Iwashima 
1125fa39ef0eSKuniyuki Iwashima 	if (sunaddr->sun_path[0])
112651bae889SKuniyuki Iwashima 		sk = unix_find_bsd(sunaddr, addr_len, type);
1127fa39ef0eSKuniyuki Iwashima 	else
1128d2d8c9fdSKuniyuki Iwashima 		sk = unix_find_abstract(net, sunaddr, addr_len, type);
1129fa39ef0eSKuniyuki Iwashima 
1130fa39ef0eSKuniyuki Iwashima 	return sk;
1131fa39ef0eSKuniyuki Iwashima }
1132fa39ef0eSKuniyuki Iwashima 
/*
 * Bind @sk to an automatically generated abstract name.
 *
 * The name is a zero byte followed by five hex digits taken from a 20-bit
 * counter that starts at a random value.  Candidates are tried in increasing
 * order (with wrap-around) until one is not already present in the hash
 * table.
 *
 * Returns 0 on success or when the socket already has an address, the error
 * from mutex_lock_interruptible() if taking u->bindlock was interrupted,
 * -ENOMEM if the address cannot be allocated, or -ENOSPC when the counter
 * has wrapped back to its starting value without finding a free name.
 */
unix_autobind(struct sock * sk)1133f7ed31f4SKuniyuki Iwashima static int unix_autobind(struct sock *sk)
11341da177e4SLinus Torvalds {
11351da177e4SLinus Torvalds 	struct unix_sock *u = unix_sk(sk);
1136ac325c7fSKuniyuki Iwashima 	unsigned int new_hash, old_hash;
113779b05beaSKuniyuki Iwashima 	struct net *net = sock_net(sk);
11381da177e4SLinus Torvalds 	struct unix_address *addr;
11399acbc584SKuniyuki Iwashima 	u32 lastnum, ordernum;
1140f7ed31f4SKuniyuki Iwashima 	int err;
11411da177e4SLinus Torvalds 
11426e1ce3c3SLinus Torvalds 	err = mutex_lock_interruptible(&u->bindlock);
114337ab4fa7SSasha Levin 	if (err)
114437ab4fa7SSasha Levin 		return err;
11451da177e4SLinus Torvalds 
	/* Already bound: nothing to do, err is still 0 here. */
11461da177e4SLinus Torvalds 	if (u->addr)
11471da177e4SLinus Torvalds 		goto out;
11481da177e4SLinus Torvalds 
11491da177e4SLinus Torvalds 	err = -ENOMEM;
1150755662ceSKuniyuki Iwashima 	addr = kzalloc(sizeof(*addr) +
1151755662ceSKuniyuki Iwashima 		       offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
11521da177e4SLinus Torvalds 	if (!addr)
11531da177e4SLinus Torvalds 		goto out;
11541da177e4SLinus Torvalds 
	/* 6 = the leading zero byte (left by kzalloc) + five hex digits. */
11559acbc584SKuniyuki Iwashima 	addr->len = offsetof(struct sockaddr_un, sun_path) + 6;
11561da177e4SLinus Torvalds 	addr->name->sun_family = AF_UNIX;
11578c9814b9SReshetova, Elena 	refcount_set(&addr->refcnt, 1);
11581da177e4SLinus Torvalds 
1159ac325c7fSKuniyuki Iwashima 	old_hash = sk->sk_hash;
1160a251c17aSJason A. Donenfeld 	ordernum = get_random_u32();
	/* Remember the starting point so a full wrap-around can be detected. */
11619acbc584SKuniyuki Iwashima 	lastnum = ordernum & 0xFFFFF;
11621da177e4SLinus Torvalds retry:
11639acbc584SKuniyuki Iwashima 	ordernum = (ordernum + 1) & 0xFFFFF;
11649acbc584SKuniyuki Iwashima 	sprintf(addr->name->sun_path + 1, "%05x", ordernum);
11651da177e4SLinus Torvalds 
1166e6b4b873SKuniyuki Iwashima 	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
116779b05beaSKuniyuki Iwashima 	unix_table_double_lock(net, old_hash, new_hash);
11681da177e4SLinus Torvalds 
116979b05beaSKuniyuki Iwashima 	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
		/* Candidate name is taken: drop the table locks and try the next one. */
117079b05beaSKuniyuki Iwashima 		unix_table_double_unlock(net, old_hash, new_hash);
1171afd20b92SKuniyuki Iwashima 
11729acbc584SKuniyuki Iwashima 		/* __unix_find_socket_byname() may take long time if many names
11738df73ff9STetsuo Handa 		 * are already in use.
11748df73ff9STetsuo Handa 		 */
11758df73ff9STetsuo Handa 		cond_resched();
11769acbc584SKuniyuki Iwashima 
11779acbc584SKuniyuki Iwashima 		if (ordernum == lastnum) {
11788df73ff9STetsuo Handa 			/* Give up if all names seems to be in use. */
11798df73ff9STetsuo Handa 			err = -ENOSPC;
11809acbc584SKuniyuki Iwashima 			unix_release_addr(addr);
11818df73ff9STetsuo Handa 			goto out;
11828df73ff9STetsuo Handa 		}
11839acbc584SKuniyuki Iwashima 
11841da177e4SLinus Torvalds 		goto retry;
11851da177e4SLinus Torvalds 	}
11861da177e4SLinus Torvalds 
	/* Name is free: install it while both hash chains are still locked. */
1187cf2f225eSKuniyuki Iwashima 	__unix_set_addr_hash(net, sk, addr, new_hash);
118879b05beaSKuniyuki Iwashima 	unix_table_double_unlock(net, old_hash, new_hash);
11891da177e4SLinus Torvalds 	err = 0;
11901da177e4SLinus Torvalds 
11916e1ce3c3SLinus Torvalds out:	mutex_unlock(&u->bindlock);
11921da177e4SLinus Torvalds 	return err;
11931da177e4SLinus Torvalds }
11941da177e4SLinus Torvalds 
11941da177e4SLinus Torvalds 
/*
 * Bind @sk to a pathname address by creating a socket node in the
 * filesystem.
 *
 * The node is created with mode S_IFSOCK plus the socket inode's mode bits
 * filtered through the current umask.  If anything fails after the node has
 * been created (interrupted while taking u->bindlock, or the socket turned
 * out to be bound already), the node is unlinked again.
 *
 * Returns 0 on success, -ENOMEM if the address cannot be allocated, -EINVAL
 * if the socket already has an address, or the error reported by the path
 * creation, security hook, vfs_mknod() or mutex_lock_interruptible().
 * -EEXIST is reported to the caller as -EADDRINUSE.
 */
unix_bind_bsd(struct sock * sk,struct sockaddr_un * sunaddr,int addr_len)119512f21c49SKuniyuki Iwashima static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
119612f21c49SKuniyuki Iwashima 			 int addr_len)
1197faf02010SAl Viro {
119871e6be6fSAl Viro 	umode_t mode = S_IFSOCK |
119971e6be6fSAl Viro 	       (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
120012f21c49SKuniyuki Iwashima 	struct unix_sock *u = unix_sk(sk);
1201ac325c7fSKuniyuki Iwashima 	unsigned int new_hash, old_hash;
120279b05beaSKuniyuki Iwashima 	struct net *net = sock_net(sk);
1203abf08576SChristian Brauner 	struct mnt_idmap *idmap;
120412f21c49SKuniyuki Iwashima 	struct unix_address *addr;
120538f7bd94SLinus Torvalds 	struct dentry *dentry;
120612f21c49SKuniyuki Iwashima 	struct path parent;
120771e6be6fSAl Viro 	int err;
120871e6be6fSAl Viro 
	/* The length used from here on is the one unix_mkname_bsd() returns. */
1209ecb4534bSKuniyuki Iwashima 	addr_len = unix_mkname_bsd(sunaddr, addr_len);
121012f21c49SKuniyuki Iwashima 	addr = unix_create_addr(sunaddr, addr_len);
121112f21c49SKuniyuki Iwashima 	if (!addr)
121212f21c49SKuniyuki Iwashima 		return -ENOMEM;
121312f21c49SKuniyuki Iwashima 
121438f7bd94SLinus Torvalds 	/*
121538f7bd94SLinus Torvalds 	 * Get the parent directory, calculate the hash for last
121638f7bd94SLinus Torvalds 	 * component.
121738f7bd94SLinus Torvalds 	 */
121871e6be6fSAl Viro 	dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
121912f21c49SKuniyuki Iwashima 	if (IS_ERR(dentry)) {
122012f21c49SKuniyuki Iwashima 		err = PTR_ERR(dentry);
122112f21c49SKuniyuki Iwashima 		goto out;
122212f21c49SKuniyuki Iwashima 	}
1223faf02010SAl Viro 
122438f7bd94SLinus Torvalds 	/*
122538f7bd94SLinus Torvalds 	 * All right, let's create it.
122638f7bd94SLinus Torvalds 	 */
1227abf08576SChristian Brauner 	idmap = mnt_idmap(parent.mnt);
	/* The security hook gets a veto before the node is actually created. */
122871e6be6fSAl Viro 	err = security_path_mknod(&parent, dentry, mode, 0);
122956c1731bSAl Viro 	if (!err)
1230abf08576SChristian Brauner 		err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0);
1231c0c3b8d3SAl Viro 	if (err)
123212f21c49SKuniyuki Iwashima 		goto out_path;
	/* From here on the node exists, so failure paths must unlink it. */
12336e1ce3c3SLinus Torvalds 	err = mutex_lock_interruptible(&u->bindlock);
1234c0c3b8d3SAl Viro 	if (err)
1235c0c3b8d3SAl Viro 		goto out_unlink;
	/* Already bound: out_unlock sets err to -EINVAL. */
1236c0c3b8d3SAl Viro 	if (u->addr)
1237c0c3b8d3SAl Viro 		goto out_unlock;
12381da177e4SLinus Torvalds 
1239ac325c7fSKuniyuki Iwashima 	old_hash = sk->sk_hash;
1240e6b4b873SKuniyuki Iwashima 	new_hash = unix_bsd_hash(d_backing_inode(dentry));
124179b05beaSKuniyuki Iwashima 	unix_table_double_lock(net, old_hash, new_hash);
	/* The socket keeps its own references on the mount and the new dentry. */
124256c1731bSAl Viro 	u->path.mnt = mntget(parent.mnt);
124356c1731bSAl Viro 	u->path.dentry = dget(dentry);
1244cf2f225eSKuniyuki Iwashima 	__unix_set_addr_hash(net, sk, addr, new_hash);
124579b05beaSKuniyuki Iwashima 	unix_table_double_unlock(net, old_hash, new_hash);
124651bae889SKuniyuki Iwashima 	unix_insert_bsd_socket(sk);
1247aee51517SAl Viro 	mutex_unlock(&u->bindlock);
124856c1731bSAl Viro 	done_path_create(&parent, dentry);
1249fa42d910SAl Viro 	return 0;
1250c0c3b8d3SAl Viro 
1251c0c3b8d3SAl Viro out_unlock:
1252c0c3b8d3SAl Viro 	mutex_unlock(&u->bindlock);
1253c0c3b8d3SAl Viro 	err = -EINVAL;
1254c0c3b8d3SAl Viro out_unlink:
1255c0c3b8d3SAl Viro 	/* failed after successful mknod?  unlink what we'd created... */
1256abf08576SChristian Brauner 	vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL);
125712f21c49SKuniyuki Iwashima out_path:
1258c0c3b8d3SAl Viro 	done_path_create(&parent, dentry);
125912f21c49SKuniyuki Iwashima out:
126012f21c49SKuniyuki Iwashima 	unix_release_addr(addr);
	/* A pre-existing node is reported as an address conflict. */
126112f21c49SKuniyuki Iwashima 	return err == -EEXIST ? -EADDRINUSE : err;
1262fa42d910SAl Viro }
1263fa42d910SAl Viro 
1263fa42d910SAl Viro 
/*
 * Bind @sk to a caller-supplied abstract name.
 *
 * Returns 0 on success, -ENOMEM if the address cannot be allocated, the
 * error from mutex_lock_interruptible() if taking u->bindlock was
 * interrupted, -EINVAL if the socket already has an address, or -EADDRINUSE
 * if the name is already present in the hash table.  The allocated address
 * is released on every failure path.
 */
unix_bind_abstract(struct sock * sk,struct sockaddr_un * sunaddr,int addr_len)126412f21c49SKuniyuki Iwashima static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
126512f21c49SKuniyuki Iwashima 			      int addr_len)
1266fa42d910SAl Viro {
1267fa42d910SAl Viro 	struct unix_sock *u = unix_sk(sk);
1268ac325c7fSKuniyuki Iwashima 	unsigned int new_hash, old_hash;
126979b05beaSKuniyuki Iwashima 	struct net *net = sock_net(sk);
127012f21c49SKuniyuki Iwashima 	struct unix_address *addr;
1271fa42d910SAl Viro 	int err;
1272fa42d910SAl Viro 
127312f21c49SKuniyuki Iwashima 	addr = unix_create_addr(sunaddr, addr_len);
127412f21c49SKuniyuki Iwashima 	if (!addr)
127512f21c49SKuniyuki Iwashima 		return -ENOMEM;
127612f21c49SKuniyuki Iwashima 
1277aee51517SAl Viro 	err = mutex_lock_interruptible(&u->bindlock);
1278aee51517SAl Viro 	if (err)
127912f21c49SKuniyuki Iwashima 		goto out;
1280aee51517SAl Viro 
	/* A socket that already has an address cannot be bound again. */
1281aee51517SAl Viro 	if (u->addr) {
128212f21c49SKuniyuki Iwashima 		err = -EINVAL;
128312f21c49SKuniyuki Iwashima 		goto out_mutex;
1284aee51517SAl Viro 	}
1285aee51517SAl Viro 
1286ac325c7fSKuniyuki Iwashima 	old_hash = sk->sk_hash;
1287e6b4b873SKuniyuki Iwashima 	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
128879b05beaSKuniyuki Iwashima 	unix_table_double_lock(net, old_hash, new_hash);
128912f21c49SKuniyuki Iwashima 
	/* Lookup and insertion happen under the same table locks. */
129079b05beaSKuniyuki Iwashima 	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
129112f21c49SKuniyuki Iwashima 		goto out_spin;
129212f21c49SKuniyuki Iwashima 
1293cf2f225eSKuniyuki Iwashima 	__unix_set_addr_hash(net, sk, addr, new_hash);
129479b05beaSKuniyuki Iwashima 	unix_table_double_unlock(net, old_hash, new_hash);
1295aee51517SAl Viro 	mutex_unlock(&u->bindlock);
1296fa42d910SAl Viro 	return 0;
129712f21c49SKuniyuki Iwashima 
129812f21c49SKuniyuki Iwashima out_spin:
129979b05beaSKuniyuki Iwashima 	unix_table_double_unlock(net, old_hash, new_hash);
130012f21c49SKuniyuki Iwashima 	err = -EADDRINUSE;
130112f21c49SKuniyuki Iwashima out_mutex:
130212f21c49SKuniyuki Iwashima 	mutex_unlock(&u->bindlock);
130312f21c49SKuniyuki Iwashima out:
130412f21c49SKuniyuki Iwashima 	unix_release_addr(addr);
130512f21c49SKuniyuki Iwashima 	return err;
1306aee51517SAl Viro }
1307fa42d910SAl Viro 
1307fa42d910SAl Viro 
unix_bind(struct socket * sock,struct sockaddr * uaddr,int addr_len)1308fa42d910SAl Viro static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1309fa42d910SAl Viro {
1310fa42d910SAl Viro 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
13115c32a3edSKuniyuki Iwashima 	struct sock *sk = sock->sk;
13125c32a3edSKuniyuki Iwashima 	int err;
1313fa42d910SAl Viro 
1314b8a58aa6SKuniyuki Iwashima 	if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
1315b8a58aa6SKuniyuki Iwashima 	    sunaddr->sun_family == AF_UNIX)
1316f7ed31f4SKuniyuki Iwashima 		return unix_autobind(sk);
1317fa42d910SAl Viro 
1318b8a58aa6SKuniyuki Iwashima 	err = unix_validate_addr(sunaddr, addr_len);
1319b8a58aa6SKuniyuki Iwashima 	if (err)
1320b8a58aa6SKuniyuki Iwashima 		return err;
1321b8a58aa6SKuniyuki Iwashima 
132212f21c49SKuniyuki Iwashima 	if (sunaddr->sun_path[0])
132312f21c49SKuniyuki Iwashima 		err = unix_bind_bsd(sk, sunaddr, addr_len);
1324fa42d910SAl Viro 	else
132512f21c49SKuniyuki Iwashima 		err = unix_bind_abstract(sk, sunaddr, addr_len);
132612f21c49SKuniyuki Iwashima 
132712f21c49SKuniyuki Iwashima 	return err;
13281da177e4SLinus Torvalds }
13291da177e4SLinus Torvalds 
unix_state_double_lock(struct sock * sk1,struct sock * sk2)1330278a3de5SDavid S. Miller static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1331278a3de5SDavid S. Miller {
1332278a3de5SDavid S. Miller 	if (unlikely(sk1 == sk2) || !sk2) {
1333278a3de5SDavid S. Miller 		unix_state_lock(sk1);
1334278a3de5SDavid S. Miller 		return;
1335278a3de5SDavid S. Miller 	}
13365e7f3e03SEric Dumazet 	if (sk1 > sk2)
13375e7f3e03SEric Dumazet 		swap(sk1, sk2);
13385e7f3e03SEric Dumazet 
1339278a3de5SDavid S. Miller 	unix_state_lock(sk1);
13405e7f3e03SEric Dumazet 	unix_state_lock_nested(sk2, U_LOCK_SECOND);
1341278a3de5SDavid S. Miller }
1342278a3de5SDavid S. Miller 
/*
 * Release the state lock of @sk1 and, when @sk2 is a distinct non-NULL
 * socket, the state lock of @sk2 too.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);

	/* Only one lock is involved when there is no second socket. */
	if (!sk2 || unlikely(sk1 == sk2))
		return;

	unix_state_unlock(sk2);
}
1352278a3de5SDavid S. Miller 
/*
 * connect() for sockets using the datagram connect path: set, replace or
 * clear the peer of @sock.
 *
 * With sa_family == AF_UNSPEC the peer is cleared.  Otherwise the address is
 * validated, the socket is autobound first if SOCK_PASSCRED or
 * SOCK_PASSPIDFD is set and it has no address yet, and the destination is
 * looked up and checked with unix_may_send() and the security hook before it
 * is installed as the peer.
 *
 * Returns 0 on success; -EINVAL when @alen does not cover sa_family; -EPERM
 * when unix_may_send() refuses; otherwise the error from
 * unix_validate_addr(), unix_autobind(), unix_find_other() or
 * security_unix_may_send().
 */
unix_dgram_connect(struct socket * sock,struct sockaddr * addr,int alen,int flags)13531da177e4SLinus Torvalds static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
13541da177e4SLinus Torvalds 			      int alen, int flags)
13551da177e4SLinus Torvalds {
13561da177e4SLinus Torvalds 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1357340c3d33SKuniyuki Iwashima 	struct sock *sk = sock->sk;
13581da177e4SLinus Torvalds 	struct sock *other;
13591da177e4SLinus Torvalds 	int err;
13601da177e4SLinus Torvalds 
	/* sa_family must be readable before it is inspected below. */
1361defbcf2dSMateusz Jurczyk 	err = -EINVAL;
1362defbcf2dSMateusz Jurczyk 	if (alen < offsetofend(struct sockaddr, sa_family))
1363defbcf2dSMateusz Jurczyk 		goto out;
1364defbcf2dSMateusz Jurczyk 
13651da177e4SLinus Torvalds 	if (addr->sa_family != AF_UNSPEC) {
1366b8a58aa6SKuniyuki Iwashima 		err = unix_validate_addr(sunaddr, alen);
1367b8a58aa6SKuniyuki Iwashima 		if (err)
1368b8a58aa6SKuniyuki Iwashima 			goto out;
1369b8a58aa6SKuniyuki Iwashima 
13705e2ff670SAlexander Mikhalitsyn 		if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
13715e2ff670SAlexander Mikhalitsyn 		     test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
1372302fe8ddSKuniyuki Iwashima 		    !READ_ONCE(unix_sk(sk)->addr)) {
1373f7ed31f4SKuniyuki Iwashima 			err = unix_autobind(sk);
1374f7ed31f4SKuniyuki Iwashima 			if (err)
13751da177e4SLinus Torvalds 				goto out;
1376f7ed31f4SKuniyuki Iwashima 		}
13771da177e4SLinus Torvalds 
1378278a3de5SDavid S. Miller restart:
1379340c3d33SKuniyuki Iwashima 		other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type);
1380aed26f55SKuniyuki Iwashima 		if (IS_ERR(other)) {
1381aed26f55SKuniyuki Iwashima 			err = PTR_ERR(other);
13821da177e4SLinus Torvalds 			goto out;
1383aed26f55SKuniyuki Iwashima 		}
13841da177e4SLinus Torvalds 
1385278a3de5SDavid S. Miller 		unix_state_double_lock(sk, other);
1386278a3de5SDavid S. Miller 
1387278a3de5SDavid S. Miller 		/* Apparently VFS overslept socket death. Retry. */
1388278a3de5SDavid S. Miller 		if (sock_flag(other, SOCK_DEAD)) {
1389278a3de5SDavid S. Miller 			unix_state_double_unlock(sk, other);
1390278a3de5SDavid S. Miller 			sock_put(other);
1391278a3de5SDavid S. Miller 			goto restart;
1392278a3de5SDavid S. Miller 		}
13931da177e4SLinus Torvalds 
13941da177e4SLinus Torvalds 		err = -EPERM;
13951da177e4SLinus Torvalds 		if (!unix_may_send(sk, other))
13961da177e4SLinus Torvalds 			goto out_unlock;
13971da177e4SLinus Torvalds 
13981da177e4SLinus Torvalds 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
13991da177e4SLinus Torvalds 		if (err)
14001da177e4SLinus Torvalds 			goto out_unlock;
14011da177e4SLinus Torvalds 
		/* Both ends are marked established while both state locks are held. */
140245733e98SKuniyuki Iwashima 		WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
140345733e98SKuniyuki Iwashima 		WRITE_ONCE(other->sk_state, TCP_ESTABLISHED);
14041da177e4SLinus Torvalds 	} else {
14051da177e4SLinus Torvalds 		/*
14061da177e4SLinus Torvalds 		 *	1003.1g breaking connected state with AF_UNSPEC
14071da177e4SLinus Torvalds 		 */
14081da177e4SLinus Torvalds 		other = NULL;
		/* With other == NULL this only takes sk's state lock. */
1409278a3de5SDavid S. Miller 		unix_state_double_lock(sk, other);
14101da177e4SLinus Torvalds 	}
14111da177e4SLinus Torvalds 
14121da177e4SLinus Torvalds 	/*
14131da177e4SLinus Torvalds 	 * If it was connected, reconnect.
14141da177e4SLinus Torvalds 	 */
14151da177e4SLinus Torvalds 	if (unix_peer(sk)) {
14161da177e4SLinus Torvalds 		struct sock *old_peer = unix_peer(sk);
1417dc56ad70SEric Dumazet 
14181da177e4SLinus Torvalds 		unix_peer(sk) = other;
		/* Disconnect request: sk goes back to the closed state. */
1419dc56ad70SEric Dumazet 		if (!other)
142045733e98SKuniyuki Iwashima 			WRITE_ONCE(sk->sk_state, TCP_CLOSE);
14217d267278SRainer Weikusat 		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
14227d267278SRainer Weikusat 
1423278a3de5SDavid S. Miller 		unix_state_double_unlock(sk, other);
14241da177e4SLinus Torvalds 
14258003545cSKuniyuki Iwashima 		if (other != old_peer) {
14261da177e4SLinus Torvalds 			unix_dgram_disconnected(sk, old_peer);
14278003545cSKuniyuki Iwashima 
			/* The old peer is closed only if it has no peer of its own. */
14288003545cSKuniyuki Iwashima 			unix_state_lock(old_peer);
14298003545cSKuniyuki Iwashima 			if (!unix_peer(old_peer))
14308003545cSKuniyuki Iwashima 				WRITE_ONCE(old_peer->sk_state, TCP_CLOSE);
14318003545cSKuniyuki Iwashima 			unix_state_unlock(old_peer);
14328003545cSKuniyuki Iwashima 		}
14338003545cSKuniyuki Iwashima 
		/* Drop the reference that sk held on its previous peer. */
14341da177e4SLinus Torvalds 		sock_put(old_peer);
14351da177e4SLinus Torvalds 	} else {
14361da177e4SLinus Torvalds 		unix_peer(sk) = other;
1437278a3de5SDavid S. Miller 		unix_state_double_unlock(sk, other);
14381da177e4SLinus Torvalds 	}
143983301b53SCong Wang 
	/* On success no sock_put(other) is done here; other stays referenced as the peer. */
14401da177e4SLinus Torvalds 	return 0;
14411da177e4SLinus Torvalds 
14421da177e4SLinus Torvalds out_unlock:
1443278a3de5SDavid S. Miller 	unix_state_double_unlock(sk, other);
14441da177e4SLinus Torvalds 	sock_put(other);
14451da177e4SLinus Torvalds out:
14461da177e4SLinus Torvalds 	return err;
14471da177e4SLinus Torvalds }
14481da177e4SLinus Torvalds 
14481da177e4SLinus Torvalds 
unix_wait_for_peer(struct sock * other,long timeo)14491da177e4SLinus Torvalds static long unix_wait_for_peer(struct sock *other, long timeo)
145048851e9eSJules Irenge 	__releases(&unix_sk(other)->lock)
14511da177e4SLinus Torvalds {
14521da177e4SLinus Torvalds 	struct unix_sock *u = unix_sk(other);
14531da177e4SLinus Torvalds 	int sched;
14541da177e4SLinus Torvalds 	DEFINE_WAIT(wait);
14551da177e4SLinus Torvalds 
14561da177e4SLinus Torvalds 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
14571da177e4SLinus Torvalds 
14581da177e4SLinus Torvalds 	sched = !sock_flag(other, SOCK_DEAD) &&
14591da177e4SLinus Torvalds 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1460679ed006SKuniyuki Iwashima 		unix_recvq_full_lockless(other);
14611da177e4SLinus Torvalds 
14621c92b4e5SDavid S. Miller 	unix_state_unlock(other);
14631da177e4SLinus Torvalds 
14641da177e4SLinus Torvalds 	if (sched)
14651da177e4SLinus Torvalds 		timeo = schedule_timeout(timeo);
14661da177e4SLinus Torvalds 
14671da177e4SLinus Torvalds 	finish_wait(&u->peer_wait, &wait);
14681da177e4SLinus Torvalds 	return timeo;
14691da177e4SLinus Torvalds }
14701da177e4SLinus Torvalds 
/*
 * connect() for sockets using the stream connect path.
 *
 * A new sock (newsk) and a one-byte skb are allocated up front.  Once a
 * listening socket has been found and all checks have passed, newsk is set
 * up as the peer of @sock and the skb is appended to the listener's receive
 * queue.
 * NOTE(review): presumably sock_wmalloc() ties the skb to newsk, which is
 * how unix_accept() later recovers it through skb->sk - confirm.
 *
 * Returns 0 on success.  Errors visible here: the result of
 * unix_validate_addr(), unix_autobind(), unix_create1(), unix_find_other()
 * or security_unix_stream_connect(); -ENOMEM if the skb cannot be allocated;
 * -ECONNREFUSED if the target is not listening or is shut down for receive;
 * -EAGAIN if its queue is full and no timeout is left; sock_intr_errno() if
 * a signal is pending after waiting; -EISCONN or -EINVAL if @sock is not in
 * TCP_CLOSE state.
 */
unix_stream_connect(struct socket * sock,struct sockaddr * uaddr,int addr_len,int flags)14711da177e4SLinus Torvalds static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
14721da177e4SLinus Torvalds 			       int addr_len, int flags)
14731da177e4SLinus Torvalds {
14741da177e4SLinus Torvalds 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1475340c3d33SKuniyuki Iwashima 	struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
14761da177e4SLinus Torvalds 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1477340c3d33SKuniyuki Iwashima 	struct net *net = sock_net(sk);
14781da177e4SLinus Torvalds 	struct sk_buff *skb = NULL;
1479412f97f3SKuniyuki Iwashima 	unsigned char state;
14801da177e4SLinus Torvalds 	long timeo;
1481340c3d33SKuniyuki Iwashima 	int err;
14821da177e4SLinus Torvalds 
1483b8a58aa6SKuniyuki Iwashima 	err = unix_validate_addr(sunaddr, addr_len);
1484b8a58aa6SKuniyuki Iwashima 	if (err)
1485b8a58aa6SKuniyuki Iwashima 		goto out;
1486b8a58aa6SKuniyuki Iwashima 
	/* Autobind first when credential passing is on and no address is set. */
14875e2ff670SAlexander Mikhalitsyn 	if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1488302fe8ddSKuniyuki Iwashima 	     test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
1489302fe8ddSKuniyuki Iwashima 	    !READ_ONCE(u->addr)) {
1490f7ed31f4SKuniyuki Iwashima 		err = unix_autobind(sk);
1491f7ed31f4SKuniyuki Iwashima 		if (err)
14921da177e4SLinus Torvalds 			goto out;
1493f7ed31f4SKuniyuki Iwashima 	}
14941da177e4SLinus Torvalds 
14951da177e4SLinus Torvalds 	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
14961da177e4SLinus Torvalds 
14971da177e4SLinus Torvalds 	/* First of all allocate resources.
14981da177e4SLinus Torvalds 	   If we will make it after state is locked,
14991da177e4SLinus Torvalds 	   we will have to recheck all again in any case.
15001da177e4SLinus Torvalds 	 */
15011da177e4SLinus Torvalds 
15021da177e4SLinus Torvalds 	/* create new sock for complete connection */
1503340c3d33SKuniyuki Iwashima 	newsk = unix_create1(net, NULL, 0, sock->type);
1504f4bd73b5SKuniyuki Iwashima 	if (IS_ERR(newsk)) {
1505f4bd73b5SKuniyuki Iwashima 		err = PTR_ERR(newsk);
		/* Reset to NULL so the cleanup at "out" does not release an ERR_PTR. */
1506f4bd73b5SKuniyuki Iwashima 		newsk = NULL;
15071da177e4SLinus Torvalds 		goto out;
1508f4bd73b5SKuniyuki Iwashima 	}
1509f4bd73b5SKuniyuki Iwashima 
1510f4bd73b5SKuniyuki Iwashima 	err = -ENOMEM;
15111da177e4SLinus Torvalds 
15121da177e4SLinus Torvalds 	/* Allocate skb for sending to listening sock */
15131da177e4SLinus Torvalds 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
15141da177e4SLinus Torvalds 	if (skb == NULL)
15151da177e4SLinus Torvalds 		goto out;
15161da177e4SLinus Torvalds 
15171da177e4SLinus Torvalds restart:
15181da177e4SLinus Torvalds 	/*  Find listening sock. */
1519d2d8c9fdSKuniyuki Iwashima 	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type);
1520aed26f55SKuniyuki Iwashima 	if (IS_ERR(other)) {
1521aed26f55SKuniyuki Iwashima 		err = PTR_ERR(other);
		/* Reset to NULL so the cleanup at "out" skips sock_put(). */
1522aed26f55SKuniyuki Iwashima 		other = NULL;
15231da177e4SLinus Torvalds 		goto out;
1524aed26f55SKuniyuki Iwashima 	}
15251da177e4SLinus Torvalds 
15261c92b4e5SDavid S. Miller 	unix_state_lock(other);
15271da177e4SLinus Torvalds 
15281da177e4SLinus Torvalds 	/* Apparently VFS overslept socket death. Retry. */
15291da177e4SLinus Torvalds 	if (sock_flag(other, SOCK_DEAD)) {
15301c92b4e5SDavid S. Miller 		unix_state_unlock(other);
15311da177e4SLinus Torvalds 		sock_put(other);
15321da177e4SLinus Torvalds 		goto restart;
15331da177e4SLinus Torvalds 	}
15341da177e4SLinus Torvalds 
15351da177e4SLinus Torvalds 	err = -ECONNREFUSED;
15361da177e4SLinus Torvalds 	if (other->sk_state != TCP_LISTEN)
15371da177e4SLinus Torvalds 		goto out_unlock;
153877238f2bSTomoki Sekiyama 	if (other->sk_shutdown & RCV_SHUTDOWN)
153977238f2bSTomoki Sekiyama 		goto out_unlock;
15401da177e4SLinus Torvalds 
1541f1683d07SKuniyuki Iwashima 	if (unix_recvq_full_lockless(other)) {
15421da177e4SLinus Torvalds 		err = -EAGAIN;
15431da177e4SLinus Torvalds 		if (!timeo)
15441da177e4SLinus Torvalds 			goto out_unlock;
15451da177e4SLinus Torvalds 
		/* unix_wait_for_peer() releases other's state lock before returning. */
15461da177e4SLinus Torvalds 		timeo = unix_wait_for_peer(other, timeo);
15471da177e4SLinus Torvalds 
15481da177e4SLinus Torvalds 		err = sock_intr_errno(timeo);
15491da177e4SLinus Torvalds 		if (signal_pending(current))
15501da177e4SLinus Torvalds 			goto out;
		/* Drop the lookup reference and start over with a fresh lookup. */
15511da177e4SLinus Torvalds 		sock_put(other);
15521da177e4SLinus Torvalds 		goto restart;
15531da177e4SLinus Torvalds 	}
15541da177e4SLinus Torvalds 
1555412f97f3SKuniyuki Iwashima 	/* self connect and simultaneous connect are eliminated
1556412f97f3SKuniyuki Iwashima 	 * by rejecting TCP_LISTEN socket to avoid deadlock.
15571da177e4SLinus Torvalds 	 */
	/* Unlocked pre-check of sk's state; it is checked again under sk's lock below. */
1558412f97f3SKuniyuki Iwashima 	state = READ_ONCE(sk->sk_state);
1559412f97f3SKuniyuki Iwashima 	if (unlikely(state != TCP_CLOSE)) {
1560412f97f3SKuniyuki Iwashima 		err = state == TCP_ESTABLISHED ? -EISCONN : -EINVAL;
15611da177e4SLinus Torvalds 		goto out_unlock;
15621da177e4SLinus Torvalds 	}
15631da177e4SLinus Torvalds 
	/* other's state lock is already held, so sk's is taken as the nested one. */
15645e7f3e03SEric Dumazet 	unix_state_lock_nested(sk, U_LOCK_SECOND);
15651da177e4SLinus Torvalds 
1566412f97f3SKuniyuki Iwashima 	if (unlikely(sk->sk_state != TCP_CLOSE)) {
1567412f97f3SKuniyuki Iwashima 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EINVAL;
15681c92b4e5SDavid S. Miller 		unix_state_unlock(sk);
1569412f97f3SKuniyuki Iwashima 		goto out_unlock;
15701da177e4SLinus Torvalds 	}
15711da177e4SLinus Torvalds 
15723610cda5SDavid S. Miller 	err = security_unix_stream_connect(sk, other, newsk);
15731da177e4SLinus Torvalds 	if (err) {
15741c92b4e5SDavid S. Miller 		unix_state_unlock(sk);
15751da177e4SLinus Torvalds 		goto out_unlock;
15761da177e4SLinus Torvalds 	}
15771da177e4SLinus Torvalds 
15781da177e4SLinus Torvalds 	/* The way is open! Fastly set all the necessary fields... */
15791da177e4SLinus Torvalds 
	/* newsk holds a reference on sk for as long as sk is its peer. */
15801da177e4SLinus Torvalds 	sock_hold(sk);
15811da177e4SLinus Torvalds 	unix_peer(newsk)	= sk;
15821da177e4SLinus Torvalds 	newsk->sk_state		= TCP_ESTABLISHED;
15831da177e4SLinus Torvalds 	newsk->sk_type		= sk->sk_type;
1584109f6e39SEric W. Biederman 	init_peercred(newsk);
15851da177e4SLinus Torvalds 	newu = unix_sk(newsk);
1586eaefd110SEric Dumazet 	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
15871da177e4SLinus Torvalds 	otheru = unix_sk(other);
15881da177e4SLinus Torvalds 
1589ae3b5641SAl Viro 	/* copy address information from listening to new sock
1590ae3b5641SAl Viro 	 *
1591ae3b5641SAl Viro 	 * The contents of *(otheru->addr) and otheru->path
1592ae3b5641SAl Viro 	 * are seen fully set up here, since we have found
15932f7ca90aSKuniyuki Iwashima 	 * otheru in hash under its lock.  Insertion into the
15942f7ca90aSKuniyuki Iwashima 	 * hash chain we'd found it in had been done in an
15952f7ca90aSKuniyuki Iwashima 	 * earlier critical area protected by the chain's lock,
1596ae3b5641SAl Viro 	 * the same one where we'd set *(otheru->addr) contents,
1597ae3b5641SAl Viro 	 * as well as otheru->path and otheru->addr itself.
1598ae3b5641SAl Viro 	 *
1599ae3b5641SAl Viro 	 * Using smp_store_release() here to set newu->addr
1600ae3b5641SAl Viro 	 * is enough to make those stores, as well as stores
1601ae3b5641SAl Viro 	 * to newu->path visible to anyone who gets newu->addr
1602ae3b5641SAl Viro 	 * by smp_load_acquire().  IOW, the same warranties
1603ae3b5641SAl Viro 	 * as for unix_sock instances bound in unix_bind() or
1604ae3b5641SAl Viro 	 * in unix_autobind().
1605ae3b5641SAl Viro 	 */
160640ffe67dSAl Viro 	if (otheru->path.dentry) {
160740ffe67dSAl Viro 		path_get(&otheru->path);
160840ffe67dSAl Viro 		newu->path = otheru->path;
16091da177e4SLinus Torvalds 	}
	/* newsk shares the listener's address object, so take a reference on it. */
1610ae3b5641SAl Viro 	refcount_inc(&otheru->addr->refcnt);
1611ae3b5641SAl Viro 	smp_store_release(&newu->addr, otheru->addr);
16121da177e4SLinus Torvalds 
16131da177e4SLinus Torvalds 	/* Set credentials */
1614109f6e39SEric W. Biederman 	copy_peercred(sk, other);
16151da177e4SLinus Torvalds 
16161da177e4SLinus Torvalds 	sock->state	= SS_CONNECTED;
161745733e98SKuniyuki Iwashima 	WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
	/* sk holds a reference on newsk for as long as newsk is its peer. */
1618830a1e5cSBenjamin LaHaise 	sock_hold(newsk);
1619830a1e5cSBenjamin LaHaise 
16204e857c58SPeter Zijlstra 	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
1621830a1e5cSBenjamin LaHaise 	unix_peer(sk)	= newsk;
16221da177e4SLinus Torvalds 
16231c92b4e5SDavid S. Miller 	unix_state_unlock(sk);
16241da177e4SLinus Torvalds 
16254e03d073Sgushengxian 	/* take ten and send info to listening sock */
16261da177e4SLinus Torvalds 	spin_lock(&other->sk_receive_queue.lock);
16271da177e4SLinus Torvalds 	__skb_queue_tail(&other->sk_receive_queue, skb);
16281da177e4SLinus Torvalds 	spin_unlock(&other->sk_receive_queue.lock);
16291c92b4e5SDavid S. Miller 	unix_state_unlock(other);
	/* Notify the listener that a connection request has been queued. */
1630676d2369SDavid S. Miller 	other->sk_data_ready(other);
16311da177e4SLinus Torvalds 	sock_put(other);
16321da177e4SLinus Torvalds 	return 0;
16331da177e4SLinus Torvalds 
16341da177e4SLinus Torvalds out_unlock:
16351da177e4SLinus Torvalds 	if (other)
16361c92b4e5SDavid S. Miller 		unix_state_unlock(other);
16371da177e4SLinus Torvalds 
16381da177e4SLinus Torvalds out:
	/* Common cleanup: free whatever was pre-allocated and drop the lookup reference. */
16391da177e4SLinus Torvalds 	kfree_skb(skb);
16401da177e4SLinus Torvalds 	if (newsk)
16411da177e4SLinus Torvalds 		unix_release_sock(newsk, 0);
16421da177e4SLinus Torvalds 	if (other)
16431da177e4SLinus Torvalds 		sock_put(other);
16441da177e4SLinus Torvalds 	return err;
16451da177e4SLinus Torvalds }
16461da177e4SLinus Torvalds 
16461da177e4SLinus Torvalds 
unix_socketpair(struct socket * socka,struct socket * sockb)16471da177e4SLinus Torvalds static int unix_socketpair(struct socket *socka, struct socket *sockb)
16481da177e4SLinus Torvalds {
16491da177e4SLinus Torvalds 	struct sock *ska = socka->sk, *skb = sockb->sk;
16501da177e4SLinus Torvalds 
16511da177e4SLinus Torvalds 	/* Join our sockets back to back */
16521da177e4SLinus Torvalds 	sock_hold(ska);
16531da177e4SLinus Torvalds 	sock_hold(skb);
16541da177e4SLinus Torvalds 	unix_peer(ska) = skb;
16551da177e4SLinus Torvalds 	unix_peer(skb) = ska;
1656109f6e39SEric W. Biederman 	init_peercred(ska);
1657109f6e39SEric W. Biederman 	init_peercred(skb);
16581da177e4SLinus Torvalds 
16591da177e4SLinus Torvalds 	ska->sk_state = TCP_ESTABLISHED;
16601da177e4SLinus Torvalds 	skb->sk_state = TCP_ESTABLISHED;
16611da177e4SLinus Torvalds 	socka->state  = SS_CONNECTED;
16621da177e4SLinus Torvalds 	sockb->state  = SS_CONNECTED;
16631da177e4SLinus Torvalds 	return 0;
16641da177e4SLinus Torvalds }
16651da177e4SLinus Torvalds 
unix_sock_inherit_flags(const struct socket * old,struct socket * new)166690c6bd34SDaniel Borkmann static void unix_sock_inherit_flags(const struct socket *old,
166790c6bd34SDaniel Borkmann 				    struct socket *new)
166890c6bd34SDaniel Borkmann {
166990c6bd34SDaniel Borkmann 	if (test_bit(SOCK_PASSCRED, &old->flags))
167090c6bd34SDaniel Borkmann 		set_bit(SOCK_PASSCRED, &new->flags);
16715e2ff670SAlexander Mikhalitsyn 	if (test_bit(SOCK_PASSPIDFD, &old->flags))
16725e2ff670SAlexander Mikhalitsyn 		set_bit(SOCK_PASSPIDFD, &new->flags);
167390c6bd34SDaniel Borkmann 	if (test_bit(SOCK_PASSSEC, &old->flags))
167490c6bd34SDaniel Borkmann 		set_bit(SOCK_PASSSEC, &new->flags);
167590c6bd34SDaniel Borkmann }
167690c6bd34SDaniel Borkmann 
/*
 * accept(): take one queued connection request off a listening socket and
 * attach the sock it carries to @newsock.
 *
 * Returns 0 on success, -EOPNOTSUPP unless the socket type is SOCK_STREAM
 * or SOCK_SEQPACKET, -EINVAL if the socket is not in TCP_LISTEN state or
 * the receive returned nothing without reporting an error, or the error
 * reported by skb_recv_datagram().
 */
static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
		       bool kern)
{
	struct sock *listener = sock->sk;
	struct sk_buff *request;
	struct sock *accepted;
	int err;

	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		return -EOPNOTSUPP;

	if (READ_ONCE(listener->sk_state) != TCP_LISTEN)
		return -EINVAL;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */
	err = -EINVAL;
	request = skb_recv_datagram(listener,
				    (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
				    &err);
	if (!request) {
		/* No skb and no error means receive shutdown. */
		return err ? err : -EINVAL;
	}

	accepted = request->sk;
	skb_free_datagram(listener, request);
	/* A queue slot was freed: wake anyone sleeping on peer_wait. */
	wake_up_interruptible(&unix_sk(listener)->peer_wait);

	/* Attach the accepted sock to the new socket. */
	unix_state_lock(accepted);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(accepted, newsock);
	unix_state_unlock(accepted);

	return 0;
}
17211da177e4SLinus Torvalds 
17221da177e4SLinus Torvalds 
unix_getname(struct socket * sock,struct sockaddr * uaddr,int peer)17239b2c45d4SDenys Vlasenko static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
17241da177e4SLinus Torvalds {
17251da177e4SLinus Torvalds 	struct sock *sk = sock->sk;
1726ae3b5641SAl Viro 	struct unix_address *addr;
172713cfa97bSCyrill Gorcunov 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
17281da177e4SLinus Torvalds 	int err = 0;
17291da177e4SLinus Torvalds 
17301da177e4SLinus Torvalds 	if (peer) {
17311da177e4SLinus Torvalds 		sk = unix_peer_get(sk);
17321da177e4SLinus Torvalds 
17331da177e4SLinus Torvalds 		err = -ENOTCONN;
17341da177e4SLinus Torvalds 		if (!sk)
17351da177e4SLinus Torvalds 			goto out;
17361da177e4SLinus Torvalds 		err = 0;
17371da177e4SLinus Torvalds 	} else {
17381da177e4SLinus Torvalds 		sock_hold(sk);
17391da177e4SLinus Torvalds 	}
17401da177e4SLinus Torvalds 
1741ae3b5641SAl Viro 	addr = smp_load_acquire(&unix_sk(sk)->addr);
1742ae3b5641SAl Viro 	if (!addr) {
17431da177e4SLinus Torvalds 		sunaddr->sun_family = AF_UNIX;
17441da177e4SLinus Torvalds 		sunaddr->sun_path[0] = 0;
1745755662ceSKuniyuki Iwashima 		err = offsetof(struct sockaddr_un, sun_path);
17461da177e4SLinus Torvalds 	} else {
17479b2c45d4SDenys Vlasenko 		err = addr->len;
17489b2c45d4SDenys Vlasenko 		memcpy(sunaddr, addr->name, addr->len);
17491da177e4SLinus Torvalds 	}
17501da177e4SLinus Torvalds 	sock_put(sk);
17511da177e4SLinus Torvalds out:
17521da177e4SLinus Torvalds 	return err;
17531da177e4SLinus Torvalds }
17541da177e4SLinus Torvalds 
unix_peek_fds(struct scm_cookie * scm,struct sk_buff * skb)1755cbcf0112SMiklos Szeredi static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1756cbcf0112SMiklos Szeredi {
1757cbcf0112SMiklos Szeredi 	scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1758cbcf0112SMiklos Szeredi 
1759cbcf0112SMiklos Szeredi 	/*
1760cbcf0112SMiklos Szeredi 	 * Garbage collection of unix sockets starts by selecting a set of
1761cbcf0112SMiklos Szeredi 	 * candidate sockets which have reference only from being in flight
1762cbcf0112SMiklos Szeredi 	 * (total_refs == inflight_refs).  This condition is checked once during
1763cbcf0112SMiklos Szeredi 	 * the candidate collection phase, and candidates are marked as such, so
1764cbcf0112SMiklos Szeredi 	 * that non-candidates can later be ignored.  While inflight_refs is
1765cbcf0112SMiklos Szeredi 	 * protected by unix_gc_lock, total_refs (file count) is not, hence this
1766cbcf0112SMiklos Szeredi 	 * is an instantaneous decision.
1767cbcf0112SMiklos Szeredi 	 *
1768cbcf0112SMiklos Szeredi 	 * Once a candidate, however, the socket must not be reinstalled into a
1769cbcf0112SMiklos Szeredi 	 * file descriptor while the garbage collection is in progress.
1770cbcf0112SMiklos Szeredi 	 *
1771cbcf0112SMiklos Szeredi 	 * If the above conditions are met, then the directed graph of
1772cbcf0112SMiklos Szeredi 	 * candidates (*) does not change while unix_gc_lock is held.
1773cbcf0112SMiklos Szeredi 	 *
1774cbcf0112SMiklos Szeredi 	 * Any operations that changes the file count through file descriptors
1775cbcf0112SMiklos Szeredi 	 * (dup, close, sendmsg) does not change the graph since candidates are
1776cbcf0112SMiklos Szeredi 	 * not installed in fds.
1777cbcf0112SMiklos Szeredi 	 *
1778cbcf0112SMiklos Szeredi 	 * Dequeing a candidate via recvmsg would install it into an fd, but
1779cbcf0112SMiklos Szeredi 	 * that takes unix_gc_lock to decrement the inflight count, so it's
1780cbcf0112SMiklos Szeredi 	 * serialized with garbage collection.
1781cbcf0112SMiklos Szeredi 	 *
1782cbcf0112SMiklos Szeredi 	 * MSG_PEEK is special in that it does not change the inflight count,
1783cbcf0112SMiklos Szeredi 	 * yet does install the socket into an fd.  The following lock/unlock
1784cbcf0112SMiklos Szeredi 	 * pair is to ensure serialization with garbage collection.  It must be
1785cbcf0112SMiklos Szeredi 	 * done between incrementing the file count and installing the file into
1786cbcf0112SMiklos Szeredi 	 * an fd.
1787cbcf0112SMiklos Szeredi 	 *
1788cbcf0112SMiklos Szeredi 	 * If garbage collection starts after the barrier provided by the
1789cbcf0112SMiklos Szeredi 	 * lock/unlock, then it will see the elevated refcount and not mark this
1790cbcf0112SMiklos Szeredi 	 * as a candidate.  If a garbage collection is already in progress
1791cbcf0112SMiklos Szeredi 	 * before the file count was incremented, then the lock/unlock pair will
1792cbcf0112SMiklos Szeredi 	 * ensure that garbage collection is finished before progressing to
1793cbcf0112SMiklos Szeredi 	 * installing the fd.
1794cbcf0112SMiklos Szeredi 	 *
1795cbcf0112SMiklos Szeredi 	 * (*) A -> B where B is on the queue of A or B is on the queue of C
1796cbcf0112SMiklos Szeredi 	 * which is on the queue of listening socket A.
1797cbcf0112SMiklos Szeredi 	 */
1798cbcf0112SMiklos Szeredi 	spin_lock(&unix_gc_lock);
1799cbcf0112SMiklos Szeredi 	spin_unlock(&unix_gc_lock);
1800cbcf0112SMiklos Szeredi }
1801cbcf0112SMiklos Szeredi 
/*
 * Stamp @skb's control block with the sender identity held in @scm (pid
 * reference, uid, gid, security data) and, when @send_fds is set and @scm
 * carries files, attach them through unix_attach_fds().
 *
 * Returns 0, or whatever unix_attach_fds() returned.  The destructor is
 * installed in either case.
 */
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int rc;

	UNIXCB(skb).pid = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	unix_get_secdata(scm, skb);

	rc = (scm->fp && send_fds) ? unix_attach_fds(scm, skb) : 0;

	skb->destructor = unix_destruct_scm;
	return rc;
}
18177361c36cSEric W. Biederman 
unix_passcred_enabled(const struct socket * sock,const struct sock * other)18189490f886SHannes Frederic Sowa static bool unix_passcred_enabled(const struct socket *sock,
18199490f886SHannes Frederic Sowa 				  const struct sock *other)
18209490f886SHannes Frederic Sowa {
18219490f886SHannes Frederic Sowa 	return test_bit(SOCK_PASSCRED, &sock->flags) ||
18225e2ff670SAlexander Mikhalitsyn 	       test_bit(SOCK_PASSPIDFD, &sock->flags) ||
18239490f886SHannes Frederic Sowa 	       !other->sk_socket ||
18245e2ff670SAlexander Mikhalitsyn 	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags) ||
18255e2ff670SAlexander Mikhalitsyn 	       test_bit(SOCK_PASSPIDFD, &other->sk_socket->flags);
18269490f886SHannes Frederic Sowa }
18279490f886SHannes Frederic Sowa 
18281da177e4SLinus Torvalds /*
182916e57262SEric Dumazet  * Some apps rely on write() giving SCM_CREDENTIALS
183016e57262SEric Dumazet  * We include credentials if source or destination socket
183116e57262SEric Dumazet  * asserted SOCK_PASSCRED.
183216e57262SEric Dumazet  */
maybe_add_creds(struct sk_buff * skb,const struct socket * sock,const struct sock * other)183316e57262SEric Dumazet static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
183416e57262SEric Dumazet 			    const struct sock *other)
183516e57262SEric Dumazet {
18366b0ee8c0SEric W. Biederman 	if (UNIXCB(skb).pid)
183716e57262SEric Dumazet 		return;
18389490f886SHannes Frederic Sowa 	if (unix_passcred_enabled(sock, other)) {
183916e57262SEric Dumazet 		UNIXCB(skb).pid  = get_pid(task_tgid(current));
18406e0895c2SDavid S. Miller 		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
184116e57262SEric Dumazet 	}
184216e57262SEric Dumazet }
184316e57262SEric Dumazet 
unix_skb_scm_eq(struct sk_buff * skb,struct scm_cookie * scm)18449490f886SHannes Frederic Sowa static bool unix_skb_scm_eq(struct sk_buff *skb,
18459490f886SHannes Frederic Sowa 			    struct scm_cookie *scm)
18469490f886SHannes Frederic Sowa {
1847b146cbf2SKees Cook 	return UNIXCB(skb).pid == scm->pid &&
1848b146cbf2SKees Cook 	       uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
1849b146cbf2SKees Cook 	       gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
18509490f886SHannes Frederic Sowa 	       unix_secdata_eq(scm, skb);
18519490f886SHannes Frederic Sowa }
18529490f886SHannes Frederic Sowa 
scm_stat_add(struct sock * sk,struct sk_buff * skb)18533c32da19SKirill Tkhai static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
18543c32da19SKirill Tkhai {
18553c32da19SKirill Tkhai 	struct scm_fp_list *fp = UNIXCB(skb).fp;
18563c32da19SKirill Tkhai 	struct unix_sock *u = unix_sk(sk);
18573c32da19SKirill Tkhai 
18583c32da19SKirill Tkhai 	if (unlikely(fp && fp->count))
18597782040bSPaolo Abeni 		atomic_add(fp->count, &u->scm_stat.nr_fds);
18603c32da19SKirill Tkhai }
18613c32da19SKirill Tkhai 
scm_stat_del(struct sock * sk,struct sk_buff * skb)18623c32da19SKirill Tkhai static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
18633c32da19SKirill Tkhai {
18643c32da19SKirill Tkhai 	struct scm_fp_list *fp = UNIXCB(skb).fp;
18653c32da19SKirill Tkhai 	struct unix_sock *u = unix_sk(sk);
18663c32da19SKirill Tkhai 
18673c32da19SKirill Tkhai 	if (unlikely(fp && fp->count))
18687782040bSPaolo Abeni 		atomic_sub(fp->count, &u->scm_stat.nr_fds);
18693c32da19SKirill Tkhai }
18703c32da19SKirill Tkhai 
187116e57262SEric Dumazet /*
18721da177e4SLinus Torvalds  *	Send AF_UNIX data.
18731da177e4SLinus Torvalds  */
18741da177e4SLinus Torvalds 
/*
 * Datagram send path; unix_seqpacket_sendmsg() also ends up here.
 *
 * The skb is allocated and filled from msg->msg_iter first.  The receiver
 * is then resolved (connected peer, or lookup by msg_name), checked under
 * its state lock and the skb appended to its receive queue.
 *
 * Returns @len on success, and also when the receiver's socket filter
 * rejects the skb (the packet is dropped silently).  Otherwise returns a
 * negative errno.  MSG_OOB is refused with -EOPNOTSUPP.
 */
unix_dgram_sendmsg(struct socket * sock,struct msghdr * msg,size_t len)18751b784140SYing Xue static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
18761b784140SYing Xue 			      size_t len)
18771da177e4SLinus Torvalds {
1878342dfc30SSteffen Hurrle 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1879340c3d33SKuniyuki Iwashima 	struct sock *sk = sock->sk, *other = NULL;
1880340c3d33SKuniyuki Iwashima 	struct unix_sock *u = unix_sk(sk);
18817cc05662SChristoph Hellwig 	struct scm_cookie scm;
1882340c3d33SKuniyuki Iwashima 	struct sk_buff *skb;
1883eb6a2481SEric Dumazet 	int data_len = 0;
	/* Non-zero while sk's state lock is held in addition to other's. */
18847d267278SRainer Weikusat 	int sk_locked;
1885340c3d33SKuniyuki Iwashima 	long timeo;
1886340c3d33SKuniyuki Iwashima 	int err;
18871da177e4SLinus Torvalds 
18885f23b734Sdann frazier 	wait_for_unix_gc();
18897cc05662SChristoph Hellwig 	err = scm_send(sock, msg, &scm, false);
18901da177e4SLinus Torvalds 	if (err < 0)
18911da177e4SLinus Torvalds 		return err;
18921da177e4SLinus Torvalds 
18931da177e4SLinus Torvalds 	err = -EOPNOTSUPP;
18941da177e4SLinus Torvalds 	if (msg->msg_flags&MSG_OOB)
18951da177e4SLinus Torvalds 		goto out;
18961da177e4SLinus Torvalds 
	/* An explicit destination is only validated here and looked up after
	 * the skb has been built; without one the connected peer is used.
	 */
18971da177e4SLinus Torvalds 	if (msg->msg_namelen) {
1898b8a58aa6SKuniyuki Iwashima 		err = unix_validate_addr(sunaddr, msg->msg_namelen);
1899b8a58aa6SKuniyuki Iwashima 		if (err)
1900b8a58aa6SKuniyuki Iwashima 			goto out;
19011da177e4SLinus Torvalds 	} else {
19021da177e4SLinus Torvalds 		sunaddr = NULL;
19031da177e4SLinus Torvalds 		err = -ENOTCONN;
19041da177e4SLinus Torvalds 		other = unix_peer_get(sk);
19051da177e4SLinus Torvalds 		if (!other)
19061da177e4SLinus Torvalds 			goto out;
19071da177e4SLinus Torvalds 	}
19081da177e4SLinus Torvalds 
	/* A sender that passes credentials but has no address yet gets one
	 * from unix_autobind().
	 */
19095e2ff670SAlexander Mikhalitsyn 	if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1910302fe8ddSKuniyuki Iwashima 	     test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
1911302fe8ddSKuniyuki Iwashima 	    !READ_ONCE(u->addr)) {
1912f7ed31f4SKuniyuki Iwashima 		err = unix_autobind(sk);
1913f7ed31f4SKuniyuki Iwashima 		if (err)
19141da177e4SLinus Torvalds 			goto out;
1915f7ed31f4SKuniyuki Iwashima 	}
19161da177e4SLinus Torvalds 
19171da177e4SLinus Torvalds 	err = -EMSGSIZE;
1918996ec22fSKuniyuki Iwashima 	if (len > READ_ONCE(sk->sk_sndbuf) - 32)
19191da177e4SLinus Torvalds 		goto out;
19201da177e4SLinus Torvalds 
	/* The part of the payload beyond SKB_MAX_ALLOC, capped at
	 * MAX_SKB_FRAGS pages and page aligned, is requested as data_len
	 * from sock_alloc_send_pskb(); the remainder is the header length.
	 */
192131ff6aa5SKirill Tkhai 	if (len > SKB_MAX_ALLOC) {
1922eb6a2481SEric Dumazet 		data_len = min_t(size_t,
1923eb6a2481SEric Dumazet 				 len - SKB_MAX_ALLOC,
1924eb6a2481SEric Dumazet 				 MAX_SKB_FRAGS * PAGE_SIZE);
192531ff6aa5SKirill Tkhai 		data_len = PAGE_ALIGN(data_len);
192631ff6aa5SKirill Tkhai 
192731ff6aa5SKirill Tkhai 		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
192831ff6aa5SKirill Tkhai 	}
1929eb6a2481SEric Dumazet 
1930eb6a2481SEric Dumazet 	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
193128d64271SEric Dumazet 				   msg->msg_flags & MSG_DONTWAIT, &err,
193228d64271SEric Dumazet 				   PAGE_ALLOC_COSTLY_ORDER);
19331da177e4SLinus Torvalds 	if (skb == NULL)
19341da177e4SLinus Torvalds 		goto out;
19351da177e4SLinus Torvalds 
19367cc05662SChristoph Hellwig 	err = unix_scm_to_skb(&scm, skb, true);
193725888e30SEric Dumazet 	if (err < 0)
19386209344fSMiklos Szeredi 		goto out_free;
1939877ce7c1SCatherine Zhang 
1940eb6a2481SEric Dumazet 	skb_put(skb, len - data_len);
1941eb6a2481SEric Dumazet 	skb->data_len = data_len;
1942eb6a2481SEric Dumazet 	skb->len = len;
1943c0371da6SAl Viro 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
19441da177e4SLinus Torvalds 	if (err)
19451da177e4SLinus Torvalds 		goto out_free;
19461da177e4SLinus Torvalds 
19471da177e4SLinus Torvalds 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
19481da177e4SLinus Torvalds 
19491da177e4SLinus Torvalds restart:
	/* other is NULL here on the first pass with an explicit address, and
	 * after a dead receiver has been dropped further down.
	 */
19501da177e4SLinus Torvalds 	if (!other) {
19511da177e4SLinus Torvalds 		err = -ECONNRESET;
19521da177e4SLinus Torvalds 		if (sunaddr == NULL)
19531da177e4SLinus Torvalds 			goto out_free;
19541da177e4SLinus Torvalds 
1955340c3d33SKuniyuki Iwashima 		other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen,
1956d2d8c9fdSKuniyuki Iwashima 					sk->sk_type);
1957aed26f55SKuniyuki Iwashima 		if (IS_ERR(other)) {
1958aed26f55SKuniyuki Iwashima 			err = PTR_ERR(other);
			/* Keep the "out" label from calling sock_put() on an error pointer. */
1959aed26f55SKuniyuki Iwashima 			other = NULL;
19601da177e4SLinus Torvalds 			goto out_free;
19611da177e4SLinus Torvalds 		}
1962aed26f55SKuniyuki Iwashima 	}
19631da177e4SLinus Torvalds 
1964d6ae3baeSAlban Crequy 	if (sk_filter(other, skb) < 0) {
1965d6ae3baeSAlban Crequy 		/* Toss the packet but do not return any error to the sender */
1966d6ae3baeSAlban Crequy 		err = len;
1967d6ae3baeSAlban Crequy 		goto out_free;
1968d6ae3baeSAlban Crequy 	}
1969d6ae3baeSAlban Crequy 
19707d267278SRainer Weikusat 	sk_locked = 0;
19711c92b4e5SDavid S. Miller 	unix_state_lock(other);
19727d267278SRainer Weikusat restart_locked:
19731da177e4SLinus Torvalds 	err = -EPERM;
19741da177e4SLinus Torvalds 	if (!unix_may_send(sk, other))
19751da177e4SLinus Torvalds 		goto out_unlock;
19761da177e4SLinus Torvalds 
19777d267278SRainer Weikusat 	if (unlikely(sock_flag(other, SOCK_DEAD))) {
19781da177e4SLinus Torvalds 		/*
19791da177e4SLinus Torvalds 		 *	Check with 1003.1g - what should
19801da177e4SLinus Torvalds 		 *	datagram error
19811da177e4SLinus Torvalds 		 */
19821c92b4e5SDavid S. Miller 		unix_state_unlock(other);
19831da177e4SLinus Torvalds 		sock_put(other);
19841da177e4SLinus Torvalds 
19857d267278SRainer Weikusat 		if (!sk_locked)
19861c92b4e5SDavid S. Miller 			unix_state_lock(sk);
19877d267278SRainer Weikusat 
		/* From here on sk's state lock is held; each branch below
		 * releases it.  err stays 0 only when sk was not connected to
		 * the dead socket, in which case the lookup is retried.
		 */
19887d267278SRainer Weikusat 		err = 0;
19893ff8bff7SKirill Tkhai 		if (sk->sk_type == SOCK_SEQPACKET) {
19903ff8bff7SKirill Tkhai 			/* We are here only when racing with unix_release_sock()
19913ff8bff7SKirill Tkhai 			 * is clearing @other. Never change state to TCP_CLOSE
19923ff8bff7SKirill Tkhai 			 * unlike SOCK_DGRAM wants.
19933ff8bff7SKirill Tkhai 			 */
19943ff8bff7SKirill Tkhai 			unix_state_unlock(sk);
19953ff8bff7SKirill Tkhai 			err = -EPIPE;
19963ff8bff7SKirill Tkhai 		} else if (unix_peer(sk) == other) {
			/* The connected peer is dead: detach from it, move sk
			 * to TCP_CLOSE and fail with -ECONNREFUSED.
			 */
19971da177e4SLinus Torvalds 			unix_peer(sk) = NULL;
19987d267278SRainer Weikusat 			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
19997d267278SRainer Weikusat 
200045733e98SKuniyuki Iwashima 			WRITE_ONCE(sk->sk_state, TCP_CLOSE);
20011c92b4e5SDavid S. Miller 			unix_state_unlock(sk);
20021da177e4SLinus Torvalds 
20031da177e4SLinus Torvalds 			unix_dgram_disconnected(sk, other);
20041da177e4SLinus Torvalds 			sock_put(other);
20051da177e4SLinus Torvalds 			err = -ECONNREFUSED;
20061da177e4SLinus Torvalds 		} else {
20071c92b4e5SDavid S. Miller 			unix_state_unlock(sk);
20081da177e4SLinus Torvalds 		}
20091da177e4SLinus Torvalds 
20101da177e4SLinus Torvalds 		other = NULL;
20111da177e4SLinus Torvalds 		if (err)
20121da177e4SLinus Torvalds 			goto out_free;
20131da177e4SLinus Torvalds 		goto restart;
20141da177e4SLinus Torvalds 	}
20151da177e4SLinus Torvalds 
20161da177e4SLinus Torvalds 	err = -EPIPE;
20171da177e4SLinus Torvalds 	if (other->sk_shutdown & RCV_SHUTDOWN)
20181da177e4SLinus Torvalds 		goto out_unlock;
20191da177e4SLinus Torvalds 
20201da177e4SLinus Torvalds 	if (sk->sk_type != SOCK_SEQPACKET) {
20211da177e4SLinus Torvalds 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
20221da177e4SLinus Torvalds 		if (err)
20231da177e4SLinus Torvalds 			goto out_unlock;
20241da177e4SLinus Torvalds 	}
20251da177e4SLinus Torvalds 
2026a5527ddaSRainer Weikusat 	/* other == sk && unix_peer(other) != sk if
2027a5527ddaSRainer Weikusat 	 * - unix_peer(sk) == NULL, destination address bound to sk
2028a5527ddaSRainer Weikusat 	 * - unix_peer(sk) == sk by time of get but disconnected before lock
2029a5527ddaSRainer Weikusat 	 */
2030a5527ddaSRainer Weikusat 	if (other != sk &&
203186b18aaaSQian Cai 	    unlikely(unix_peer(other) != sk &&
203286b18aaaSQian Cai 	    unix_recvq_full_lockless(other))) {
		/* Blocking send: wait on the receiver, then redo the lookup
		 * and all checks.  NOTE(review): other's state lock is not
		 * released on this path, so unix_wait_for_peer() presumably
		 * drops it - confirm.
		 */
20337d267278SRainer Weikusat 		if (timeo) {
20341da177e4SLinus Torvalds 			timeo = unix_wait_for_peer(other, timeo);
20351da177e4SLinus Torvalds 
20361da177e4SLinus Torvalds 			err = sock_intr_errno(timeo);
20371da177e4SLinus Torvalds 			if (signal_pending(current))
20381da177e4SLinus Torvalds 				goto out_free;
20391da177e4SLinus Torvalds 
20401da177e4SLinus Torvalds 			goto restart;
20411da177e4SLinus Torvalds 		}
20421da177e4SLinus Torvalds 
		/* Non-blocking send: the checks below need sk's state lock as
		 * well, so drop other's and retake both together.
		 */
20437d267278SRainer Weikusat 		if (!sk_locked) {
20447d267278SRainer Weikusat 			unix_state_unlock(other);
20457d267278SRainer Weikusat 			unix_state_double_lock(sk, other);
20467d267278SRainer Weikusat 		}
20477d267278SRainer Weikusat 
20487d267278SRainer Weikusat 		if (unix_peer(sk) != other ||
20497d267278SRainer Weikusat 		    unix_dgram_peer_wake_me(sk, other)) {
20507d267278SRainer Weikusat 			err = -EAGAIN;
			/* Tell out_unlock that sk must be unlocked too. */
20517d267278SRainer Weikusat 			sk_locked = 1;
20527d267278SRainer Weikusat 			goto out_unlock;
20537d267278SRainer Weikusat 		}
20547d267278SRainer Weikusat 
		/* Other's lock was dropped above, so repeat the checks from
		 * restart_locked, this time with both locks held.
		 */
20557d267278SRainer Weikusat 		if (!sk_locked) {
20567d267278SRainer Weikusat 			sk_locked = 1;
20577d267278SRainer Weikusat 			goto restart_locked;
20587d267278SRainer Weikusat 		}
20597d267278SRainer Weikusat 	}
20607d267278SRainer Weikusat 
20617d267278SRainer Weikusat 	if (unlikely(sk_locked))
20627d267278SRainer Weikusat 		unix_state_unlock(sk);
20637d267278SRainer Weikusat 
20643f66116eSAlban Crequy 	if (sock_flag(other, SOCK_RCVTSTAMP))
20653f66116eSAlban Crequy 		__net_timestamp(skb);
206616e57262SEric Dumazet 	maybe_add_creds(skb, sock, other);
20673c32da19SKirill Tkhai 	scm_stat_add(other, skb);
20687782040bSPaolo Abeni 	skb_queue_tail(&other->sk_receive_queue, skb);
20691c92b4e5SDavid S. Miller 	unix_state_unlock(other);
2070676d2369SDavid S. Miller 	other->sk_data_ready(other);
20711da177e4SLinus Torvalds 	sock_put(other);
20727cc05662SChristoph Hellwig 	scm_destroy(&scm);
20731da177e4SLinus Torvalds 	return len;
20741da177e4SLinus Torvalds 
20751da177e4SLinus Torvalds out_unlock:
20767d267278SRainer Weikusat 	if (sk_locked)
20777d267278SRainer Weikusat 		unix_state_unlock(sk);
20781c92b4e5SDavid S. Miller 	unix_state_unlock(other);
20791da177e4SLinus Torvalds out_free:
20801da177e4SLinus Torvalds 	kfree_skb(skb);
20811da177e4SLinus Torvalds out:
20821da177e4SLinus Torvalds 	if (other)
20831da177e4SLinus Torvalds 		sock_put(other);
20847cc05662SChristoph Hellwig 	scm_destroy(&scm);
20851da177e4SLinus Torvalds 	return err;
20861da177e4SLinus Torvalds }
20871da177e4SLinus Torvalds 
2088e370a723SEric Dumazet /* We use paged skbs for stream sockets, and limit occupancy to 32768
2089d4e9a408STobias Klauser  * bytes, and a minimum of a full page.
2090e370a723SEric Dumazet  */
/* unix_stream_sendmsg() uses this both to cap the size of each skb it
 * builds and, through get_order(), as the allocation order it requests.
 */
2091e370a723SEric Dumazet #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
20921da177e4SLinus Torvalds 
20934edf21aaSKuniyuki Iwashima #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
queue_oob(struct socket * sock,struct msghdr * msg,struct sock * other,struct scm_cookie * scm,bool fds_sent)20942aab4b96SEric Dumazet static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other,
20952aab4b96SEric Dumazet 		     struct scm_cookie *scm, bool fds_sent)
2096314001f0SRao Shoaib {
2097314001f0SRao Shoaib 	struct unix_sock *ousk = unix_sk(other);
2098314001f0SRao Shoaib 	struct sk_buff *skb;
2099314001f0SRao Shoaib 	int err = 0;
2100314001f0SRao Shoaib 
2101314001f0SRao Shoaib 	skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
2102314001f0SRao Shoaib 
2103314001f0SRao Shoaib 	if (!skb)
2104314001f0SRao Shoaib 		return err;
2105314001f0SRao Shoaib 
21062aab4b96SEric Dumazet 	err = unix_scm_to_skb(scm, skb, !fds_sent);
21072aab4b96SEric Dumazet 	if (err < 0) {
21082aab4b96SEric Dumazet 		kfree_skb(skb);
21092aab4b96SEric Dumazet 		return err;
21102aab4b96SEric Dumazet 	}
2111314001f0SRao Shoaib 	skb_put(skb, 1);
2112314001f0SRao Shoaib 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
2113314001f0SRao Shoaib 
2114314001f0SRao Shoaib 	if (err) {
2115314001f0SRao Shoaib 		kfree_skb(skb);
2116314001f0SRao Shoaib 		return err;
2117314001f0SRao Shoaib 	}
2118314001f0SRao Shoaib 
2119314001f0SRao Shoaib 	unix_state_lock(other);
212019eed721SRao Shoaib 
212119eed721SRao Shoaib 	if (sock_flag(other, SOCK_DEAD) ||
212219eed721SRao Shoaib 	    (other->sk_shutdown & RCV_SHUTDOWN)) {
212319eed721SRao Shoaib 		unix_state_unlock(other);
212419eed721SRao Shoaib 		kfree_skb(skb);
212519eed721SRao Shoaib 		return -EPIPE;
212619eed721SRao Shoaib 	}
212719eed721SRao Shoaib 
2128314001f0SRao Shoaib 	maybe_add_creds(skb, sock, other);
2129314001f0SRao Shoaib 	skb_get(skb);
2130314001f0SRao Shoaib 
2131d59ae931SKuniyuki Iwashima 	scm_stat_add(other, skb);
2132d59ae931SKuniyuki Iwashima 
2133d59ae931SKuniyuki Iwashima 	spin_lock(&other->sk_receive_queue.lock);
2134314001f0SRao Shoaib 	if (ousk->oob_skb)
213519eed721SRao Shoaib 		consume_skb(ousk->oob_skb);
2136e82025c6SKuniyuki Iwashima 	WRITE_ONCE(ousk->oob_skb, skb);
2137d59ae931SKuniyuki Iwashima 	__skb_queue_tail(&other->sk_receive_queue, skb);
2138d59ae931SKuniyuki Iwashima 	spin_unlock(&other->sk_receive_queue.lock);
2139314001f0SRao Shoaib 
2140314001f0SRao Shoaib 	sk_send_sigurg(other);
2141314001f0SRao Shoaib 	unix_state_unlock(other);
2142314001f0SRao Shoaib 	other->sk_data_ready(other);
2143314001f0SRao Shoaib 
2144314001f0SRao Shoaib 	return err;
2145314001f0SRao Shoaib }
2146314001f0SRao Shoaib #endif
2147314001f0SRao Shoaib 
/*
 * Stream send path.
 *
 * The payload is cut into skbs which are appended one at a time to the
 * peer's receive queue.  Files carried in the control message are attached
 * to the first skb only.  With CONFIG_AF_UNIX_OOB, a MSG_OOB send queues
 * its last byte separately through queue_oob().
 *
 * Returns the number of bytes queued.  On failure a partial send still
 * returns the byte count; only when nothing was queued is the negative
 * errno returned.  On the pipe-error paths SIGPIPE is raised if nothing was
 * sent and MSG_NOSIGNAL is not set.
 */
unix_stream_sendmsg(struct socket * sock,struct msghdr * msg,size_t len)21481b784140SYing Xue static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
21491b784140SYing Xue 			       size_t len)
21501da177e4SLinus Torvalds {
21511da177e4SLinus Torvalds 	struct sock *sk = sock->sk;
21521da177e4SLinus Torvalds 	struct sock *other = NULL;
21531da177e4SLinus Torvalds 	int err, size;
2154f78a5fdaSDavid S. Miller 	struct sk_buff *skb;
21551da177e4SLinus Torvalds 	int sent = 0;
21567cc05662SChristoph Hellwig 	struct scm_cookie scm;
21578ba69ba6SMiklos Szeredi 	bool fds_sent = false;
2158e370a723SEric Dumazet 	int data_len;
21591da177e4SLinus Torvalds 
21605f23b734Sdann frazier 	wait_for_unix_gc();
21617cc05662SChristoph Hellwig 	err = scm_send(sock, msg, &scm, false);
21621da177e4SLinus Torvalds 	if (err < 0)
21631da177e4SLinus Torvalds 		return err;
21641da177e4SLinus Torvalds 
	/* With OOB support the final byte is held back for queue_oob(); a
	 * zero-length MSG_OOB send, or any MSG_OOB send without that support,
	 * fails with -EOPNOTSUPP.
	 */
21651da177e4SLinus Torvalds 	err = -EOPNOTSUPP;
2166314001f0SRao Shoaib 	if (msg->msg_flags & MSG_OOB) {
21674edf21aaSKuniyuki Iwashima #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2168314001f0SRao Shoaib 		if (len)
2169314001f0SRao Shoaib 			len--;
2170314001f0SRao Shoaib 		else
2171314001f0SRao Shoaib #endif
21721da177e4SLinus Torvalds 			goto out_err;
2173314001f0SRao Shoaib 	}
21741da177e4SLinus Torvalds 
	/* A destination address is always rejected here; only the errno
	 * depends on whether the socket is connected.
	 */
21751da177e4SLinus Torvalds 	if (msg->msg_namelen) {
2176776fcc45SKuniyuki Iwashima 		err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
21771da177e4SLinus Torvalds 		goto out_err;
21781da177e4SLinus Torvalds 	} else {
21791da177e4SLinus Torvalds 		err = -ENOTCONN;
		/* NOTE(review): unix_peer(), not unix_peer_get() - no
		 * reference is taken on the peer in this function.
		 */
2180830a1e5cSBenjamin LaHaise 		other = unix_peer(sk);
21811da177e4SLinus Torvalds 		if (!other)
21821da177e4SLinus Torvalds 			goto out_err;
21831da177e4SLinus Torvalds 	}
21841da177e4SLinus Torvalds 
21850688d4e4SBreno Leitao 	if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
21861da177e4SLinus Torvalds 		goto pipe_err;
21871da177e4SLinus Torvalds 
21886eba6a37SEric Dumazet 	while (sent < len) {
21891da177e4SLinus Torvalds 		size = len - sent;
21901da177e4SLinus Torvalds 
2191a0dbf5f8SDavid Howells 		if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
			/* Zero-sized skb; the payload is spliced in below. */
2192a0dbf5f8SDavid Howells 			skb = sock_alloc_send_pskb(sk, 0, 0,
2193a0dbf5f8SDavid Howells 						   msg->msg_flags & MSG_DONTWAIT,
2194a0dbf5f8SDavid Howells 						   &err, 0);
2195a0dbf5f8SDavid Howells 		} else {
21961da177e4SLinus Torvalds 			/* Keep two messages in the pipe so it schedules better */
2197996ec22fSKuniyuki Iwashima 			size = min_t(int, size, (READ_ONCE(sk->sk_sndbuf) >> 1) - 64);
21981da177e4SLinus Torvalds 
2199e370a723SEric Dumazet 			/* allow fallback to order-0 allocations */
2200e370a723SEric Dumazet 			size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
22011da177e4SLinus Torvalds 
			/* Bytes beyond SKB_MAX_HEAD(0) become the paged part,
			 * rounded up to whole pages but never more than size.
			 */
2202e370a723SEric Dumazet 			data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
22031da177e4SLinus Torvalds 
220431ff6aa5SKirill Tkhai 			data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
220531ff6aa5SKirill Tkhai 
2206e370a723SEric Dumazet 			skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
220728d64271SEric Dumazet 						   msg->msg_flags & MSG_DONTWAIT, &err,
220828d64271SEric Dumazet 						   get_order(UNIX_SKB_FRAGS_SZ));
2209a0dbf5f8SDavid Howells 		}
2210e370a723SEric Dumazet 		if (!skb)
22111da177e4SLinus Torvalds 			goto out_err;
22121da177e4SLinus Torvalds 
2213f78a5fdaSDavid S. Miller 		/* Only send the fds in the first buffer */
22147cc05662SChristoph Hellwig 		err = unix_scm_to_skb(&scm, skb, !fds_sent);
221525888e30SEric Dumazet 		if (err < 0) {
22166209344fSMiklos Szeredi 			kfree_skb(skb);
2217f78a5fdaSDavid S. Miller 			goto out_err;
22186209344fSMiklos Szeredi 		}
22198ba69ba6SMiklos Szeredi 		fds_sent = true;
22201da177e4SLinus Torvalds 
2221a0dbf5f8SDavid Howells 		if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2222*bc6d8cc2SFrederik Deweerdt 			skb->ip_summed = CHECKSUM_UNNECESSARY;
2223a0dbf5f8SDavid Howells 			err = skb_splice_from_iter(skb, &msg->msg_iter, size,
2224a0dbf5f8SDavid Howells 						   sk->sk_allocation);
2225a0dbf5f8SDavid Howells 			if (err < 0) {
2226a0dbf5f8SDavid Howells 				kfree_skb(skb);
2227a0dbf5f8SDavid Howells 				goto out_err;
2228a0dbf5f8SDavid Howells 			}
			/* The splice may take fewer bytes than asked for;
			 * continue with the count it returned and add it to
			 * sk->sk_wmem_alloc.
			 */
2229a0dbf5f8SDavid Howells 			size = err;
2230a0dbf5f8SDavid Howells 			refcount_add(size, &sk->sk_wmem_alloc);
2231a0dbf5f8SDavid Howells 		} else {
2232e370a723SEric Dumazet 			skb_put(skb, size - data_len);
2233e370a723SEric Dumazet 			skb->data_len = data_len;
2234e370a723SEric Dumazet 			skb->len = size;
2235c0371da6SAl Viro 			err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
22366eba6a37SEric Dumazet 			if (err) {
22371da177e4SLinus Torvalds 				kfree_skb(skb);
2238f78a5fdaSDavid S. Miller 				goto out_err;
22391da177e4SLinus Torvalds 			}
2240a0dbf5f8SDavid Howells 		}
22411da177e4SLinus Torvalds 
22421c92b4e5SDavid S. Miller 		unix_state_lock(other);
22431da177e4SLinus Torvalds 
		/* The peer is re-checked under its state lock for every skb. */
22441da177e4SLinus Torvalds 		if (sock_flag(other, SOCK_DEAD) ||
22451da177e4SLinus Torvalds 		    (other->sk_shutdown & RCV_SHUTDOWN))
22461da177e4SLinus Torvalds 			goto pipe_err_free;
22471da177e4SLinus Torvalds 
224816e57262SEric Dumazet 		maybe_add_creds(skb, sock, other);
22493c32da19SKirill Tkhai 		scm_stat_add(other, skb);
22507782040bSPaolo Abeni 		skb_queue_tail(&other->sk_receive_queue, skb);
22511c92b4e5SDavid S. Miller 		unix_state_unlock(other);
2252676d2369SDavid S. Miller 		other->sk_data_ready(other);
22531da177e4SLinus Torvalds 		sent += size;
22541da177e4SLinus Torvalds 	}
22551da177e4SLinus Torvalds 
22564edf21aaSKuniyuki Iwashima #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	/* Queue the byte that was held back from len at the top. */
2257314001f0SRao Shoaib 	if (msg->msg_flags & MSG_OOB) {
22582aab4b96SEric Dumazet 		err = queue_oob(sock, msg, other, &scm, fds_sent);
2259314001f0SRao Shoaib 		if (err)
2260314001f0SRao Shoaib 			goto out_err;
2261314001f0SRao Shoaib 		sent++;
2262314001f0SRao Shoaib 	}
2263314001f0SRao Shoaib #endif
2264314001f0SRao Shoaib 
22657cc05662SChristoph Hellwig 	scm_destroy(&scm);
22661da177e4SLinus Torvalds 
22671da177e4SLinus Torvalds 	return sent;
22681da177e4SLinus Torvalds 
22691da177e4SLinus Torvalds pipe_err_free:
22701c92b4e5SDavid S. Miller 	unix_state_unlock(other);
22711da177e4SLinus Torvalds 	kfree_skb(skb);
22721da177e4SLinus Torvalds pipe_err:
22731da177e4SLinus Torvalds 	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
22741da177e4SLinus Torvalds 		send_sig(SIGPIPE, current, 0);
22751da177e4SLinus Torvalds 	err = -EPIPE;
22761da177e4SLinus Torvalds out_err:
22777cc05662SChristoph Hellwig 	scm_destroy(&scm);
	/* A partial send reports the bytes already queued, not the error. */
22781da177e4SLinus Torvalds 	return sent ? : err;
22791da177e4SLinus Torvalds }
22801da177e4SLinus Torvalds 
/*
 * SEQPACKET send: report a pending socket error first, insist on an
 * established connection, discard any destination address and then reuse
 * the datagram send path.
 */
static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
				  size_t len)
{
	struct sock *sk = sock->sk;
	int pending = sock_error(sk);

	if (pending)
		return pending;

	if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
		return -ENOTCONN;

	/* Clearing msg_namelen makes unix_dgram_sendmsg() use the peer. */
	if (msg->msg_namelen)
		msg->msg_namelen = 0;

	return unix_dgram_sendmsg(sock, msg, len);
}
22991da177e4SLinus Torvalds 
/*
 * SEQPACKET receive: fail with -ENOTCONN unless the socket is in
 * TCP_ESTABLISHED, otherwise defer to unix_dgram_recvmsg().
 */
static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
				  size_t size, int flags)
{
	bool connected = READ_ONCE(sock->sk->sk_state) == TCP_ESTABLISHED;

	return connected ? unix_dgram_recvmsg(sock, msg, size, flags) : -ENOTCONN;
}
2310a05d2ad1SEric W. Biederman 
unix_copy_addr(struct msghdr * msg,struct sock * sk)23111da177e4SLinus Torvalds static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
23121da177e4SLinus Torvalds {
2313ae3b5641SAl Viro 	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
23141da177e4SLinus Torvalds 
2315ae3b5641SAl Viro 	if (addr) {
2316ae3b5641SAl Viro 		msg->msg_namelen = addr->len;
2317ae3b5641SAl Viro 		memcpy(msg->msg_name, addr->name, addr->len);
23181da177e4SLinus Torvalds 	}
23191da177e4SLinus Torvalds }
23201da177e4SLinus Torvalds 
/*
 * Receive one datagram queued on @sk into @msg.
 *
 * MSG_OOB in @flags is rejected with -EOPNOTSUPP.  Otherwise the function
 * repeatedly tries to take an skb from sk->sk_receive_queue while holding
 * u->iolock, and sleeps in __skb_wait_for_more_packets() between attempts
 * for as long as the attempt fails with -EAGAIN and the receive timeout
 * has not run out.
 *
 * On success at most @size bytes (starting at the peek offset) are copied
 * to @msg, MSG_TRUNC is set in msg->msg_flags if the datagram did not fit,
 * and credentials / passed fds are delivered through scm_recv_unix().
 *
 * Returns the number of bytes copied - or, when MSG_TRUNC is set in
 * @flags, the length of the datagram past the peek offset - or a negative
 * errno.  A SOCK_SEQPACKET socket with RCV_SHUTDOWN set returns 0 instead
 * of -EAGAIN.
 */
__unix_dgram_recvmsg(struct sock * sk,struct msghdr * msg,size_t size,int flags)23219825d866SCong Wang int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
23229825d866SCong Wang 			 int flags)
23231da177e4SLinus Torvalds {
23247cc05662SChristoph Hellwig 	struct scm_cookie scm;
23259825d866SCong Wang 	struct socket *sock = sk->sk_socket;
23261da177e4SLinus Torvalds 	struct unix_sock *u = unix_sk(sk);
232764874280SRainer Weikusat 	struct sk_buff *skb, *last;
232864874280SRainer Weikusat 	long timeo;
2329fd69c399SPaolo Abeni 	int skip;
23301da177e4SLinus Torvalds 	int err;
23311da177e4SLinus Torvalds 
23321da177e4SLinus Torvalds 	err = -EOPNOTSUPP;
23331da177e4SLinus Torvalds 	if (flags&MSG_OOB)
23341da177e4SLinus Torvalds 		goto out;
23351da177e4SLinus Torvalds 
233664874280SRainer Weikusat 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
233764874280SRainer Weikusat 
233864874280SRainer Weikusat 	do {
23396e1ce3c3SLinus Torvalds 		mutex_lock(&u->iolock);
23401da177e4SLinus Torvalds 
2341f55bb7f9SPavel Emelyanov 		skip = sk_peek_offset(sk, flags);
2342b50b0580SSabrina Dubroca 		skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2343e427cad6SPaolo Abeni 					      &skip, &err, &last);
		/*
		 * Got a datagram: leave the loop with iolock still held;
		 * it is released at out_free below.
		 */
2344e427cad6SPaolo Abeni 		if (skb) {
2345e427cad6SPaolo Abeni 			if (!(flags & MSG_PEEK))
2346e427cad6SPaolo Abeni 				scm_stat_del(sk, skb);
234764874280SRainer Weikusat 			break;
2348e427cad6SPaolo Abeni 		}
2349f55bb7f9SPavel Emelyanov 
23506e1ce3c3SLinus Torvalds 		mutex_unlock(&u->iolock);
235164874280SRainer Weikusat 
		/* Only -EAGAIN is worth waiting on; anything else is final. */
235264874280SRainer Weikusat 		if (err != -EAGAIN)
235364874280SRainer Weikusat 			break;
235464874280SRainer Weikusat 	} while (timeo &&
2355b50b0580SSabrina Dubroca 		 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2356b50b0580SSabrina Dubroca 					      &err, &timeo, last));
235764874280SRainer Weikusat 
23586e1ce3c3SLinus Torvalds 	if (!skb) { /* implies iolock unlocked */
23590a112258SFlorian Zumbiehl 		unix_state_lock(sk);
23600a112258SFlorian Zumbiehl 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
23610a112258SFlorian Zumbiehl 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
23620a112258SFlorian Zumbiehl 		    (sk->sk_shutdown & RCV_SHUTDOWN))
23630a112258SFlorian Zumbiehl 			err = 0;
23640a112258SFlorian Zumbiehl 		unix_state_unlock(sk);
236564874280SRainer Weikusat 		goto out;
23660a112258SFlorian Zumbiehl 	}
23671da177e4SLinus Torvalds 
	/* Wake any sleeper on u->peer_wait with write-readiness poll flags. */
236877b75f4dSRainer Weikusat 	if (wq_has_sleeper(&u->peer_wait))
236967426b75SEric Dumazet 		wake_up_interruptible_sync_poll(&u->peer_wait,
2370a9a08845SLinus Torvalds 						EPOLLOUT | EPOLLWRNORM |
2371a9a08845SLinus Torvalds 						EPOLLWRBAND);
23721da177e4SLinus Torvalds 
	/* Report the address of the socket recorded in skb->sk, if requested. */
23731da177e4SLinus Torvalds 	if (msg->msg_name)
23741da177e4SLinus Torvalds 		unix_copy_addr(msg, skb->sk);
23751da177e4SLinus Torvalds 
	/*
	 * Clamp the copy length to what is left of the datagram past the
	 * peek offset; flag truncation if the caller's buffer is smaller.
	 */
2376f55bb7f9SPavel Emelyanov 	if (size > skb->len - skip)
2377f55bb7f9SPavel Emelyanov 		size = skb->len - skip;
2378f55bb7f9SPavel Emelyanov 	else if (size < skb->len - skip)
23791da177e4SLinus Torvalds 		msg->msg_flags |= MSG_TRUNC;
23801da177e4SLinus Torvalds 
238151f3d02bSDavid S. Miller 	err = skb_copy_datagram_msg(skb, skip, msg, size);
23821da177e4SLinus Torvalds 	if (err)
23831da177e4SLinus Torvalds 		goto out_free;
23841da177e4SLinus Torvalds 
23853f66116eSAlban Crequy 	if (sock_flag(sk, SOCK_RCVTSTAMP))
23863f66116eSAlban Crequy 		__sock_recv_timestamp(msg, sk, skb);
23873f66116eSAlban Crequy 
23887cc05662SChristoph Hellwig 	memset(&scm, 0, sizeof(scm));
23897cc05662SChristoph Hellwig 
	/* Credentials are always taken from the skb's control block. */
23907cc05662SChristoph Hellwig 	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
23917cc05662SChristoph Hellwig 	unix_set_secdata(&scm, skb);
23921da177e4SLinus Torvalds 
23936eba6a37SEric Dumazet 	if (!(flags & MSG_PEEK)) {
23941da177e4SLinus Torvalds 		if (UNIXCB(skb).fp)
23957cc05662SChristoph Hellwig 			unix_detach_fds(&scm, skb);
2396f55bb7f9SPavel Emelyanov 
2397f55bb7f9SPavel Emelyanov 		sk_peek_offset_bwd(sk, skb->len);
23986eba6a37SEric Dumazet 	} else {
23991da177e4SLinus Torvalds 		/* It is questionable: on PEEK we could:
24001da177e4SLinus Torvalds 		   - do not return fds - good, but too simple 8)
24011da177e4SLinus Torvalds 		   - return fds, and do not return them on read (old strategy,
24021da177e4SLinus Torvalds 		     apparently wrong)
24031da177e4SLinus Torvalds 		   - clone fds (I chose it for now, it is the most universal
24041da177e4SLinus Torvalds 		     solution)
24051da177e4SLinus Torvalds 
24061da177e4SLinus Torvalds 		   POSIX 1003.1g does not actually define this clearly
24071da177e4SLinus Torvalds 		   at all. POSIX 1003.1g doesn't define a lot of things
24081da177e4SLinus Torvalds 		   clearly however!
24091da177e4SLinus Torvalds 
24101da177e4SLinus Torvalds 		*/
2411f55bb7f9SPavel Emelyanov 
2412f55bb7f9SPavel Emelyanov 		sk_peek_offset_fwd(sk, size);
2413f55bb7f9SPavel Emelyanov 
24141da177e4SLinus Torvalds 		if (UNIXCB(skb).fp)
2415cbcf0112SMiklos Szeredi 			unix_peek_fds(&scm, skb);
24161da177e4SLinus Torvalds 	}
	/*
	 * With MSG_TRUNC in flags the caller wants the datagram length past
	 * the peek offset, not the (possibly clamped) number of bytes copied.
	 */
24179f6f9af7SEric Dumazet 	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
24181da177e4SLinus Torvalds 
2419a9c49cc2SAlexander Mikhalitsyn 	scm_recv_unix(sock, msg, &scm, flags);
24201da177e4SLinus Torvalds 
	/*
	 * Reached on success and on copy failure alike: release the skb and
	 * drop the iolock that was still held when the loop was left.
	 */
24211da177e4SLinus Torvalds out_free:
24221da177e4SLinus Torvalds 	skb_free_datagram(sk, skb);
24236e1ce3c3SLinus Torvalds 	mutex_unlock(&u->iolock);
24241da177e4SLinus Torvalds out:
24251da177e4SLinus Torvalds 	return err;
24261da177e4SLinus Torvalds }
24271da177e4SLinus Torvalds 
unix_dgram_recvmsg(struct socket * sock,struct msghdr * msg,size_t size,int flags)24289825d866SCong Wang static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
24299825d866SCong Wang 			      int flags)
24309825d866SCong Wang {
24319825d866SCong Wang 	struct sock *sk = sock->sk;
24329825d866SCong Wang 
24339825d866SCong Wang #ifdef CONFIG_BPF_SYSCALL
243494531cfcSJiang Wang 	const struct proto *prot = READ_ONCE(sk->sk_prot);
243594531cfcSJiang Wang 
243694531cfcSJiang Wang 	if (prot != &unix_dgram_proto)
2437ec095263SOliver Hartkopp 		return prot->recvmsg(sk, msg, size, flags, NULL);
24389825d866SCong Wang #endif
24399825d866SCong Wang 	return __unix_dgram_recvmsg(sk, msg, size, flags);
24409825d866SCong Wang }
24419825d866SCong Wang 
/*
 * Dequeue one skb from @sk without blocking and feed it to @recv_actor.
 *
 * The dequeue itself runs under the socket's iolock; the actor is called
 * after the lock has been dropped.  Returns the actor's result, or the
 * error reported by skb_recv_datagram() when nothing could be dequeued.
 */
static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
	struct unix_sock *usk = unix_sk(sk);
	struct sk_buff *pkt;
	int rc;

	mutex_lock(&usk->iolock);
	pkt = skb_recv_datagram(sk, MSG_DONTWAIT, &rc);
	mutex_unlock(&usk->iolock);

	return pkt ? recv_actor(sk, pkt) : rc;
}
245629df44faSCong Wang 
/*
 *	Sleep until more data has arrived. But check for races..
 *
 *	Waits on sk_sleep(sk) until one of the following holds: the tail of
 *	the receive queue is no longer @last, that tail's length differs
 *	from @last_len, sk_err or RCV_SHUTDOWN is set on the socket, a
 *	signal is pending for the current task, or @timeo has reached zero.
 *	The loop is also left if the socket is found SOCK_DEAD after a sleep.
 *
 *	@freezable adds TASK_FREEZABLE to the sleep state.
 *
 *	The unix state lock is taken and released internally (and dropped
 *	around the actual sleep).  Returns the remaining timeout as reported
 *	by schedule_timeout().
 */
unix_stream_data_wait(struct sock * sk,long timeo,struct sk_buff * last,unsigned int last_len,bool freezable)246079f632c7SBenjamin Poirier static long unix_stream_data_wait(struct sock *sk, long timeo,
246106a77b07SWANG Cong 				  struct sk_buff *last, unsigned int last_len,
246206a77b07SWANG Cong 				  bool freezable)
24631da177e4SLinus Torvalds {
	/* freezable is a bool, so the product is either 0 or TASK_FREEZABLE. */
2464f5d39b02SPeter Zijlstra 	unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
24652b514574SHannes Frederic Sowa 	struct sk_buff *tail;
24661da177e4SLinus Torvalds 	DEFINE_WAIT(wait);
24671da177e4SLinus Torvalds 
24681c92b4e5SDavid S. Miller 	unix_state_lock(sk);
24691da177e4SLinus Torvalds 
24701da177e4SLinus Torvalds 	for (;;) {
		/* Queue ourselves before testing the conditions below. */
2471f5d39b02SPeter Zijlstra 		prepare_to_wait(sk_sleep(sk), &wait, state);
24721da177e4SLinus Torvalds 
24732b514574SHannes Frederic Sowa 		tail = skb_peek_tail(&sk->sk_receive_queue);
24742b514574SHannes Frederic Sowa 		if (tail != last ||
24752b514574SHannes Frederic Sowa 		    (tail && tail->len != last_len) ||
24761da177e4SLinus Torvalds 		    sk->sk_err ||
24771da177e4SLinus Torvalds 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
24781da177e4SLinus Torvalds 		    signal_pending(current) ||
24791da177e4SLinus Torvalds 		    !timeo)
24801da177e4SLinus Torvalds 			break;
24811da177e4SLinus Torvalds 
24829cd3e072SEric Dumazet 		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
		/* The state lock is not held while sleeping. */
24831c92b4e5SDavid S. Miller 		unix_state_unlock(sk);
248406a77b07SWANG Cong 		timeo = schedule_timeout(timeo);
24851c92b4e5SDavid S. Miller 		unix_state_lock(sk);
2486b48732e4SMark Salyzyn 
		/* Note: this exit skips the sk_clear_bit() below. */
2487b48732e4SMark Salyzyn 		if (sock_flag(sk, SOCK_DEAD))
2488b48732e4SMark Salyzyn 			break;
2489b48732e4SMark Salyzyn 
24909cd3e072SEric Dumazet 		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
24911da177e4SLinus Torvalds 	}
24921da177e4SLinus Torvalds 
2493aa395145SEric Dumazet 	finish_wait(sk_sleep(sk), &wait);
24941c92b4e5SDavid S. Miller 	unix_state_unlock(sk);
24951da177e4SLinus Torvalds 	return timeo;
24961da177e4SLinus Torvalds }
24971da177e4SLinus Torvalds 
unix_skb_len(const struct sk_buff * skb)2498e370a723SEric Dumazet static unsigned int unix_skb_len(const struct sk_buff *skb)
2499e370a723SEric Dumazet {
2500e370a723SEric Dumazet 	return skb->len - UNIXCB(skb).consumed;
2501e370a723SEric Dumazet }
2502e370a723SEric Dumazet 
/*
 * Parameters of one stream read, shared by the recvmsg and splice paths
 * and handed to unix_stream_read_generic().
 *
 * recv_actor:   callback that moves data out of an skb; invoked as
 *               recv_actor(skb, skip, chunk, state).  Its return value is
 *               added to the running byte count; a negative value aborts
 *               the read.
 * socket:       socket being read from.
 * msg:          destination message for the recvmsg path; left NULL by the
 *               splice path (readers check it before use).
 * pipe:         destination pipe, used by the splice actor.
 * size:         maximum number of bytes to read.
 * flags:        MSG_* receive flags (MSG_PEEK, MSG_OOB, MSG_DONTWAIT, ...).
 * splice_flags: flags forwarded to skb_splice_bits() by the splice actor.
 */
25032b514574SHannes Frederic Sowa struct unix_stream_read_state {
25042b514574SHannes Frederic Sowa 	int (*recv_actor)(struct sk_buff *, int, int,
25052b514574SHannes Frederic Sowa 			  struct unix_stream_read_state *);
25062b514574SHannes Frederic Sowa 	struct socket *socket;
25072b514574SHannes Frederic Sowa 	struct msghdr *msg;
25082b514574SHannes Frederic Sowa 	struct pipe_inode_info *pipe;
25092b514574SHannes Frederic Sowa 	size_t size;
25102b514574SHannes Frederic Sowa 	int flags;
25112b514574SHannes Frederic Sowa 	unsigned int splice_flags;
25122b514574SHannes Frederic Sowa };
25132b514574SHannes Frederic Sowa 
2514314001f0SRao Shoaib #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
/*
 * Serve a MSG_OOB receive: deliver one byte from the socket's pending
 * out-of-band skb (u->oob_skb) through state->recv_actor.
 *
 * Returns 1 and sets MSG_OOB in state->msg->msg_flags on success,
 * -EINVAL if SOCK_URGINLINE is set or no OOB skb is pending, and -EFAULT
 * if the actor reports a negative result.
 *
 * Lock order used here: u->iolock, then the unix state lock, then the
 * receive queue spinlock.  iolock stays held across the actor call.
 */
unix_stream_recv_urg(struct unix_stream_read_state * state)2515314001f0SRao Shoaib static int unix_stream_recv_urg(struct unix_stream_read_state *state)
2516314001f0SRao Shoaib {
2517314001f0SRao Shoaib 	struct socket *sock = state->socket;
2518314001f0SRao Shoaib 	struct sock *sk = sock->sk;
2519314001f0SRao Shoaib 	struct unix_sock *u = unix_sk(sk);
2520314001f0SRao Shoaib 	int chunk = 1;
2521876c14adSRao Shoaib 	struct sk_buff *oob_skb;
2522314001f0SRao Shoaib 
2523876c14adSRao Shoaib 	mutex_lock(&u->iolock);
2524876c14adSRao Shoaib 	unix_state_lock(sk);
2525d59ae931SKuniyuki Iwashima 	spin_lock(&sk->sk_receive_queue.lock);
2526876c14adSRao Shoaib 
	/* Nothing to return out of band: release everything in reverse order. */
2527876c14adSRao Shoaib 	if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
2528d59ae931SKuniyuki Iwashima 		spin_unlock(&sk->sk_receive_queue.lock);
2529876c14adSRao Shoaib 		unix_state_unlock(sk);
2530876c14adSRao Shoaib 		mutex_unlock(&u->iolock);
2531314001f0SRao Shoaib 		return -EINVAL;
2532876c14adSRao Shoaib 	}
2533314001f0SRao Shoaib 
2534876c14adSRao Shoaib 	oob_skb = u->oob_skb;
2535876c14adSRao Shoaib 
	/*
	 * A real read detaches the skb from u->oob_skb; a peek leaves it in
	 * place and takes an extra reference instead.  Either way the
	 * consume_skb() below drops exactly one reference (presumably the one
	 * u->oob_skb held in the non-peek case - confirm against the sender).
	 */
2536e82025c6SKuniyuki Iwashima 	if (!(state->flags & MSG_PEEK))
2537e82025c6SKuniyuki Iwashima 		WRITE_ONCE(u->oob_skb, NULL);
2538069a3ec3SEric Dumazet 	else
2539069a3ec3SEric Dumazet 		skb_get(oob_skb);
2540d59ae931SKuniyuki Iwashima 
2541d59ae931SKuniyuki Iwashima 	spin_unlock(&sk->sk_receive_queue.lock);
2542876c14adSRao Shoaib 	unix_state_unlock(sk);
2543876c14adSRao Shoaib 
	/* Hand over a single byte (chunk == 1) at skip offset 0. */
2544876c14adSRao Shoaib 	chunk = state->recv_actor(oob_skb, 0, chunk, state);
2545876c14adSRao Shoaib 
	/* Mark the byte as consumed; this is done even if the actor failed. */
2546069a3ec3SEric Dumazet 	if (!(state->flags & MSG_PEEK))
2547876c14adSRao Shoaib 		UNIXCB(oob_skb).consumed += 1;
2548069a3ec3SEric Dumazet 
2549069a3ec3SEric Dumazet 	consume_skb(oob_skb);
2550876c14adSRao Shoaib 
2551876c14adSRao Shoaib 	mutex_unlock(&u->iolock);
2552876c14adSRao Shoaib 
	/* Any negative actor result is reported to the caller as -EFAULT. */
2553314001f0SRao Shoaib 	if (chunk < 0)
2554314001f0SRao Shoaib 		return -EFAULT;
2555314001f0SRao Shoaib 
2556314001f0SRao Shoaib 	state->msg->msg_flags |= MSG_OOB;
2557314001f0SRao Shoaib 	return 1;
2558314001f0SRao Shoaib }
2559314001f0SRao Shoaib 
/*
 * Decide what an in-band stream read should do with @skb, the next
 * candidate skb, with respect to out-of-band data.
 *
 * @copied is the number of bytes the current read has gathered so far.
 * Returns the skb the caller should read from next, or NULL.
 *
 *  - @skb has no unread bytes and this is not a peek: unlink and release
 *    it, return NULL.
 *  - @skb is not the OOB skb: returned unchanged.
 *  - @skb is the OOB skb and data was already copied: return NULL (the
 *    caller stops its read when it sees NULL with copied != 0).
 *  - @skb is the OOB skb, real read, SOCK_URGINLINE: clear u->oob_skb,
 *    drop one reference, and return @skb so it is read in line.
 *  - @skb is the OOB skb, real read, no SOCK_URGINLINE: unlink it, clear
 *    u->oob_skb, free it, and return the new head of the queue.
 *  - @skb is the OOB skb, peek, no SOCK_URGINLINE: return the skb after it.
 */
manage_oob(struct sk_buff * skb,struct sock * sk,int flags,int copied)2560314001f0SRao Shoaib static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
2561314001f0SRao Shoaib 				  int flags, int copied)
2562314001f0SRao Shoaib {
2563314001f0SRao Shoaib 	struct unix_sock *u = unix_sk(sk);
2564314001f0SRao Shoaib 
2565314001f0SRao Shoaib 	if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
2566314001f0SRao Shoaib 		skb_unlink(skb, &sk->sk_receive_queue);
2567314001f0SRao Shoaib 		consume_skb(skb);
2568314001f0SRao Shoaib 		skb = NULL;
2569314001f0SRao Shoaib 	} else {
2570d59ae931SKuniyuki Iwashima 		struct sk_buff *unlinked_skb = NULL;
2571d59ae931SKuniyuki Iwashima 
		/* u->oob_skb is examined and updated under the queue lock. */
2572d59ae931SKuniyuki Iwashima 		spin_lock(&sk->sk_receive_queue.lock);
2573d59ae931SKuniyuki Iwashima 
2574314001f0SRao Shoaib 		if (skb == u->oob_skb) {
2575314001f0SRao Shoaib 			if (copied) {
2576314001f0SRao Shoaib 				skb = NULL;
2577185c72f6SRao Shoaib 			} else if (!(flags & MSG_PEEK)) {
2578185c72f6SRao Shoaib 				if (sock_flag(sk, SOCK_URGINLINE)) {
2579e82025c6SKuniyuki Iwashima 					WRITE_ONCE(u->oob_skb, NULL);
2580314001f0SRao Shoaib 					consume_skb(skb);
2581022d81a7SKuniyuki Iwashima 				} else {
2582d59ae931SKuniyuki Iwashima 					__skb_unlink(skb, &sk->sk_receive_queue);
2583601a89eaSKuniyuki Iwashima 					WRITE_ONCE(u->oob_skb, NULL);
2584d59ae931SKuniyuki Iwashima 					unlinked_skb = skb;
2585314001f0SRao Shoaib 					skb = skb_peek(&sk->sk_receive_queue);
2586314001f0SRao Shoaib 				}
2587185c72f6SRao Shoaib 			} else if (!sock_flag(sk, SOCK_URGINLINE)) {
2588185c72f6SRao Shoaib 				skb = skb_peek_next(skb, &sk->sk_receive_queue);
2589185c72f6SRao Shoaib 			}
2590314001f0SRao Shoaib 		}
2591d59ae931SKuniyuki Iwashima 
2592d59ae931SKuniyuki Iwashima 		spin_unlock(&sk->sk_receive_queue.lock);
2593d59ae931SKuniyuki Iwashima 
		/*
		 * The unlinked OOB skb is freed only after the spinlock is
		 * released.  skb_unref() drops one reference and is expected
		 * to report that another remains, which kfree_skb() then drops.
		 */
2594d59ae931SKuniyuki Iwashima 		if (unlinked_skb) {
2595d59ae931SKuniyuki Iwashima 			WARN_ON_ONCE(skb_unref(unlinked_skb));
2596d59ae931SKuniyuki Iwashima 			kfree_skb(unlinked_skb);
2597d59ae931SKuniyuki Iwashima 		}
2598314001f0SRao Shoaib 	}
2599314001f0SRao Shoaib 	return skb;
2600314001f0SRao Shoaib }
2601314001f0SRao Shoaib #endif
2602314001f0SRao Shoaib 
/*
 * Dequeue one skb from a connected stream socket without blocking and
 * pass it to @recv_actor.
 *
 * Returns:
 *   -ENOTCONN    if the socket state is not TCP_ESTABLISHED,
 *   the error from skb_recv_datagram() if nothing was dequeued,
 *   -ECONNRESET  if the dequeued skb looked like the OOB skb and the
 *                socket is SOCK_DEAD (the skb is freed),
 *   -EAGAIN      if the dequeued skb was the OOB skb (it is dropped and
 *                u->oob_skb cleared),
 *   otherwise whatever @recv_actor returns.
 */
unix_stream_read_skb(struct sock * sk,skb_read_actor_t recv_actor)2603965b57b4SCong Wang static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
260477462de1SJiang Wang {
2605772f9c31SMichal Luczaj 	struct unix_sock *u = unix_sk(sk);
2606772f9c31SMichal Luczaj 	struct sk_buff *skb;
2607772f9c31SMichal Luczaj 	int err;
2608772f9c31SMichal Luczaj 
26090ede400cSKuniyuki Iwashima 	if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED))
261077462de1SJiang Wang 		return -ENOTCONN;
261177462de1SJiang Wang 
	/* Only the dequeue itself runs under iolock. */
2612772f9c31SMichal Luczaj 	mutex_lock(&u->iolock);
2613772f9c31SMichal Luczaj 	skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
2614772f9c31SMichal Luczaj 	mutex_unlock(&u->iolock);
2615772f9c31SMichal Luczaj 	if (!skb)
2616772f9c31SMichal Luczaj 		return err;
2617772f9c31SMichal Luczaj 
2618772f9c31SMichal Luczaj #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	/*
	 * Cheap unlocked comparison first; the match is re-checked below
	 * under the state lock and the receive queue lock before acting.
	 */
2619772f9c31SMichal Luczaj 	if (unlikely(skb == READ_ONCE(u->oob_skb))) {
2620772f9c31SMichal Luczaj 		bool drop = false;
2621772f9c31SMichal Luczaj 
2622772f9c31SMichal Luczaj 		unix_state_lock(sk);
2623772f9c31SMichal Luczaj 
2624772f9c31SMichal Luczaj 		if (sock_flag(sk, SOCK_DEAD)) {
2625772f9c31SMichal Luczaj 			unix_state_unlock(sk);
2626772f9c31SMichal Luczaj 			kfree_skb(skb);
2627772f9c31SMichal Luczaj 			return -ECONNRESET;
2628772f9c31SMichal Luczaj 		}
2629772f9c31SMichal Luczaj 
2630772f9c31SMichal Luczaj 		spin_lock(&sk->sk_receive_queue.lock);
2631772f9c31SMichal Luczaj 		if (likely(skb == u->oob_skb)) {
2632772f9c31SMichal Luczaj 			WRITE_ONCE(u->oob_skb, NULL);
2633772f9c31SMichal Luczaj 			drop = true;
2634772f9c31SMichal Luczaj 		}
2635772f9c31SMichal Luczaj 		spin_unlock(&sk->sk_receive_queue.lock);
2636772f9c31SMichal Luczaj 
2637772f9c31SMichal Luczaj 		unix_state_unlock(sk);
2638772f9c31SMichal Luczaj 
		/*
		 * The OOB skb is never handed to the actor: drop both
		 * references (skb_unref() is expected to report one left)
		 * and tell the caller to try again.
		 */
2639772f9c31SMichal Luczaj 		if (drop) {
2640772f9c31SMichal Luczaj 			WARN_ON_ONCE(skb_unref(skb));
2641772f9c31SMichal Luczaj 			kfree_skb(skb);
2642772f9c31SMichal Luczaj 			return -EAGAIN;
2643772f9c31SMichal Luczaj 		}
2644772f9c31SMichal Luczaj 	}
2645772f9c31SMichal Luczaj #endif
2646772f9c31SMichal Luczaj 
2647772f9c31SMichal Luczaj 	return recv_actor(sk, skb);
264877462de1SJiang Wang }
264977462de1SJiang Wang 
/*
 * Common stream read loop behind recvmsg and splice.
 *
 * Walks sk->sk_receive_queue and feeds each skb's unread part to
 * state->recv_actor until state->size bytes have been moved, the queue is
 * exhausted and enough data (sock_rcvlowat) has been gathered, or a
 * condition below ends the read early.  MSG_OOB requests are diverted to
 * unix_stream_recv_urg() when CONFIG_AF_UNIX_OOB is enabled and fail with
 * -EOPNOTSUPP otherwise.
 *
 * @freezable is forwarded to unix_stream_data_wait().
 *
 * u->iolock is held while skbs are processed and released while sleeping
 * for data; the unix state lock is held only while the queue is inspected.
 *
 * Returns the number of bytes moved if that is non-zero, otherwise err
 * (0 or a negative errno).  -EINVAL is returned when the socket is not in
 * TCP_ESTABLISHED state.
 */
unix_stream_read_generic(struct unix_stream_read_state * state,bool freezable)265006a77b07SWANG Cong static int unix_stream_read_generic(struct unix_stream_read_state *state,
265106a77b07SWANG Cong 				    bool freezable)
26521da177e4SLinus Torvalds {
26537cc05662SChristoph Hellwig 	struct scm_cookie scm;
26542b514574SHannes Frederic Sowa 	struct socket *sock = state->socket;
26551da177e4SLinus Torvalds 	struct sock *sk = sock->sk;
26561da177e4SLinus Torvalds 	struct unix_sock *u = unix_sk(sk);
26571da177e4SLinus Torvalds 	int copied = 0;
26582b514574SHannes Frederic Sowa 	int flags = state->flags;
2659de144391SEric Dumazet 	int noblock = flags & MSG_DONTWAIT;
26602b514574SHannes Frederic Sowa 	bool check_creds = false;
26611da177e4SLinus Torvalds 	int target;
26621da177e4SLinus Torvalds 	int err = 0;
26631da177e4SLinus Torvalds 	long timeo;
2664fc0d7536SPavel Emelyanov 	int skip;
26652b514574SHannes Frederic Sowa 	size_t size = state->size;
26662b514574SHannes Frederic Sowa 	unsigned int last_len;
26671da177e4SLinus Torvalds 
2668776fcc45SKuniyuki Iwashima 	if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) {
26691da177e4SLinus Torvalds 		err = -EINVAL;
26701da177e4SLinus Torvalds 		goto out;
26711b92ee3dSRainer Weikusat 	}
26721da177e4SLinus Torvalds 
	/* -EOPNOTSUPP stands only when OOB support is compiled out. */
26731b92ee3dSRainer Weikusat 	if (unlikely(flags & MSG_OOB)) {
26741da177e4SLinus Torvalds 		err = -EOPNOTSUPP;
2675314001f0SRao Shoaib #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2676314001f0SRao Shoaib 		err = unix_stream_recv_urg(state);
2677314001f0SRao Shoaib #endif
26781da177e4SLinus Torvalds 		goto out;
26791b92ee3dSRainer Weikusat 	}
26801da177e4SLinus Torvalds 
	/* target: byte count at which an empty queue ends the read. */
26811da177e4SLinus Torvalds 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2682de144391SEric Dumazet 	timeo = sock_rcvtimeo(sk, noblock);
26831da177e4SLinus Torvalds 
26842b514574SHannes Frederic Sowa 	memset(&scm, 0, sizeof(scm));
26852b514574SHannes Frederic Sowa 
26861da177e4SLinus Torvalds 	/* Lock the socket to prevent queue disordering
26871da177e4SLinus Torvalds 	 * while sleeps in memcpy_tomsg
26881da177e4SLinus Torvalds 	 */
26896e1ce3c3SLinus Torvalds 	mutex_lock(&u->iolock);
26901da177e4SLinus Torvalds 
	/* Start at the socket's peek offset, clamped so it is never negative. */
2691a0917e0bSMatthew Dawson 	skip = max(sk_peek_offset(sk, flags), 0);
2692e9193d60SAndrey Vagin 
26936eba6a37SEric Dumazet 	do {
26941da177e4SLinus Torvalds 		int chunk;
269573ed5d25SHannes Frederic Sowa 		bool drop_skb;
269679f632c7SBenjamin Poirier 		struct sk_buff *skb, *last;
26971da177e4SLinus Torvalds 
		/* redo: entered again after sleeping for data (iolock re-taken). */
269818eceb81SRainer Weikusat redo:
26993c0d2f37SMiklos Szeredi 		unix_state_lock(sk);
2700b48732e4SMark Salyzyn 		if (sock_flag(sk, SOCK_DEAD)) {
2701b48732e4SMark Salyzyn 			err = -ECONNRESET;
2702b48732e4SMark Salyzyn 			goto unlock;
2703b48732e4SMark Salyzyn 		}
		/*
		 * Remember the skb and its length; they are passed to
		 * unix_stream_data_wait() so it can tell whether the queue
		 * has changed since.
		 */
270479f632c7SBenjamin Poirier 		last = skb = skb_peek(&sk->sk_receive_queue);
27052b514574SHannes Frederic Sowa 		last_len = last ? last->len : 0;
2706314001f0SRao Shoaib 
		/* again: entered with the state lock held and skb already chosen. */
2707aea3cb8cSKuniyuki Iwashima again:
2708314001f0SRao Shoaib #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2709314001f0SRao Shoaib 		if (skb) {
2710314001f0SRao Shoaib 			skb = manage_oob(skb, sk, flags, copied);
			/* OOB skb reached after some data was copied: stop here. */
2711022d81a7SKuniyuki Iwashima 			if (!skb && copied) {
2712314001f0SRao Shoaib 				unix_state_unlock(sk);
2713314001f0SRao Shoaib 				break;
2714314001f0SRao Shoaib 			}
2715314001f0SRao Shoaib 		}
2716314001f0SRao Shoaib #endif
27176eba6a37SEric Dumazet 		if (skb == NULL) {
27181da177e4SLinus Torvalds 			if (copied >= target)
27193c0d2f37SMiklos Szeredi 				goto unlock;
27201da177e4SLinus Torvalds 
27211da177e4SLinus Torvalds 			/*
27221da177e4SLinus Torvalds 			 *	POSIX 1003.1g mandates this order.
27231da177e4SLinus Torvalds 			 */
27241da177e4SLinus Torvalds 
27256eba6a37SEric Dumazet 			err = sock_error(sk);
27266eba6a37SEric Dumazet 			if (err)
27273c0d2f37SMiklos Szeredi 				goto unlock;
27281da177e4SLinus Torvalds 			if (sk->sk_shutdown & RCV_SHUTDOWN)
27293c0d2f37SMiklos Szeredi 				goto unlock;
27303c0d2f37SMiklos Szeredi 
27313c0d2f37SMiklos Szeredi 			unix_state_unlock(sk);
			/* No time left to wait: report -EAGAIN. */
27321b92ee3dSRainer Weikusat 			if (!timeo) {
27331da177e4SLinus Torvalds 				err = -EAGAIN;
27341da177e4SLinus Torvalds 				break;
27351b92ee3dSRainer Weikusat 			}
27361b92ee3dSRainer Weikusat 
			/* iolock is not held while sleeping for more data. */
27376e1ce3c3SLinus Torvalds 			mutex_unlock(&u->iolock);
27381da177e4SLinus Torvalds 
27392b514574SHannes Frederic Sowa 			timeo = unix_stream_data_wait(sk, timeo, last,
274006a77b07SWANG Cong 						      last_len, freezable);
27411da177e4SLinus Torvalds 
			/*
			 * Interrupted: iolock is already released, so leave
			 * through "out" directly and discard anything
			 * gathered in scm.
			 */
27423822b5c2SRainer Weikusat 			if (signal_pending(current)) {
27431da177e4SLinus Torvalds 				err = sock_intr_errno(timeo);
2744fa0dc04dSEric Dumazet 				scm_destroy(&scm);
27451da177e4SLinus Torvalds 				goto out;
27461da177e4SLinus Torvalds 			}
2747b3ca9b02SRainer Weikusat 
27486e1ce3c3SLinus Torvalds 			mutex_lock(&u->iolock);
274918eceb81SRainer Weikusat 			goto redo;
			/*
			 * unlock: shared exit for the paths above that still
			 * hold the state lock; ends the read loop.
			 */
27503c0d2f37SMiklos Szeredi unlock:
27513c0d2f37SMiklos Szeredi 			unix_state_unlock(sk);
27523c0d2f37SMiklos Szeredi 			break;
27531da177e4SLinus Torvalds 		}
2754fc0d7536SPavel Emelyanov 
		/* Step over skbs that lie entirely before the peek offset. */
2755e370a723SEric Dumazet 		while (skip >= unix_skb_len(skb)) {
2756e370a723SEric Dumazet 			skip -= unix_skb_len(skb);
275779f632c7SBenjamin Poirier 			last = skb;
27582b514574SHannes Frederic Sowa 			last_len = skb->len;
2759fc0d7536SPavel Emelyanov 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
276079f632c7SBenjamin Poirier 			if (!skb)
2761fc0d7536SPavel Emelyanov 				goto again;
2762fc0d7536SPavel Emelyanov 		}
2763fc0d7536SPavel Emelyanov 
27643c0d2f37SMiklos Szeredi 		unix_state_unlock(sk);
27651da177e4SLinus Torvalds 
		/*
		 * check_creds becomes true once credentials were captured
		 * from the first skb; later skbs must then match them.
		 */
27661da177e4SLinus Torvalds 		if (check_creds) {
27671da177e4SLinus Torvalds 			/* Never glue messages from different writers */
27689490f886SHannes Frederic Sowa 			if (!unix_skb_scm_eq(skb, &scm))
27691da177e4SLinus Torvalds 				break;
27705e2ff670SAlexander Mikhalitsyn 		} else if (test_bit(SOCK_PASSCRED, &sock->flags) ||
27715e2ff670SAlexander Mikhalitsyn 			   test_bit(SOCK_PASSPIDFD, &sock->flags)) {
27721da177e4SLinus Torvalds 			/* Copy credentials */
27737cc05662SChristoph Hellwig 			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
277437a9a8dfSStephen Smalley 			unix_set_secdata(&scm, skb);
27752b514574SHannes Frederic Sowa 			check_creds = true;
27761da177e4SLinus Torvalds 		}
27771da177e4SLinus Torvalds 
27781da177e4SLinus Torvalds 		/* Copy address just once */
		/*
		 * NOTE(review): sunaddr is local to this block, so clearing
		 * it does not stop the copy from being repeated for each skb
		 * processed; msg_name itself is not reset here.  Looks
		 * harmless but does not match the comment above - confirm.
		 */
27792b514574SHannes Frederic Sowa 		if (state->msg && state->msg->msg_name) {
27802b514574SHannes Frederic Sowa 			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
27812b514574SHannes Frederic Sowa 					 state->msg->msg_name);
27822b514574SHannes Frederic Sowa 			unix_copy_addr(state->msg, skb->sk);
27831da177e4SLinus Torvalds 			sunaddr = NULL;
27841da177e4SLinus Torvalds 		}
27851da177e4SLinus Torvalds 
		/*
		 * Move at most the unread remainder of this skb.  An extra
		 * reference is held across the actor call and dropped right
		 * after it.
		 */
2786e370a723SEric Dumazet 		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
278773ed5d25SHannes Frederic Sowa 		skb_get(skb);
27882b514574SHannes Frederic Sowa 		chunk = state->recv_actor(skb, skip, chunk, state);
278973ed5d25SHannes Frederic Sowa 		drop_skb = !unix_skb_len(skb);
279073ed5d25SHannes Frederic Sowa 		/* skb is only safe to use if !drop_skb */
279173ed5d25SHannes Frederic Sowa 		consume_skb(skb);
		/*
		 * Actor failure: report -EFAULT only if nothing was moved
		 * yet, otherwise return the partial count.
		 */
27922b514574SHannes Frederic Sowa 		if (chunk < 0) {
27931da177e4SLinus Torvalds 			if (copied == 0)
27941da177e4SLinus Torvalds 				copied = -EFAULT;
27951da177e4SLinus Torvalds 			break;
27961da177e4SLinus Torvalds 		}
27971da177e4SLinus Torvalds 		copied += chunk;
27981da177e4SLinus Torvalds 		size -= chunk;
27991da177e4SLinus Torvalds 
280073ed5d25SHannes Frederic Sowa 		if (drop_skb) {
280173ed5d25SHannes Frederic Sowa 			/* the skb was touched by a concurrent reader;
280273ed5d25SHannes Frederic Sowa 			 * we should not expect anything from this skb
280373ed5d25SHannes Frederic Sowa 			 * anymore and assume it invalid - we can be
280473ed5d25SHannes Frederic Sowa 			 * sure it was dropped from the socket queue
280573ed5d25SHannes Frederic Sowa 			 *
280673ed5d25SHannes Frederic Sowa 			 * let's report a short read
280773ed5d25SHannes Frederic Sowa 			 */
280873ed5d25SHannes Frederic Sowa 			err = 0;
280973ed5d25SHannes Frederic Sowa 			break;
281073ed5d25SHannes Frederic Sowa 		}
281173ed5d25SHannes Frederic Sowa 
28121da177e4SLinus Torvalds 		/* Mark read part of skb as used */
28136eba6a37SEric Dumazet 		if (!(flags & MSG_PEEK)) {
2814e370a723SEric Dumazet 			UNIXCB(skb).consumed += chunk;
28151da177e4SLinus Torvalds 
2816fc0d7536SPavel Emelyanov 			sk_peek_offset_bwd(sk, chunk);
2817fc0d7536SPavel Emelyanov 
28183c32da19SKirill Tkhai 			if (UNIXCB(skb).fp) {
28193c32da19SKirill Tkhai 				scm_stat_del(sk, skb);
28207cc05662SChristoph Hellwig 				unix_detach_fds(&scm, skb);
28213c32da19SKirill Tkhai 			}
28221da177e4SLinus Torvalds 
			/* skb not fully drained: the request is satisfied. */
2823e370a723SEric Dumazet 			if (unix_skb_len(skb))
28241da177e4SLinus Torvalds 				break;
28251da177e4SLinus Torvalds 
28266f01fd6eSEric Dumazet 			skb_unlink(skb, &sk->sk_receive_queue);
282770d4bf6dSNeil Horman 			consume_skb(skb);
28281da177e4SLinus Torvalds 
			/*
			 * Stop once scm carries passed fds (presumably set by
			 * unix_detach_fds() above - confirm).
			 */
28297cc05662SChristoph Hellwig 			if (scm.fp)
28301da177e4SLinus Torvalds 				break;
28316eba6a37SEric Dumazet 		} else {
28321da177e4SLinus Torvalds 			/* It is questionable, see note in unix_dgram_recvmsg.
28331da177e4SLinus Torvalds 			 */
28341da177e4SLinus Torvalds 			if (UNIXCB(skb).fp)
2835cbcf0112SMiklos Szeredi 				unix_peek_fds(&scm, skb);
28361da177e4SLinus Torvalds 
2837fc0d7536SPavel Emelyanov 			sk_peek_offset_fwd(sk, chunk);
2838fc0d7536SPavel Emelyanov 
28399f389e35SAaron Conole 			if (UNIXCB(skb).fp)
28409f389e35SAaron Conole 				break;
28419f389e35SAaron Conole 
			/*
			 * Peeking leaves the skb queued, so move on to the
			 * next one explicitly; "again" expects the state
			 * lock to be held.
			 */
2842e9193d60SAndrey Vagin 			skip = 0;
28439f389e35SAaron Conole 			last = skb;
28449f389e35SAaron Conole 			last_len = skb->len;
28459f389e35SAaron Conole 			unix_state_lock(sk);
28469f389e35SAaron Conole 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
28479f389e35SAaron Conole 			if (skb)
28489f389e35SAaron Conole 				goto again;
28499f389e35SAaron Conole 			unix_state_unlock(sk);
28501da177e4SLinus Torvalds 			break;
28511da177e4SLinus Torvalds 		}
28521da177e4SLinus Torvalds 	} while (size);
28531da177e4SLinus Torvalds 
28546e1ce3c3SLinus Torvalds 	mutex_unlock(&u->iolock);
	/* Without a msghdr (splice path) there is nowhere to deliver scm data. */
28559d797ee2SKuniyuki Iwashima 	if (state->msg)
2856a9c49cc2SAlexander Mikhalitsyn 		scm_recv_unix(sock, state->msg, &scm, flags);
28572b514574SHannes Frederic Sowa 	else
28582b514574SHannes Frederic Sowa 		scm_destroy(&scm);
28591da177e4SLinus Torvalds out:
	/* GNU "?:": copied when non-zero (a count or -EFAULT), else err. */
28601da177e4SLinus Torvalds 	return copied ? : err;
28611da177e4SLinus Torvalds }
28621da177e4SLinus Torvalds 
unix_stream_read_actor(struct sk_buff * skb,int skip,int chunk,struct unix_stream_read_state * state)28632b514574SHannes Frederic Sowa static int unix_stream_read_actor(struct sk_buff *skb,
28642b514574SHannes Frederic Sowa 				  int skip, int chunk,
28652b514574SHannes Frederic Sowa 				  struct unix_stream_read_state *state)
28662b514574SHannes Frederic Sowa {
28672b514574SHannes Frederic Sowa 	int ret;
28682b514574SHannes Frederic Sowa 
28692b514574SHannes Frederic Sowa 	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
28702b514574SHannes Frederic Sowa 				    state->msg, chunk);
28712b514574SHannes Frederic Sowa 	return ret ?: chunk;
28722b514574SHannes Frederic Sowa }
28732b514574SHannes Frederic Sowa 
__unix_stream_recvmsg(struct sock * sk,struct msghdr * msg,size_t size,int flags)287494531cfcSJiang Wang int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
287594531cfcSJiang Wang 			  size_t size, int flags)
287694531cfcSJiang Wang {
287794531cfcSJiang Wang 	struct unix_stream_read_state state = {
287894531cfcSJiang Wang 		.recv_actor = unix_stream_read_actor,
287994531cfcSJiang Wang 		.socket = sk->sk_socket,
288094531cfcSJiang Wang 		.msg = msg,
288194531cfcSJiang Wang 		.size = size,
288294531cfcSJiang Wang 		.flags = flags
288394531cfcSJiang Wang 	};
288494531cfcSJiang Wang 
288594531cfcSJiang Wang 	return unix_stream_read_generic(&state, true);
288694531cfcSJiang Wang }
288794531cfcSJiang Wang 
unix_stream_recvmsg(struct socket * sock,struct msghdr * msg,size_t size,int flags)28882b514574SHannes Frederic Sowa static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
28892b514574SHannes Frederic Sowa 			       size_t size, int flags)
28902b514574SHannes Frederic Sowa {
28912b514574SHannes Frederic Sowa 	struct unix_stream_read_state state = {
28922b514574SHannes Frederic Sowa 		.recv_actor = unix_stream_read_actor,
28932b514574SHannes Frederic Sowa 		.socket = sock,
28942b514574SHannes Frederic Sowa 		.msg = msg,
28952b514574SHannes Frederic Sowa 		.size = size,
28962b514574SHannes Frederic Sowa 		.flags = flags
28972b514574SHannes Frederic Sowa 	};
28982b514574SHannes Frederic Sowa 
289994531cfcSJiang Wang #ifdef CONFIG_BPF_SYSCALL
290094531cfcSJiang Wang 	struct sock *sk = sock->sk;
290194531cfcSJiang Wang 	const struct proto *prot = READ_ONCE(sk->sk_prot);
290294531cfcSJiang Wang 
290394531cfcSJiang Wang 	if (prot != &unix_stream_proto)
2904ec095263SOliver Hartkopp 		return prot->recvmsg(sk, msg, size, flags, NULL);
290594531cfcSJiang Wang #endif
290606a77b07SWANG Cong 	return unix_stream_read_generic(&state, true);
29072b514574SHannes Frederic Sowa }
29082b514574SHannes Frederic Sowa 
unix_stream_splice_actor(struct sk_buff * skb,int skip,int chunk,struct unix_stream_read_state * state)29092b514574SHannes Frederic Sowa static int unix_stream_splice_actor(struct sk_buff *skb,
29102b514574SHannes Frederic Sowa 				    int skip, int chunk,
29112b514574SHannes Frederic Sowa 				    struct unix_stream_read_state *state)
29122b514574SHannes Frederic Sowa {
29132b514574SHannes Frederic Sowa 	return skb_splice_bits(skb, state->socket->sk,
29142b514574SHannes Frederic Sowa 			       UNIXCB(skb).consumed + skip,
291525869262SAl Viro 			       state->pipe, chunk, state->splice_flags);
29162b514574SHannes Frederic Sowa }
29172b514574SHannes Frederic Sowa 
/*
 * splice_read() for stream sockets: move up to @size bytes from @sock
 * into @pipe through the generic stream read loop.
 *
 * A non-zero *@ppos is refused with -ESPIPE.  The read is non-blocking
 * (MSG_DONTWAIT) when the socket's file has O_NONBLOCK set or the caller
 * passed SPLICE_F_NONBLOCK.  The sleep is not freezable on this path.
 */
static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
				       struct pipe_inode_info *pipe,
				       size_t size, unsigned int flags)
{
	struct unix_stream_read_state rd = {
		.socket = sock,
		.pipe = pipe,
		.size = size,
		.splice_flags = flags,
		.recv_actor = unix_stream_splice_actor,
	};
	bool nonblock;

	if (unlikely(*ppos))
		return -ESPIPE;

	nonblock = (sock->file->f_flags & O_NONBLOCK) ||
		   (flags & SPLICE_F_NONBLOCK);
	if (nonblock)
		rd.flags = MSG_DONTWAIT;

	return unix_stream_read_generic(&rd, false);
}
29392b514574SHannes Frederic Sowa 
unix_shutdown(struct socket * sock,int mode)29401da177e4SLinus Torvalds static int unix_shutdown(struct socket *sock, int mode)
29411da177e4SLinus Torvalds {
29421da177e4SLinus Torvalds 	struct sock *sk = sock->sk;
29431da177e4SLinus Torvalds 	struct sock *other;
29441da177e4SLinus Torvalds 
2945fc61b928SXi Wang 	if (mode < SHUT_RD || mode > SHUT_RDWR)
2946fc61b928SXi Wang 		return -EINVAL;
2947fc61b928SXi Wang 	/* This maps:
2948fc61b928SXi Wang 	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2949fc61b928SXi Wang 	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2950fc61b928SXi Wang 	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2951fc61b928SXi Wang 	 */
2952fc61b928SXi Wang 	++mode;
29537180a031SAlban Crequy 
29541c92b4e5SDavid S. Miller 	unix_state_lock(sk);
2955e1d09c2cSKuniyuki Iwashima 	WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
29561da177e4SLinus Torvalds 	other = unix_peer(sk);
29571da177e4SLinus Torvalds 	if (other)
29581da177e4SLinus Torvalds 		sock_hold(other);
29591c92b4e5SDavid S. Miller 	unix_state_unlock(sk);
29601da177e4SLinus Torvalds 	sk->sk_state_change(sk);
29611da177e4SLinus Torvalds 
29621da177e4SLinus Torvalds 	if (other &&
29631da177e4SLinus Torvalds 		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
29641da177e4SLinus Torvalds 
29651da177e4SLinus Torvalds 		int peer_mode = 0;
296694531cfcSJiang Wang 		const struct proto *prot = READ_ONCE(other->sk_prot);
29671da177e4SLinus Torvalds 
2968d359902dSJiang Wang 		if (prot->unhash)
296994531cfcSJiang Wang 			prot->unhash(other);
29701da177e4SLinus Torvalds 		if (mode&RCV_SHUTDOWN)
29711da177e4SLinus Torvalds 			peer_mode |= SEND_SHUTDOWN;
29721da177e4SLinus Torvalds 		if (mode&SEND_SHUTDOWN)
29731da177e4SLinus Torvalds 			peer_mode |= RCV_SHUTDOWN;
29741c92b4e5SDavid S. Miller 		unix_state_lock(other);
2975e1d09c2cSKuniyuki Iwashima 		WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
29761c92b4e5SDavid S. Miller 		unix_state_unlock(other);
29771da177e4SLinus Torvalds 		other->sk_state_change(other);
2978d0c6416bSJiang Wang 		if (peer_mode == SHUTDOWN_MASK)
29798d8ad9d7SPavel Emelyanov 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2980d0c6416bSJiang Wang 		else if (peer_mode & RCV_SHUTDOWN)
29818d8ad9d7SPavel Emelyanov 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
29821da177e4SLinus Torvalds 	}
29831da177e4SLinus Torvalds 	if (other)
29841da177e4SLinus Torvalds 		sock_put(other);
29857180a031SAlban Crequy 
29861da177e4SLinus Torvalds 	return 0;
29871da177e4SLinus Torvalds }
29881da177e4SLinus Torvalds 
unix_inq_len(struct sock * sk)2989885ee74dSPavel Emelyanov long unix_inq_len(struct sock *sk)
29901da177e4SLinus Torvalds {
29911da177e4SLinus Torvalds 	struct sk_buff *skb;
2992885ee74dSPavel Emelyanov 	long amount = 0;
29931da177e4SLinus Torvalds 
29944e38d6c0SKuniyuki Iwashima 	if (READ_ONCE(sk->sk_state) == TCP_LISTEN)
2995885ee74dSPavel Emelyanov 		return -EINVAL;
29961da177e4SLinus Torvalds 
29971da177e4SLinus Torvalds 	spin_lock(&sk->sk_receive_queue.lock);
29981da177e4SLinus Torvalds 	if (sk->sk_type == SOCK_STREAM ||
29991da177e4SLinus Torvalds 	    sk->sk_type == SOCK_SEQPACKET) {
30001da177e4SLinus Torvalds 		skb_queue_walk(&sk->sk_receive_queue, skb)
3001e370a723SEric Dumazet 			amount += unix_skb_len(skb);
30021da177e4SLinus Torvalds 	} else {
30031da177e4SLinus Torvalds 		skb = skb_peek(&sk->sk_receive_queue);
30041da177e4SLinus Torvalds 		if (skb)
30051da177e4SLinus Torvalds 			amount = skb->len;
30061da177e4SLinus Torvalds 	}
30071da177e4SLinus Torvalds 	spin_unlock(&sk->sk_receive_queue.lock);
3008885ee74dSPavel Emelyanov 
3009885ee74dSPavel Emelyanov 	return amount;
3010885ee74dSPavel Emelyanov }
3011885ee74dSPavel Emelyanov EXPORT_SYMBOL_GPL(unix_inq_len);
3012885ee74dSPavel Emelyanov 
/* Report @sk's send-side usage as given by sk_wmem_alloc_get(). */
long unix_outq_len(struct sock *sk)
{
	long queued = sk_wmem_alloc_get(sk);

	return queued;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
3018885ee74dSPavel Emelyanov 
unix_open_file(struct sock * sk)3019ba94f308SAndrey Vagin static int unix_open_file(struct sock *sk)
3020ba94f308SAndrey Vagin {
3021ba94f308SAndrey Vagin 	struct path path;
3022ba94f308SAndrey Vagin 	struct file *f;
3023ba94f308SAndrey Vagin 	int fd;
3024ba94f308SAndrey Vagin 
3025ba94f308SAndrey Vagin 	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
3026ba94f308SAndrey Vagin 		return -EPERM;
3027ba94f308SAndrey Vagin 
3028ae3b5641SAl Viro 	if (!smp_load_acquire(&unix_sk(sk)->addr))
3029ba94f308SAndrey Vagin 		return -ENOENT;
3030ae3b5641SAl Viro 
3031ae3b5641SAl Viro 	path = unix_sk(sk)->path;
3032ae3b5641SAl Viro 	if (!path.dentry)
3033ae3b5641SAl Viro 		return -ENOENT;
3034ba94f308SAndrey Vagin 
3035ba94f308SAndrey Vagin 	path_get(&path);
3036ba94f308SAndrey Vagin 
3037ba94f308SAndrey Vagin 	fd = get_unused_fd_flags(O_CLOEXEC);
3038ba94f308SAndrey Vagin 	if (fd < 0)
3039ba94f308SAndrey Vagin 		goto out;
3040ba94f308SAndrey Vagin 
3041ba94f308SAndrey Vagin 	f = dentry_open(&path, O_PATH, current_cred());
3042ba94f308SAndrey Vagin 	if (IS_ERR(f)) {
3043ba94f308SAndrey Vagin 		put_unused_fd(fd);
3044ba94f308SAndrey Vagin 		fd = PTR_ERR(f);
3045ba94f308SAndrey Vagin 		goto out;
3046ba94f308SAndrey Vagin 	}
3047ba94f308SAndrey Vagin 
3048ba94f308SAndrey Vagin 	fd_install(fd, f);
3049ba94f308SAndrey Vagin out:
3050ba94f308SAndrey Vagin 	path_put(&path);
3051ba94f308SAndrey Vagin 
3052ba94f308SAndrey Vagin 	return fd;
3053ba94f308SAndrey Vagin }
3054ba94f308SAndrey Vagin 
unix_ioctl(struct socket * sock,unsigned int cmd,unsigned long arg)3055885ee74dSPavel Emelyanov static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3056885ee74dSPavel Emelyanov {
3057885ee74dSPavel Emelyanov 	struct sock *sk = sock->sk;
3058885ee74dSPavel Emelyanov 	long amount = 0;
3059885ee74dSPavel Emelyanov 	int err;
3060885ee74dSPavel Emelyanov 
3061885ee74dSPavel Emelyanov 	switch (cmd) {
3062885ee74dSPavel Emelyanov 	case SIOCOUTQ:
3063885ee74dSPavel Emelyanov 		amount = unix_outq_len(sk);
30641da177e4SLinus Torvalds 		err = put_user(amount, (int __user *)arg);
30651da177e4SLinus Torvalds 		break;
3066885ee74dSPavel Emelyanov 	case SIOCINQ:
3067885ee74dSPavel Emelyanov 		amount = unix_inq_len(sk);
3068885ee74dSPavel Emelyanov 		if (amount < 0)
3069885ee74dSPavel Emelyanov 			err = amount;
3070885ee74dSPavel Emelyanov 		else
3071885ee74dSPavel Emelyanov 			err = put_user(amount, (int __user *)arg);
3072885ee74dSPavel Emelyanov 		break;
3073ba94f308SAndrey Vagin 	case SIOCUNIXFILE:
3074ba94f308SAndrey Vagin 		err = unix_open_file(sk);
3075ba94f308SAndrey Vagin 		break;
3076314001f0SRao Shoaib #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3077314001f0SRao Shoaib 	case SIOCATMARK:
3078314001f0SRao Shoaib 		{
3079314001f0SRao Shoaib 			struct sk_buff *skb;
3080314001f0SRao Shoaib 			int answ = 0;
3081314001f0SRao Shoaib 
3082314001f0SRao Shoaib 			skb = skb_peek(&sk->sk_receive_queue);
3083e82025c6SKuniyuki Iwashima 			if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
3084314001f0SRao Shoaib 				answ = 1;
3085314001f0SRao Shoaib 			err = put_user(answ, (int __user *)arg);
3086314001f0SRao Shoaib 		}
3087314001f0SRao Shoaib 		break;
3088314001f0SRao Shoaib #endif
30891da177e4SLinus Torvalds 	default:
3090b5e5fa5eSChristoph Hellwig 		err = -ENOIOCTLCMD;
30911da177e4SLinus Torvalds 		break;
30921da177e4SLinus Torvalds 	}
30931da177e4SLinus Torvalds 	return err;
30941da177e4SLinus Torvalds }
30951da177e4SLinus Torvalds 
30965f6beb9eSArnd Bergmann #ifdef CONFIG_COMPAT
/* Compat entry point: convert @arg with compat_ptr() and defer to unix_ioctl(). */
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	unsigned long native_arg = (unsigned long)compat_ptr(arg);

	return unix_ioctl(sock, cmd, native_arg);
}
31015f6beb9eSArnd Bergmann #endif
31025f6beb9eSArnd Bergmann 
unix_poll(struct file * file,struct socket * sock,poll_table * wait)3103a11e1d43SLinus Torvalds static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
31041da177e4SLinus Torvalds {
31051da177e4SLinus Torvalds 	struct sock *sk = sock->sk;
3106484e036eSKuniyuki Iwashima 	unsigned char state;
3107a11e1d43SLinus Torvalds 	__poll_t mask;
3108e1d09c2cSKuniyuki Iwashima 	u8 shutdown;
3109a11e1d43SLinus Torvalds 
311089ab066dSKarsten Graul 	sock_poll_wait(file, sock, wait);
3111a11e1d43SLinus Torvalds 	mask = 0;
3112e1d09c2cSKuniyuki Iwashima 	shutdown = READ_ONCE(sk->sk_shutdown);
3113484e036eSKuniyuki Iwashima 	state = READ_ONCE(sk->sk_state);
31141da177e4SLinus Torvalds 
31151da177e4SLinus Torvalds 	/* exceptional events? */
3116cc04410aSEric Dumazet 	if (READ_ONCE(sk->sk_err))
3117a9a08845SLinus Torvalds 		mask |= EPOLLERR;
3118e1d09c2cSKuniyuki Iwashima 	if (shutdown == SHUTDOWN_MASK)
3119a9a08845SLinus Torvalds 		mask |= EPOLLHUP;
3120e1d09c2cSKuniyuki Iwashima 	if (shutdown & RCV_SHUTDOWN)
3121a9a08845SLinus Torvalds 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
31221da177e4SLinus Torvalds 
31231da177e4SLinus Torvalds 	/* readable? */
31243ef7cf57SEric Dumazet 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3125a9a08845SLinus Torvalds 		mask |= EPOLLIN | EPOLLRDNORM;
3126af493388SCong Wang 	if (sk_is_readable(sk))
3127af493388SCong Wang 		mask |= EPOLLIN | EPOLLRDNORM;
3128d9a232d4SKuniyuki Iwashima #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3129d9a232d4SKuniyuki Iwashima 	if (READ_ONCE(unix_sk(sk)->oob_skb))
3130d9a232d4SKuniyuki Iwashima 		mask |= EPOLLPRI;
3131d9a232d4SKuniyuki Iwashima #endif
31321da177e4SLinus Torvalds 
31331da177e4SLinus Torvalds 	/* Connection-based need to check for termination and startup */
31346eba6a37SEric Dumazet 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
3135484e036eSKuniyuki Iwashima 	    state == TCP_CLOSE)
3136a9a08845SLinus Torvalds 		mask |= EPOLLHUP;
31371da177e4SLinus Torvalds 
31381da177e4SLinus Torvalds 	/*
31391da177e4SLinus Torvalds 	 * we set writable also when the other side has shut down the
31401da177e4SLinus Torvalds 	 * connection. This prevents stuck sockets.
31411da177e4SLinus Torvalds 	 */
3142484e036eSKuniyuki Iwashima 	if (unix_writable(sk, state))
3143a9a08845SLinus Torvalds 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
31441da177e4SLinus Torvalds 
31451da177e4SLinus Torvalds 	return mask;
31461da177e4SLinus Torvalds }
31471da177e4SLinus Torvalds 
unix_dgram_poll(struct file * file,struct socket * sock,poll_table * wait)3148a11e1d43SLinus Torvalds static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
3149a11e1d43SLinus Torvalds 				    poll_table *wait)
31503c73419cSRainer Weikusat {
3151ec0d215fSRainer Weikusat 	struct sock *sk = sock->sk, *other;
3152a11e1d43SLinus Torvalds 	unsigned int writable;
3153484e036eSKuniyuki Iwashima 	unsigned char state;
3154a11e1d43SLinus Torvalds 	__poll_t mask;
3155e1d09c2cSKuniyuki Iwashima 	u8 shutdown;
3156a11e1d43SLinus Torvalds 
315789ab066dSKarsten Graul 	sock_poll_wait(file, sock, wait);
3158a11e1d43SLinus Torvalds 	mask = 0;
3159e1d09c2cSKuniyuki Iwashima 	shutdown = READ_ONCE(sk->sk_shutdown);
3160484e036eSKuniyuki Iwashima 	state = READ_ONCE(sk->sk_state);
31613c73419cSRainer Weikusat 
31623c73419cSRainer Weikusat 	/* exceptional events? */
3163cc04410aSEric Dumazet 	if (READ_ONCE(sk->sk_err) ||
3164cc04410aSEric Dumazet 	    !skb_queue_empty_lockless(&sk->sk_error_queue))
3165a9a08845SLinus Torvalds 		mask |= EPOLLERR |
3166a9a08845SLinus Torvalds 			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
31677d4c04fcSKeller, Jacob E 
3168e1d09c2cSKuniyuki Iwashima 	if (shutdown & RCV_SHUTDOWN)
3169a9a08845SLinus Torvalds 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3170e1d09c2cSKuniyuki Iwashima 	if (shutdown == SHUTDOWN_MASK)
3171a9a08845SLinus Torvalds 		mask |= EPOLLHUP;
31723c73419cSRainer Weikusat 
31733c73419cSRainer Weikusat 	/* readable? */
31743ef7cf57SEric Dumazet 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3175a9a08845SLinus Torvalds 		mask |= EPOLLIN | EPOLLRDNORM;
3176af493388SCong Wang 	if (sk_is_readable(sk))
3177af493388SCong Wang 		mask |= EPOLLIN | EPOLLRDNORM;
31783c73419cSRainer Weikusat 
31793c73419cSRainer Weikusat 	/* Connection-based need to check for termination and startup */
3180484e036eSKuniyuki Iwashima 	if (sk->sk_type == SOCK_SEQPACKET && state == TCP_CLOSE)
3181a9a08845SLinus Torvalds 		mask |= EPOLLHUP;
31823c73419cSRainer Weikusat 
3183973a34aaSEric Dumazet 	/* No write status requested, avoid expensive OUT tests. */
3184a11e1d43SLinus Torvalds 	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
3185973a34aaSEric Dumazet 		return mask;
3186973a34aaSEric Dumazet 
3187484e036eSKuniyuki Iwashima 	writable = unix_writable(sk, state);
31887d267278SRainer Weikusat 	if (writable) {
31897d267278SRainer Weikusat 		unix_state_lock(sk);
31907d267278SRainer Weikusat 
31917d267278SRainer Weikusat 		other = unix_peer(sk);
31927d267278SRainer Weikusat 		if (other && unix_peer(other) != sk &&
319304f08eb4SEric Dumazet 		    unix_recvq_full_lockless(other) &&
31947d267278SRainer Weikusat 		    unix_dgram_peer_wake_me(sk, other))
3195ec0d215fSRainer Weikusat 			writable = 0;
31967d267278SRainer Weikusat 
31977d267278SRainer Weikusat 		unix_state_unlock(sk);
3198ec0d215fSRainer Weikusat 	}
3199ec0d215fSRainer Weikusat 
3200ec0d215fSRainer Weikusat 	if (writable)
3201a9a08845SLinus Torvalds 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
32023c73419cSRainer Weikusat 	else
32039cd3e072SEric Dumazet 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
32043c73419cSRainer Weikusat 
32053c73419cSRainer Weikusat 	return mask;
32063c73419cSRainer Weikusat }
32071da177e4SLinus Torvalds 
32081da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
3209a53eb3feSPavel Emelyanov 
/*
 * A seq_file position packs two values: the hash bucket index in the bits
 * above BUCKET_SPACE and a 1-based offset within that bucket in the low
 * BUCKET_SPACE bits.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) (((b) << BUCKET_SPACE) | (o))
3215a53eb3feSPavel Emelyanov 
/*
 * Return the sock sitting at the 1-based offset encoded in *@pos within
 * the bucket encoded in *@pos, or NULL if the bucket is shorter than that.
 */
static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
{
	unsigned long bucket = get_bucket(*pos);
	unsigned long wanted = get_offset(*pos);
	unsigned long seen = 0;
	struct sock *sk;

	sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]);
	while (sk) {
		if (++seen == wanted)
			break;
		sk = sk_next(sk);
	}

	return sk;
}
32317123aaa3SEric Dumazet 
/*
 * Find the sock at *@pos, moving on to the first entry of each following
 * bucket until one is found.
 *
 * On success the lock of the bucket holding the returned sock is left
 * held. *@pos is advanced to offset 1 of the next bucket every time a
 * bucket is exhausted. Returns NULL once all buckets have been tried.
 */
static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	unsigned long bucket;
	struct sock *sk;

	for (bucket = get_bucket(*pos); bucket < UNIX_HASH_SIZE; ) {
		spin_lock(&net->unx.table.locks[bucket]);

		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;	/* bucket lock stays held */

		spin_unlock(&net->unx.table.locks[bucket]);

		bucket++;
		*pos = set_bucket_offset(bucket, 1);
	}

	return NULL;
}
32521da177e4SLinus Torvalds 
/*
 * Step from @sk to the next sock. While the current bucket has more
 * entries this is just sk_next(); otherwise the current bucket lock is
 * released and the search continues at the start of the next bucket.
 */
static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
				  loff_t *pos)
{
	unsigned long bucket = get_bucket(*pos);
	struct sock *next = sk_next(sk);

	if (next)
		return next;

	spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]);

	*pos = set_bucket_offset(bucket + 1, 1);

	return unix_get_first(seq, pos);
}
32694408d55aSKuniyuki Iwashima 
/* seq_file ->start: position 0 is the header token, anything else a sock. */
static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (*pos)
		return unix_get_first(seq, pos);

	return SEQ_START_TOKEN;
}
32771da177e4SLinus Torvalds 
/*
 * seq_file ->next: bump *@pos, then return either the first sock (when
 * coming from the header token) or the successor of @v.
 */
static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	*pos += 1;

	if (v != SEQ_START_TOKEN)
		return unix_get_next(seq, v, pos);

	return unix_get_first(seq, pos);
}
32871da177e4SLinus Torvalds 
unix_seq_stop(struct seq_file * seq,void * v)32881da177e4SLinus Torvalds static void unix_seq_stop(struct seq_file *seq, void *v)
32891da177e4SLinus Torvalds {
3290afd20b92SKuniyuki Iwashima 	struct sock *sk = v;
3291afd20b92SKuniyuki Iwashima 
32922f7ca90aSKuniyuki Iwashima 	if (sk)
329379b05beaSKuniyuki Iwashima 		spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]);
329479b05beaSKuniyuki Iwashima }
32951da177e4SLinus Torvalds 
unix_seq_show(struct seq_file * seq,void * v)32961da177e4SLinus Torvalds static int unix_seq_show(struct seq_file *seq, void *v)
32971da177e4SLinus Torvalds {
32981da177e4SLinus Torvalds 
3299b9f3124fSJoe Perches 	if (v == SEQ_START_TOKEN)
33001da177e4SLinus Torvalds 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
33011da177e4SLinus Torvalds 			 "Inode Path\n");
33021da177e4SLinus Torvalds 	else {
33031da177e4SLinus Torvalds 		struct sock *s = v;
33041da177e4SLinus Torvalds 		struct unix_sock *u = unix_sk(s);
33051c92b4e5SDavid S. Miller 		unix_state_lock(s);
33061da177e4SLinus Torvalds 
330771338aa7SDan Rosenberg 		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
33081da177e4SLinus Torvalds 			s,
330941c6d650SReshetova, Elena 			refcount_read(&s->sk_refcnt),
33101da177e4SLinus Torvalds 			0,
33111da177e4SLinus Torvalds 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
33121da177e4SLinus Torvalds 			s->sk_type,
33131da177e4SLinus Torvalds 			s->sk_socket ?
33141da177e4SLinus Torvalds 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
33151da177e4SLinus Torvalds 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
33161da177e4SLinus Torvalds 			sock_i_ino(s));
33171da177e4SLinus Torvalds 
33182f7ca90aSKuniyuki Iwashima 		if (u->addr) {	// under a hash table lock here
33191da177e4SLinus Torvalds 			int i, len;
33201da177e4SLinus Torvalds 			seq_putc(seq, ' ');
33211da177e4SLinus Torvalds 
33221da177e4SLinus Torvalds 			i = 0;
3323755662ceSKuniyuki Iwashima 			len = u->addr->len -
3324755662ceSKuniyuki Iwashima 				offsetof(struct sockaddr_un, sun_path);
33255ce7ab49SKuniyuki Iwashima 			if (u->addr->name->sun_path[0]) {
33261da177e4SLinus Torvalds 				len--;
33275ce7ab49SKuniyuki Iwashima 			} else {
33281da177e4SLinus Torvalds 				seq_putc(seq, '@');
33291da177e4SLinus Torvalds 				i++;
33301da177e4SLinus Torvalds 			}
33311da177e4SLinus Torvalds 			for ( ; i < len; i++)
3332e7947ea7SIsaac Boukris 				seq_putc(seq, u->addr->name->sun_path[i] ?:
3333e7947ea7SIsaac Boukris 					 '@');
33341da177e4SLinus Torvalds 		}
33351c92b4e5SDavid S. Miller 		unix_state_unlock(s);
33361da177e4SLinus Torvalds 		seq_putc(seq, '\n');
33371da177e4SLinus Torvalds 	}
33381da177e4SLinus Torvalds 
33391da177e4SLinus Torvalds 	return 0;
33401da177e4SLinus Torvalds }
33411da177e4SLinus Torvalds 
334256b3d975SPhilippe De Muyter static const struct seq_operations unix_seq_ops = {
33431da177e4SLinus Torvalds 	.start  = unix_seq_start,
33441da177e4SLinus Torvalds 	.next   = unix_seq_next,
33451da177e4SLinus Torvalds 	.stop   = unix_seq_stop,
33461da177e4SLinus Torvalds 	.show   = unix_seq_show,
33471da177e4SLinus Torvalds };
33482c860a43SKuniyuki Iwashima 
33492c860a43SKuniyuki Iwashima #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
3350855d8e77SKuniyuki Iwashima struct bpf_unix_iter_state {
3351855d8e77SKuniyuki Iwashima 	struct seq_net_private p;
3352855d8e77SKuniyuki Iwashima 	unsigned int cur_sk;
3353855d8e77SKuniyuki Iwashima 	unsigned int end_sk;
3354855d8e77SKuniyuki Iwashima 	unsigned int max_sk;
3355855d8e77SKuniyuki Iwashima 	struct sock **batch;
3356855d8e77SKuniyuki Iwashima 	bool st_bucket_done;
3357855d8e77SKuniyuki Iwashima };
3358855d8e77SKuniyuki Iwashima 
33592c860a43SKuniyuki Iwashima struct bpf_iter__unix {
33602c860a43SKuniyuki Iwashima 	__bpf_md_ptr(struct bpf_iter_meta *, meta);
33612c860a43SKuniyuki Iwashima 	__bpf_md_ptr(struct unix_sock *, unix_sk);
33622c860a43SKuniyuki Iwashima 	uid_t uid __aligned(8);
33632c860a43SKuniyuki Iwashima };
33642c860a43SKuniyuki Iwashima 
/*
 * Build a bpf_iter__unix context from the arguments and run @prog on it.
 * Returns whatever bpf_iter_run_prog() returns.
 */
static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
			      struct unix_sock *unix_sk, uid_t uid)
{
	struct bpf_iter__unix ctx;

	/* skip SEQ_START_TOKEN */
	meta->seq_num -= 1;

	ctx.uid = uid;
	ctx.unix_sk = unix_sk;
	ctx.meta = meta;

	return bpf_iter_run_prog(prog, &ctx);
}
33762c860a43SKuniyuki Iwashima 
bpf_iter_unix_hold_batch(struct seq_file * seq,struct sock * start_sk)3377855d8e77SKuniyuki Iwashima static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
3378855d8e77SKuniyuki Iwashima 
3379855d8e77SKuniyuki Iwashima {
3380855d8e77SKuniyuki Iwashima 	struct bpf_unix_iter_state *iter = seq->private;
3381855d8e77SKuniyuki Iwashima 	unsigned int expected = 1;
3382855d8e77SKuniyuki Iwashima 	struct sock *sk;
3383855d8e77SKuniyuki Iwashima 
3384855d8e77SKuniyuki Iwashima 	sock_hold(start_sk);
3385855d8e77SKuniyuki Iwashima 	iter->batch[iter->end_sk++] = start_sk;
3386855d8e77SKuniyuki Iwashima 
3387855d8e77SKuniyuki Iwashima 	for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
3388855d8e77SKuniyuki Iwashima 		if (iter->end_sk < iter->max_sk) {
3389855d8e77SKuniyuki Iwashima 			sock_hold(sk);
3390855d8e77SKuniyuki Iwashima 			iter->batch[iter->end_sk++] = sk;
3391855d8e77SKuniyuki Iwashima 		}
3392855d8e77SKuniyuki Iwashima 
3393855d8e77SKuniyuki Iwashima 		expected++;
3394855d8e77SKuniyuki Iwashima 	}
3395855d8e77SKuniyuki Iwashima 
3396cf2f225eSKuniyuki Iwashima 	spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]);
3397855d8e77SKuniyuki Iwashima 
3398855d8e77SKuniyuki Iwashima 	return expected;
3399855d8e77SKuniyuki Iwashima }
3400855d8e77SKuniyuki Iwashima 
bpf_iter_unix_put_batch(struct bpf_unix_iter_state * iter)3401855d8e77SKuniyuki Iwashima static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
3402855d8e77SKuniyuki Iwashima {
3403855d8e77SKuniyuki Iwashima 	while (iter->cur_sk < iter->end_sk)
3404855d8e77SKuniyuki Iwashima 		sock_put(iter->batch[iter->cur_sk++]);
3405855d8e77SKuniyuki Iwashima }
3406855d8e77SKuniyuki Iwashima 
bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state * iter,unsigned int new_batch_sz)3407855d8e77SKuniyuki Iwashima static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
3408855d8e77SKuniyuki Iwashima 				       unsigned int new_batch_sz)
3409855d8e77SKuniyuki Iwashima {
3410855d8e77SKuniyuki Iwashima 	struct sock **new_batch;
3411855d8e77SKuniyuki Iwashima 
3412855d8e77SKuniyuki Iwashima 	new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
3413855d8e77SKuniyuki Iwashima 			     GFP_USER | __GFP_NOWARN);
3414855d8e77SKuniyuki Iwashima 	if (!new_batch)
3415855d8e77SKuniyuki Iwashima 		return -ENOMEM;
3416855d8e77SKuniyuki Iwashima 
3417855d8e77SKuniyuki Iwashima 	bpf_iter_unix_put_batch(iter);
3418855d8e77SKuniyuki Iwashima 	kvfree(iter->batch);
3419855d8e77SKuniyuki Iwashima 	iter->batch = new_batch;
3420855d8e77SKuniyuki Iwashima 	iter->max_sk = new_batch_sz;
3421855d8e77SKuniyuki Iwashima 
3422855d8e77SKuniyuki Iwashima 	return 0;
3423855d8e77SKuniyuki Iwashima }
3424855d8e77SKuniyuki Iwashima 
/*
 * Fill the batch with the socks of the bucket at *@pos (or of the next
 * non-empty one) and return the first of them, or NULL when there are no
 * more socks.
 *
 * If the previous bucket was fully batched, *@pos is first moved to the
 * start of the following bucket. When a bucket does not fit, the batch
 * array is grown to 1.5x the number of socks seen and the bucket is
 * walked again, at most once; if it still does not fit, or the
 * reallocation fails, the partial batch is used as is.
 */
static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
					loff_t *pos)
{
	struct bpf_unix_iter_state *iter = seq->private;
	unsigned int expected;
	bool resized = false;
	struct sock *sk;

	if (iter->st_bucket_done)
		*pos = set_bucket_offset(get_bucket(*pos) + 1, 1);

	for (;;) {
		/* Get a new batch */
		iter->cur_sk = 0;
		iter->end_sk = 0;

		sk = unix_get_first(seq, pos);
		if (!sk)
			return NULL; /* Done */

		expected = bpf_iter_unix_hold_batch(seq, sk);
		if (iter->end_sk == expected) {
			iter->st_bucket_done = true;
			return sk;
		}

		/* Only one resize attempt; otherwise settle for a partial batch. */
		if (resized ||
		    bpf_iter_unix_realloc_batch(iter, expected * 3 / 2))
			return sk;

		resized = true;
	}
}
3459855d8e77SKuniyuki Iwashima 
/* seq_file ->start for the BPF iterator. */
static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
{
	/* bpf iter does not support lseek, so it always
	 * continue from where it was stop()-ped.
	 */
	if (*pos)
		return bpf_iter_unix_batch(seq, pos);

	return SEQ_START_TOKEN;
}
3470855d8e77SKuniyuki Iwashima 
/*
 * seq_file ->next for the BPF iterator: release the batch entry that was
 * just shown and return the next one, refilling the batch when it has
 * been used up.
 */
static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_unix_iter_state *iter = seq->private;

	/* Whenever seq_next() is called, the iter->cur_sk is
	 * done with seq_show(), so advance to the next sk in
	 * the batch.
	 */
	if (iter->cur_sk < iter->end_sk) {
		sock_put(iter->batch[iter->cur_sk]);
		iter->cur_sk++;
	}

	*pos += 1;

	if (iter->cur_sk >= iter->end_sk)
		return bpf_iter_unix_batch(seq, pos);

	return iter->batch[iter->cur_sk];
}
3492855d8e77SKuniyuki Iwashima 
bpf_iter_unix_seq_show(struct seq_file * seq,void * v)34932c860a43SKuniyuki Iwashima static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
34942c860a43SKuniyuki Iwashima {
34952c860a43SKuniyuki Iwashima 	struct bpf_iter_meta meta;
34962c860a43SKuniyuki Iwashima 	struct bpf_prog *prog;
34972c860a43SKuniyuki Iwashima 	struct sock *sk = v;
34982c860a43SKuniyuki Iwashima 	uid_t uid;
3499855d8e77SKuniyuki Iwashima 	bool slow;
3500855d8e77SKuniyuki Iwashima 	int ret;
35012c860a43SKuniyuki Iwashima 
35022c860a43SKuniyuki Iwashima 	if (v == SEQ_START_TOKEN)
35032c860a43SKuniyuki Iwashima 		return 0;
35042c860a43SKuniyuki Iwashima 
3505855d8e77SKuniyuki Iwashima 	slow = lock_sock_fast(sk);
3506855d8e77SKuniyuki Iwashima 
3507855d8e77SKuniyuki Iwashima 	if (unlikely(sk_unhashed(sk))) {
3508855d8e77SKuniyuki Iwashima 		ret = SEQ_SKIP;
3509855d8e77SKuniyuki Iwashima 		goto unlock;
3510855d8e77SKuniyuki Iwashima 	}
3511855d8e77SKuniyuki Iwashima 
35122c860a43SKuniyuki Iwashima 	uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
35132c860a43SKuniyuki Iwashima 	meta.seq = seq;
35142c860a43SKuniyuki Iwashima 	prog = bpf_iter_get_info(&meta, false);
3515855d8e77SKuniyuki Iwashima 	ret = unix_prog_seq_show(prog, &meta, v, uid);
3516855d8e77SKuniyuki Iwashima unlock:
3517855d8e77SKuniyuki Iwashima 	unlock_sock_fast(sk, slow);
3518855d8e77SKuniyuki Iwashima 	return ret;
35192c860a43SKuniyuki Iwashima }
35202c860a43SKuniyuki Iwashima 
bpf_iter_unix_seq_stop(struct seq_file * seq,void * v)35212c860a43SKuniyuki Iwashima static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
35222c860a43SKuniyuki Iwashima {
3523855d8e77SKuniyuki Iwashima 	struct bpf_unix_iter_state *iter = seq->private;
35242c860a43SKuniyuki Iwashima 	struct bpf_iter_meta meta;
35252c860a43SKuniyuki Iwashima 	struct bpf_prog *prog;
35262c860a43SKuniyuki Iwashima 
35272c860a43SKuniyuki Iwashima 	if (!v) {
35282c860a43SKuniyuki Iwashima 		meta.seq = seq;
35292c860a43SKuniyuki Iwashima 		prog = bpf_iter_get_info(&meta, true);
35302c860a43SKuniyuki Iwashima 		if (prog)
35312c860a43SKuniyuki Iwashima 			(void)unix_prog_seq_show(prog, &meta, v, 0);
35322c860a43SKuniyuki Iwashima 	}
35332c860a43SKuniyuki Iwashima 
3534855d8e77SKuniyuki Iwashima 	if (iter->cur_sk < iter->end_sk)
3535855d8e77SKuniyuki Iwashima 		bpf_iter_unix_put_batch(iter);
35362c860a43SKuniyuki Iwashima }
35372c860a43SKuniyuki Iwashima 
35382c860a43SKuniyuki Iwashima static const struct seq_operations bpf_iter_unix_seq_ops = {
3539855d8e77SKuniyuki Iwashima 	.start	= bpf_iter_unix_seq_start,
3540855d8e77SKuniyuki Iwashima 	.next	= bpf_iter_unix_seq_next,
35412c860a43SKuniyuki Iwashima 	.stop	= bpf_iter_unix_seq_stop,
35422c860a43SKuniyuki Iwashima 	.show	= bpf_iter_unix_seq_show,
35432c860a43SKuniyuki Iwashima };
35442c860a43SKuniyuki Iwashima #endif
35451da177e4SLinus Torvalds #endif
35461da177e4SLinus Torvalds 
3547ec1b4cf7SStephen Hemminger static const struct net_proto_family unix_family_ops = {
35481da177e4SLinus Torvalds 	.family = PF_UNIX,
35491da177e4SLinus Torvalds 	.create = unix_create,
35501da177e4SLinus Torvalds 	.owner	= THIS_MODULE,
35511da177e4SLinus Torvalds };
35521da177e4SLinus Torvalds 
3553097e66c5SDenis V. Lunev 
unix_net_init(struct net * net)35542c8c1e72SAlexey Dobriyan static int __net_init unix_net_init(struct net *net)
3555097e66c5SDenis V. Lunev {
3556b6e81138SKuniyuki Iwashima 	int i;
3557097e66c5SDenis V. Lunev 
3558a0a53c8bSDenis V. Lunev 	net->unx.sysctl_max_dgram_qlen = 10;
35591597fbc0SPavel Emelyanov 	if (unix_sysctl_register(net))
35601597fbc0SPavel Emelyanov 		goto out;
3561d392e497SPavel Emelyanov 
3562097e66c5SDenis V. Lunev #ifdef CONFIG_PROC_FS
3563c3506372SChristoph Hellwig 	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
3564b6e81138SKuniyuki Iwashima 			     sizeof(struct seq_net_private)))
3565b6e81138SKuniyuki Iwashima 		goto err_sysctl;
3566097e66c5SDenis V. Lunev #endif
3567b6e81138SKuniyuki Iwashima 
3568b6e81138SKuniyuki Iwashima 	net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE,
3569b6e81138SKuniyuki Iwashima 					      sizeof(spinlock_t), GFP_KERNEL);
3570b6e81138SKuniyuki Iwashima 	if (!net->unx.table.locks)
3571b6e81138SKuniyuki Iwashima 		goto err_proc;
3572b6e81138SKuniyuki Iwashima 
3573b6e81138SKuniyuki Iwashima 	net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE,
3574b6e81138SKuniyuki Iwashima 						sizeof(struct hlist_head),
3575b6e81138SKuniyuki Iwashima 						GFP_KERNEL);
3576b6e81138SKuniyuki Iwashima 	if (!net->unx.table.buckets)
3577b6e81138SKuniyuki Iwashima 		goto free_locks;
3578b6e81138SKuniyuki Iwashima 
3579b6e81138SKuniyuki Iwashima 	for (i = 0; i < UNIX_HASH_SIZE; i++) {
3580b6e81138SKuniyuki Iwashima 		spin_lock_init(&net->unx.table.locks[i]);
3581b6e81138SKuniyuki Iwashima 		INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
3582b6e81138SKuniyuki Iwashima 	}
3583b6e81138SKuniyuki Iwashima 
3584b6e81138SKuniyuki Iwashima 	return 0;
3585b6e81138SKuniyuki Iwashima 
3586b6e81138SKuniyuki Iwashima free_locks:
3587b6e81138SKuniyuki Iwashima 	kvfree(net->unx.table.locks);
3588b6e81138SKuniyuki Iwashima err_proc:
3589b6e81138SKuniyuki Iwashima #ifdef CONFIG_PROC_FS
3590b6e81138SKuniyuki Iwashima 	remove_proc_entry("unix", net->proc_net);
3591b6e81138SKuniyuki Iwashima err_sysctl:
3592b6e81138SKuniyuki Iwashima #endif
3593b6e81138SKuniyuki Iwashima 	unix_sysctl_unregister(net);
3594097e66c5SDenis V. Lunev out:
3595b6e81138SKuniyuki Iwashima 	return -ENOMEM;
3596097e66c5SDenis V. Lunev }
3597097e66c5SDenis V. Lunev 
unix_net_exit(struct net * net)35982c8c1e72SAlexey Dobriyan static void __net_exit unix_net_exit(struct net *net)
3599097e66c5SDenis V. Lunev {
3600b6e81138SKuniyuki Iwashima 	kvfree(net->unx.table.buckets);
3601b6e81138SKuniyuki Iwashima 	kvfree(net->unx.table.locks);
36021597fbc0SPavel Emelyanov 	unix_sysctl_unregister(net);
3603ece31ffdSGao feng 	remove_proc_entry("unix", net->proc_net);
3604097e66c5SDenis V. Lunev }
3605097e66c5SDenis V. Lunev 
3606097e66c5SDenis V. Lunev static struct pernet_operations unix_net_ops = {
3607097e66c5SDenis V. Lunev 	.init = unix_net_init,
3608097e66c5SDenis V. Lunev 	.exit = unix_net_exit,
3609097e66c5SDenis V. Lunev };
3610097e66c5SDenis V. Lunev 
36112c860a43SKuniyuki Iwashima #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
/*
 * Declares the BPF iterator entry for the "unix" target with the context
 * arguments (meta, unix_sk, uid).
 * NOTE(review): the expansion of DEFINE_BPF_ITER_FUNC is not visible in
 * this file - see its definition for the generated symbol.
 */
DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
		     struct unix_sock *unix_sk, uid_t uid)
36142c860a43SKuniyuki Iwashima 
/* Batch size requested when an iterator's private state is first set up. */
#define INIT_BATCH_SZ 16

/*
 * init_seq_private hook for the BPF unix iterator (see unix_seq_info).
 *
 * @priv_data: iterator private area, used as struct bpf_unix_iter_state.
 * @aux:       auxiliary info forwarded to bpf_iter_init_seq_net().
 *
 * Performs the generic per-net seq initialisation first, then asks
 * bpf_iter_unix_realloc_batch() for an initial batch of INIT_BATCH_SZ
 * entries. If the batch allocation fails, the per-net initialisation is
 * undone before the error is returned.
 *
 * Returns 0 on success or the error code of the failing step.
 */
static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
{
	struct bpf_unix_iter_state *state = priv_data;
	int ret;

	ret = bpf_iter_init_seq_net(priv_data, aux);
	if (ret)
		return ret;

	ret = bpf_iter_unix_realloc_batch(state, INIT_BATCH_SZ);
	if (ret)
		bpf_iter_fini_seq_net(priv_data);

	return ret;
}
3634855d8e77SKuniyuki Iwashima 
bpf_iter_fini_unix(void * priv_data)3635855d8e77SKuniyuki Iwashima static void bpf_iter_fini_unix(void *priv_data)
3636855d8e77SKuniyuki Iwashima {
3637855d8e77SKuniyuki Iwashima 	struct bpf_unix_iter_state *iter = priv_data;
3638855d8e77SKuniyuki Iwashima 
3639855d8e77SKuniyuki Iwashima 	bpf_iter_fini_seq_net(priv_data);
3640855d8e77SKuniyuki Iwashima 	kvfree(iter->batch);
3641855d8e77SKuniyuki Iwashima }
3642855d8e77SKuniyuki Iwashima 
36432c860a43SKuniyuki Iwashima static const struct bpf_iter_seq_info unix_seq_info = {
36442c860a43SKuniyuki Iwashima 	.seq_ops		= &bpf_iter_unix_seq_ops,
3645855d8e77SKuniyuki Iwashima 	.init_seq_private	= bpf_iter_init_unix,
3646855d8e77SKuniyuki Iwashima 	.fini_seq_private	= bpf_iter_fini_unix,
3647855d8e77SKuniyuki Iwashima 	.seq_priv_size		= sizeof(struct bpf_unix_iter_state),
36482c860a43SKuniyuki Iwashima };
36492c860a43SKuniyuki Iwashima 
3650eb7d8f1dSKuniyuki Iwashima static const struct bpf_func_proto *
bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,const struct bpf_prog * prog)3651eb7d8f1dSKuniyuki Iwashima bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
3652eb7d8f1dSKuniyuki Iwashima 			     const struct bpf_prog *prog)
3653eb7d8f1dSKuniyuki Iwashima {
3654eb7d8f1dSKuniyuki Iwashima 	switch (func_id) {
3655eb7d8f1dSKuniyuki Iwashima 	case BPF_FUNC_setsockopt:
3656eb7d8f1dSKuniyuki Iwashima 		return &bpf_sk_setsockopt_proto;
3657eb7d8f1dSKuniyuki Iwashima 	case BPF_FUNC_getsockopt:
3658eb7d8f1dSKuniyuki Iwashima 		return &bpf_sk_getsockopt_proto;
3659eb7d8f1dSKuniyuki Iwashima 	default:
3660eb7d8f1dSKuniyuki Iwashima 		return NULL;
3661eb7d8f1dSKuniyuki Iwashima 	}
3662eb7d8f1dSKuniyuki Iwashima }
3663eb7d8f1dSKuniyuki Iwashima 
36642c860a43SKuniyuki Iwashima static struct bpf_iter_reg unix_reg_info = {
36652c860a43SKuniyuki Iwashima 	.target			= "unix",
36662c860a43SKuniyuki Iwashima 	.ctx_arg_info_size	= 1,
36672c860a43SKuniyuki Iwashima 	.ctx_arg_info		= {
36682c860a43SKuniyuki Iwashima 		{ offsetof(struct bpf_iter__unix, unix_sk),
36692c860a43SKuniyuki Iwashima 		  PTR_TO_BTF_ID_OR_NULL },
36702c860a43SKuniyuki Iwashima 	},
3671eb7d8f1dSKuniyuki Iwashima 	.get_func_proto         = bpf_iter_unix_get_func_proto,
36722c860a43SKuniyuki Iwashima 	.seq_info		= &unix_seq_info,
36732c860a43SKuniyuki Iwashima };
36742c860a43SKuniyuki Iwashima 
bpf_iter_register(void)36752c860a43SKuniyuki Iwashima static void __init bpf_iter_register(void)
36762c860a43SKuniyuki Iwashima {
36772c860a43SKuniyuki Iwashima 	unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
36782c860a43SKuniyuki Iwashima 	if (bpf_iter_reg_target(&unix_reg_info))
36792c860a43SKuniyuki Iwashima 		pr_warn("Warning: could not register bpf iterator unix\n");
36802c860a43SKuniyuki Iwashima }
36812c860a43SKuniyuki Iwashima #endif
36822c860a43SKuniyuki Iwashima 
af_unix_init(void)36831da177e4SLinus Torvalds static int __init af_unix_init(void)
36841da177e4SLinus Torvalds {
368551bae889SKuniyuki Iwashima 	int i, rc = -1;
36861da177e4SLinus Torvalds 
3687c593642cSPankaj Bharadiya 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
36881da177e4SLinus Torvalds 
368951bae889SKuniyuki Iwashima 	for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
369051bae889SKuniyuki Iwashima 		spin_lock_init(&bsd_socket_locks[i]);
369151bae889SKuniyuki Iwashima 		INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
369251bae889SKuniyuki Iwashima 	}
369351bae889SKuniyuki Iwashima 
369494531cfcSJiang Wang 	rc = proto_register(&unix_dgram_proto, 1);
369594531cfcSJiang Wang 	if (rc != 0) {
369694531cfcSJiang Wang 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
369794531cfcSJiang Wang 		goto out;
369894531cfcSJiang Wang 	}
369994531cfcSJiang Wang 
370094531cfcSJiang Wang 	rc = proto_register(&unix_stream_proto, 1);
37011da177e4SLinus Torvalds 	if (rc != 0) {
37025cc208beSwangweidong 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
370373e341e0SYang Yingliang 		proto_unregister(&unix_dgram_proto);
37041da177e4SLinus Torvalds 		goto out;
37051da177e4SLinus Torvalds 	}
37061da177e4SLinus Torvalds 
37071da177e4SLinus Torvalds 	sock_register(&unix_family_ops);
3708097e66c5SDenis V. Lunev 	register_pernet_subsys(&unix_net_ops);
3709c6382918SCong Wang 	unix_bpf_build_proto();
37102c860a43SKuniyuki Iwashima 
37112c860a43SKuniyuki Iwashima #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
37122c860a43SKuniyuki Iwashima 	bpf_iter_register();
37132c860a43SKuniyuki Iwashima #endif
37142c860a43SKuniyuki Iwashima 
37151da177e4SLinus Torvalds out:
37161da177e4SLinus Torvalds 	return rc;
37171da177e4SLinus Torvalds }
37181da177e4SLinus Torvalds 
af_unix_exit(void)37191da177e4SLinus Torvalds static void __exit af_unix_exit(void)
37201da177e4SLinus Torvalds {
37211da177e4SLinus Torvalds 	sock_unregister(PF_UNIX);
372294531cfcSJiang Wang 	proto_unregister(&unix_dgram_proto);
372394531cfcSJiang Wang 	proto_unregister(&unix_stream_proto);
3724097e66c5SDenis V. Lunev 	unregister_pernet_subsys(&unix_net_ops);
37251da177e4SLinus Torvalds }
37261da177e4SLinus Torvalds 
37273d366960SDavid Woodhouse /* Earlier than device_initcall() so that other drivers invoking
37283d366960SDavid Woodhouse    request_module() don't end up in a loop when modprobe tries
37293d366960SDavid Woodhouse    to use a UNIX socket. But later than subsys_initcall() because
37303d366960SDavid Woodhouse    we depend on stuff initialised there */
37313d366960SDavid Woodhouse fs_initcall(af_unix_init);
37321da177e4SLinus Torvalds module_exit(af_unix_exit);
37331da177e4SLinus Torvalds 
37341da177e4SLinus Torvalds MODULE_LICENSE("GPL");
37351da177e4SLinus Torvalds MODULE_ALIAS_NETPROTO(PF_UNIX);
3736