12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds * NET4: Implementation of BSD Unix domain sockets.
41da177e4SLinus Torvalds *
5113aa838SAlan Cox * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
61da177e4SLinus Torvalds *
71da177e4SLinus Torvalds * Fixes:
81da177e4SLinus Torvalds * Linus Torvalds : Assorted bug cures.
91da177e4SLinus Torvalds * Niibe Yutaka : async I/O support.
101da177e4SLinus Torvalds * Carsten Paeth : PF_UNIX check, address fixes.
111da177e4SLinus Torvalds * Alan Cox : Limit size of allocated blocks.
121da177e4SLinus Torvalds * Alan Cox : Fixed the stupid socketpair bug.
131da177e4SLinus Torvalds * Alan Cox : BSD compatibility fine tuning.
141da177e4SLinus Torvalds * Alan Cox : Fixed a bug in connect when interrupted.
151da177e4SLinus Torvalds * Alan Cox : Sorted out a proper draft version of
161da177e4SLinus Torvalds * file descriptor passing hacked up from
171da177e4SLinus Torvalds * Mike Shaver's work.
181da177e4SLinus Torvalds * Marty Leisner : Fixes to fd passing
191da177e4SLinus Torvalds * Nick Nevin : recvmsg bugfix.
201da177e4SLinus Torvalds * Alan Cox : Started proper garbage collector
211da177e4SLinus Torvalds * Heiko EiBfeldt : Missing verify_area check
221da177e4SLinus Torvalds * Alan Cox : Started POSIXisms
231da177e4SLinus Torvalds * Andreas Schwab : Replace inode by dentry for proper
241da177e4SLinus Torvalds * reference counting
251da177e4SLinus Torvalds * Kirk Petersen : Made this a module
261da177e4SLinus Torvalds * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
271da177e4SLinus Torvalds * Lots of bug fixes.
 *		Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
 *					by above two patches.
 *		Andrea Arcangeli	:	If possible we block in connect(2)
 *					if the max backlog of the listen socket
 *					has been reached. This won't break
 *					old apps and it will avoid a huge amount
 *					of socks hashed (this for unix_gc()
 *					performance reasons).
361da177e4SLinus Torvalds * Security fix that limits the max
371da177e4SLinus Torvalds * number of socks to 2*max_files and
381da177e4SLinus Torvalds * the number of skb queueable in the
391da177e4SLinus Torvalds * dgram receiver.
401da177e4SLinus Torvalds * Artur Skawina : Hash function optimizations
411da177e4SLinus Torvalds * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
421da177e4SLinus Torvalds * Malcolm Beattie : Set peercred for socketpair
431da177e4SLinus Torvalds * Michal Ostrowski : Module initialization cleanup.
441da177e4SLinus Torvalds * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
451da177e4SLinus Torvalds * the core infrastructure is doing that
461da177e4SLinus Torvalds * for all net proto families now (2.5.69+)
471da177e4SLinus Torvalds *
481da177e4SLinus Torvalds * Known differences from reference BSD that was tested:
491da177e4SLinus Torvalds *
501da177e4SLinus Torvalds * [TO FIX]
511da177e4SLinus Torvalds * ECONNREFUSED is not returned from one end of a connected() socket to the
521da177e4SLinus Torvalds * other the moment one end closes.
531da177e4SLinus Torvalds * fstat() doesn't return st_dev=0, and give the blksize as high water mark
541da177e4SLinus Torvalds * and a fake inode identifier (nor the BSD first socket fstat twice bug).
551da177e4SLinus Torvalds * [NOT TO FIX]
561da177e4SLinus Torvalds * accept() returns a path name even if the connecting socket has closed
571da177e4SLinus Torvalds * in the meantime (BSD loses the path and gives up).
581da177e4SLinus Torvalds * accept() returns 0 length path for an unbound connector. BSD returns 16
591da177e4SLinus Torvalds * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
601da177e4SLinus Torvalds * socketpair(...SOCK_RAW..) doesn't panic the kernel.
611da177e4SLinus Torvalds * BSD af_unix apparently has connect forgetting to block properly.
621da177e4SLinus Torvalds * (need to check this with the POSIX spec in detail)
631da177e4SLinus Torvalds *
641da177e4SLinus Torvalds * Differences from 2.0.0-11-... (ANK)
651da177e4SLinus Torvalds * Bug fixes and improvements.
661da177e4SLinus Torvalds * - client shutdown killed server socket.
671da177e4SLinus Torvalds * - removed all useless cli/sti pairs.
681da177e4SLinus Torvalds *
691da177e4SLinus Torvalds * Semantic changes/extensions.
701da177e4SLinus Torvalds * - generic control message passing.
711da177e4SLinus Torvalds * - SCM_CREDENTIALS control message.
721da177e4SLinus Torvalds * - "Abstract" (not FS based) socket bindings.
731da177e4SLinus Torvalds * Abstract names are sequences of bytes (not zero terminated)
741da177e4SLinus Torvalds * started by 0, so that this name space does not intersect
751da177e4SLinus Torvalds * with BSD names.
761da177e4SLinus Torvalds */
771da177e4SLinus Torvalds
785cc208beSwangweidong #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
795cc208beSwangweidong
801da177e4SLinus Torvalds #include <linux/module.h>
811da177e4SLinus Torvalds #include <linux/kernel.h>
821da177e4SLinus Torvalds #include <linux/signal.h>
833f07c014SIngo Molnar #include <linux/sched/signal.h>
841da177e4SLinus Torvalds #include <linux/errno.h>
851da177e4SLinus Torvalds #include <linux/string.h>
861da177e4SLinus Torvalds #include <linux/stat.h>
871da177e4SLinus Torvalds #include <linux/dcache.h>
881da177e4SLinus Torvalds #include <linux/namei.h>
891da177e4SLinus Torvalds #include <linux/socket.h>
901da177e4SLinus Torvalds #include <linux/un.h>
911da177e4SLinus Torvalds #include <linux/fcntl.h>
92b6459415SJakub Kicinski #include <linux/filter.h>
931da177e4SLinus Torvalds #include <linux/termios.h>
941da177e4SLinus Torvalds #include <linux/sockios.h>
951da177e4SLinus Torvalds #include <linux/net.h>
961da177e4SLinus Torvalds #include <linux/in.h>
971da177e4SLinus Torvalds #include <linux/fs.h>
981da177e4SLinus Torvalds #include <linux/slab.h>
997c0f6ba6SLinus Torvalds #include <linux/uaccess.h>
1001da177e4SLinus Torvalds #include <linux/skbuff.h>
1011da177e4SLinus Torvalds #include <linux/netdevice.h>
102457c4cbcSEric W. Biederman #include <net/net_namespace.h>
1031da177e4SLinus Torvalds #include <net/sock.h>
104c752f073SArnaldo Carvalho de Melo #include <net/tcp_states.h>
1051da177e4SLinus Torvalds #include <net/af_unix.h>
1061da177e4SLinus Torvalds #include <linux/proc_fs.h>
1071da177e4SLinus Torvalds #include <linux/seq_file.h>
1081da177e4SLinus Torvalds #include <net/scm.h>
1091da177e4SLinus Torvalds #include <linux/init.h>
1101da177e4SLinus Torvalds #include <linux/poll.h>
1111da177e4SLinus Torvalds #include <linux/rtnetlink.h>
1121da177e4SLinus Torvalds #include <linux/mount.h>
1131da177e4SLinus Torvalds #include <net/checksum.h>
1141da177e4SLinus Torvalds #include <linux/security.h>
115509f15b9SJakub Kicinski #include <linux/splice.h>
1162b15af6fSColin Cross #include <linux/freezer.h>
117ba94f308SAndrey Vagin #include <linux/file.h>
1182c860a43SKuniyuki Iwashima #include <linux/btf_ids.h>
1191da177e4SLinus Torvalds
120f4e65870SJens Axboe #include "scm.h"
121f4e65870SJens Axboe
122518de9b3SEric Dumazet static atomic_long_t unix_nr_socks;
12351bae889SKuniyuki Iwashima static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
12451bae889SKuniyuki Iwashima static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
1251da177e4SLinus Torvalds
/* SMP locking strategy:
 *    the hash table is protected by per-bucket spinlocks.
 *    each socket's state is protected by its own separate spinlock.
 */
1301da177e4SLinus Torvalds
unix_unbound_hash(struct sock * sk)131f452be49SKuniyuki Iwashima static unsigned int unix_unbound_hash(struct sock *sk)
1327123aaa3SEric Dumazet {
133f452be49SKuniyuki Iwashima unsigned long hash = (unsigned long)sk;
1347123aaa3SEric Dumazet
1357123aaa3SEric Dumazet hash ^= hash >> 16;
1367123aaa3SEric Dumazet hash ^= hash >> 8;
137f452be49SKuniyuki Iwashima hash ^= sk->sk_type;
138f452be49SKuniyuki Iwashima
139cf21b355SKuniyuki Iwashima return hash & UNIX_HASH_MOD;
140f452be49SKuniyuki Iwashima }
141f452be49SKuniyuki Iwashima
unix_bsd_hash(struct inode * i)142f452be49SKuniyuki Iwashima static unsigned int unix_bsd_hash(struct inode *i)
143f452be49SKuniyuki Iwashima {
144f302d180SKuniyuki Iwashima return i->i_ino & UNIX_HASH_MOD;
145f452be49SKuniyuki Iwashima }
146f452be49SKuniyuki Iwashima
unix_abstract_hash(struct sockaddr_un * sunaddr,int addr_len,int type)147f452be49SKuniyuki Iwashima static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
148f452be49SKuniyuki Iwashima int addr_len, int type)
149f452be49SKuniyuki Iwashima {
150f452be49SKuniyuki Iwashima __wsum csum = csum_partial(sunaddr, addr_len, 0);
151f452be49SKuniyuki Iwashima unsigned int hash;
152f452be49SKuniyuki Iwashima
153f452be49SKuniyuki Iwashima hash = (__force unsigned int)csum_fold(csum);
154f452be49SKuniyuki Iwashima hash ^= hash >> 8;
155f452be49SKuniyuki Iwashima hash ^= type;
156f452be49SKuniyuki Iwashima
157cf21b355SKuniyuki Iwashima return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
1587123aaa3SEric Dumazet }
1597123aaa3SEric Dumazet
unix_table_double_lock(struct net * net,unsigned int hash1,unsigned int hash2)16079b05beaSKuniyuki Iwashima static void unix_table_double_lock(struct net *net,
16179b05beaSKuniyuki Iwashima unsigned int hash1, unsigned int hash2)
162afd20b92SKuniyuki Iwashima {
163cf21b355SKuniyuki Iwashima if (hash1 == hash2) {
164cf21b355SKuniyuki Iwashima spin_lock(&net->unx.table.locks[hash1]);
165cf21b355SKuniyuki Iwashima return;
166cf21b355SKuniyuki Iwashima }
167cf21b355SKuniyuki Iwashima
168afd20b92SKuniyuki Iwashima if (hash1 > hash2)
169afd20b92SKuniyuki Iwashima swap(hash1, hash2);
170afd20b92SKuniyuki Iwashima
17179b05beaSKuniyuki Iwashima spin_lock(&net->unx.table.locks[hash1]);
17279b05beaSKuniyuki Iwashima spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
173afd20b92SKuniyuki Iwashima }
174afd20b92SKuniyuki Iwashima
unix_table_double_unlock(struct net * net,unsigned int hash1,unsigned int hash2)17579b05beaSKuniyuki Iwashima static void unix_table_double_unlock(struct net *net,
17679b05beaSKuniyuki Iwashima unsigned int hash1, unsigned int hash2)
177afd20b92SKuniyuki Iwashima {
178cf21b355SKuniyuki Iwashima if (hash1 == hash2) {
179cf21b355SKuniyuki Iwashima spin_unlock(&net->unx.table.locks[hash1]);
180cf21b355SKuniyuki Iwashima return;
181cf21b355SKuniyuki Iwashima }
182cf21b355SKuniyuki Iwashima
18379b05beaSKuniyuki Iwashima spin_unlock(&net->unx.table.locks[hash1]);
18479b05beaSKuniyuki Iwashima spin_unlock(&net->unx.table.locks[hash2]);
185afd20b92SKuniyuki Iwashima }
186afd20b92SKuniyuki Iwashima
187877ce7c1SCatherine Zhang #ifdef CONFIG_SECURITY_NETWORK
unix_get_secdata(struct scm_cookie * scm,struct sk_buff * skb)188dc49c1f9SCatherine Zhang static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
189877ce7c1SCatherine Zhang {
19037a9a8dfSStephen Smalley UNIXCB(skb).secid = scm->secid;
191877ce7c1SCatherine Zhang }
192877ce7c1SCatherine Zhang
unix_set_secdata(struct scm_cookie * scm,struct sk_buff * skb)193877ce7c1SCatherine Zhang static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
194877ce7c1SCatherine Zhang {
19537a9a8dfSStephen Smalley scm->secid = UNIXCB(skb).secid;
19637a9a8dfSStephen Smalley }
19737a9a8dfSStephen Smalley
unix_secdata_eq(struct scm_cookie * scm,struct sk_buff * skb)19837a9a8dfSStephen Smalley static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
19937a9a8dfSStephen Smalley {
20037a9a8dfSStephen Smalley return (scm->secid == UNIXCB(skb).secid);
201877ce7c1SCatherine Zhang }
202877ce7c1SCatherine Zhang #else
unix_get_secdata(struct scm_cookie * scm,struct sk_buff * skb)203dc49c1f9SCatherine Zhang static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
204877ce7c1SCatherine Zhang { }
205877ce7c1SCatherine Zhang
unix_set_secdata(struct scm_cookie * scm,struct sk_buff * skb)206877ce7c1SCatherine Zhang static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
207877ce7c1SCatherine Zhang { }
20837a9a8dfSStephen Smalley
unix_secdata_eq(struct scm_cookie * scm,struct sk_buff * skb)20937a9a8dfSStephen Smalley static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
21037a9a8dfSStephen Smalley {
21137a9a8dfSStephen Smalley return true;
21237a9a8dfSStephen Smalley }
213877ce7c1SCatherine Zhang #endif /* CONFIG_SECURITY_NETWORK */
214877ce7c1SCatherine Zhang
/* Non-zero when the peer recorded on @osk is @sk. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return sk == unix_peer(osk);
}
2191da177e4SLinus Torvalds
/*
 * Non-zero when @sk is allowed to send to @osk: either @osk has no peer
 * at all, or its peer is @sk.
 */
static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	if (!unix_peer(osk))
		return 1;

	return unix_our_peer(sk, osk);
}
2241da177e4SLinus Torvalds
unix_recvq_full_lockless(const struct sock * sk)22586b18aaaSQian Cai static inline int unix_recvq_full_lockless(const struct sock *sk)
22686b18aaaSQian Cai {
227f1683d07SKuniyuki Iwashima return skb_queue_len_lockless(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
22886b18aaaSQian Cai }
22986b18aaaSQian Cai
unix_peer_get(struct sock * s)230fa7ff56fSPavel Emelyanov struct sock *unix_peer_get(struct sock *s)
2311da177e4SLinus Torvalds {
2321da177e4SLinus Torvalds struct sock *peer;
2331da177e4SLinus Torvalds
2341c92b4e5SDavid S. Miller unix_state_lock(s);
2351da177e4SLinus Torvalds peer = unix_peer(s);
2361da177e4SLinus Torvalds if (peer)
2371da177e4SLinus Torvalds sock_hold(peer);
2381c92b4e5SDavid S. Miller unix_state_unlock(s);
2391da177e4SLinus Torvalds return peer;
2401da177e4SLinus Torvalds }
241fa7ff56fSPavel Emelyanov EXPORT_SYMBOL_GPL(unix_peer_get);
2421da177e4SLinus Torvalds
unix_create_addr(struct sockaddr_un * sunaddr,int addr_len)24312f21c49SKuniyuki Iwashima static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
24412f21c49SKuniyuki Iwashima int addr_len)
24512f21c49SKuniyuki Iwashima {
24612f21c49SKuniyuki Iwashima struct unix_address *addr;
24712f21c49SKuniyuki Iwashima
24812f21c49SKuniyuki Iwashima addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
24912f21c49SKuniyuki Iwashima if (!addr)
25012f21c49SKuniyuki Iwashima return NULL;
25112f21c49SKuniyuki Iwashima
25212f21c49SKuniyuki Iwashima refcount_set(&addr->refcnt, 1);
25312f21c49SKuniyuki Iwashima addr->len = addr_len;
25412f21c49SKuniyuki Iwashima memcpy(addr->name, sunaddr, addr_len);
25512f21c49SKuniyuki Iwashima
25612f21c49SKuniyuki Iwashima return addr;
25712f21c49SKuniyuki Iwashima }
25812f21c49SKuniyuki Iwashima
unix_release_addr(struct unix_address * addr)2591da177e4SLinus Torvalds static inline void unix_release_addr(struct unix_address *addr)
2601da177e4SLinus Torvalds {
2618c9814b9SReshetova, Elena if (refcount_dec_and_test(&addr->refcnt))
2621da177e4SLinus Torvalds kfree(addr);
2631da177e4SLinus Torvalds }
2641da177e4SLinus Torvalds
/*
 * Basic sanity check of a unix socket address:
 *   - the length must extend past the sun_family header, i.e. the name
 *     must not be zero length;
 *   - the length must not exceed sizeof(struct sockaddr_un);
 *   - the address family must be AF_UNIX.
 *
 * Returns 0 if the address passes, -EINVAL otherwise.
 */
static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
{
	/* Reject negative lengths explicitly instead of relying on the
	 * implicit int -> size_t conversion in the comparisons below.
	 */
	if (addr_len < 0)
		return -EINVAL;

	if ((size_t)addr_len <= offsetof(struct sockaddr_un, sun_path) ||
	    (size_t)addr_len > sizeof(*sunaddr))
		return -EINVAL;

	if (sunaddr->sun_family != AF_UNIX)
		return -EINVAL;

	return 0;
}
283b8a58aa6SKuniyuki Iwashima
unix_mkname_bsd(struct sockaddr_un * sunaddr,int addr_len)284ecb4534bSKuniyuki Iwashima static int unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
285d2d8c9fdSKuniyuki Iwashima {
286ecb4534bSKuniyuki Iwashima struct sockaddr_storage *addr = (struct sockaddr_storage *)sunaddr;
287ecb4534bSKuniyuki Iwashima short offset = offsetof(struct sockaddr_storage, __data);
288ecb4534bSKuniyuki Iwashima
289ecb4534bSKuniyuki Iwashima BUILD_BUG_ON(offset != offsetof(struct sockaddr_un, sun_path));
290ecb4534bSKuniyuki Iwashima
291d2d8c9fdSKuniyuki Iwashima /* This may look like an off by one error but it is a bit more
292d2d8c9fdSKuniyuki Iwashima * subtle. 108 is the longest valid AF_UNIX path for a binding.
293d2d8c9fdSKuniyuki Iwashima * sun_path[108] doesn't as such exist. However in kernel space
294d2d8c9fdSKuniyuki Iwashima * we are guaranteed that it is a valid memory location in our
295d2d8c9fdSKuniyuki Iwashima * kernel address buffer because syscall functions always pass
296d2d8c9fdSKuniyuki Iwashima * a pointer of struct sockaddr_storage which has a bigger buffer
297ecb4534bSKuniyuki Iwashima * than 108. Also, we must terminate sun_path for strlen() in
298ecb4534bSKuniyuki Iwashima * getname_kernel().
299d2d8c9fdSKuniyuki Iwashima */
300ecb4534bSKuniyuki Iwashima addr->__data[addr_len - offset] = 0;
301ecb4534bSKuniyuki Iwashima
302ecb4534bSKuniyuki Iwashima /* Don't pass sunaddr->sun_path to strlen(). Otherwise, 108 will
303ecb4534bSKuniyuki Iwashima * cause panic if CONFIG_FORTIFY_SOURCE=y. Let __fortify_strlen()
304ecb4534bSKuniyuki Iwashima * know the actual buffer.
305ecb4534bSKuniyuki Iwashima */
306ecb4534bSKuniyuki Iwashima return strlen(addr->__data) + offset + 1;
307d2d8c9fdSKuniyuki Iwashima }
308d2d8c9fdSKuniyuki Iwashima
/*
 * Unlink @sk from its hash chain and reinitialise its node.  Does no
 * locking of its own.
 */
static void __unix_remove_socket(struct sock *sk)
{
	(void)sk_del_node_init(sk);
}
3131da177e4SLinus Torvalds
__unix_insert_socket(struct net * net,struct sock * sk)314cf2f225eSKuniyuki Iwashima static void __unix_insert_socket(struct net *net, struct sock *sk)
3151da177e4SLinus Torvalds {
316dd29c67dSEric Dumazet DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
317cf2f225eSKuniyuki Iwashima sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
3181da177e4SLinus Torvalds }
3191da177e4SLinus Torvalds
__unix_set_addr_hash(struct net * net,struct sock * sk,struct unix_address * addr,unsigned int hash)320cf2f225eSKuniyuki Iwashima static void __unix_set_addr_hash(struct net *net, struct sock *sk,
321cf2f225eSKuniyuki Iwashima struct unix_address *addr, unsigned int hash)
322185ab886SAl Viro {
323185ab886SAl Viro __unix_remove_socket(sk);
324185ab886SAl Viro smp_store_release(&unix_sk(sk)->addr, addr);
325e6b4b873SKuniyuki Iwashima
326e6b4b873SKuniyuki Iwashima sk->sk_hash = hash;
327cf2f225eSKuniyuki Iwashima __unix_insert_socket(net, sk);
328185ab886SAl Viro }
329185ab886SAl Viro
unix_remove_socket(struct net * net,struct sock * sk)33079b05beaSKuniyuki Iwashima static void unix_remove_socket(struct net *net, struct sock *sk)
3311da177e4SLinus Torvalds {
33279b05beaSKuniyuki Iwashima spin_lock(&net->unx.table.locks[sk->sk_hash]);
3331da177e4SLinus Torvalds __unix_remove_socket(sk);
33479b05beaSKuniyuki Iwashima spin_unlock(&net->unx.table.locks[sk->sk_hash]);
3351da177e4SLinus Torvalds }
3361da177e4SLinus Torvalds
unix_insert_unbound_socket(struct net * net,struct sock * sk)33779b05beaSKuniyuki Iwashima static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
3381da177e4SLinus Torvalds {
33979b05beaSKuniyuki Iwashima spin_lock(&net->unx.table.locks[sk->sk_hash]);
340cf2f225eSKuniyuki Iwashima __unix_insert_socket(net, sk);
34179b05beaSKuniyuki Iwashima spin_unlock(&net->unx.table.locks[sk->sk_hash]);
3421da177e4SLinus Torvalds }
3431da177e4SLinus Torvalds
unix_insert_bsd_socket(struct sock * sk)34451bae889SKuniyuki Iwashima static void unix_insert_bsd_socket(struct sock *sk)
34551bae889SKuniyuki Iwashima {
34651bae889SKuniyuki Iwashima spin_lock(&bsd_socket_locks[sk->sk_hash]);
34751bae889SKuniyuki Iwashima sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
34851bae889SKuniyuki Iwashima spin_unlock(&bsd_socket_locks[sk->sk_hash]);
34951bae889SKuniyuki Iwashima }
35051bae889SKuniyuki Iwashima
unix_remove_bsd_socket(struct sock * sk)35151bae889SKuniyuki Iwashima static void unix_remove_bsd_socket(struct sock *sk)
35251bae889SKuniyuki Iwashima {
35351bae889SKuniyuki Iwashima if (!hlist_unhashed(&sk->sk_bind_node)) {
35451bae889SKuniyuki Iwashima spin_lock(&bsd_socket_locks[sk->sk_hash]);
35551bae889SKuniyuki Iwashima __sk_del_bind_node(sk);
35651bae889SKuniyuki Iwashima spin_unlock(&bsd_socket_locks[sk->sk_hash]);
35751bae889SKuniyuki Iwashima
35851bae889SKuniyuki Iwashima sk_node_init(&sk->sk_bind_node);
35951bae889SKuniyuki Iwashima }
36051bae889SKuniyuki Iwashima }
36151bae889SKuniyuki Iwashima
__unix_find_socket_byname(struct net * net,struct sockaddr_un * sunname,int len,unsigned int hash)362097e66c5SDenis V. Lunev static struct sock *__unix_find_socket_byname(struct net *net,
363097e66c5SDenis V. Lunev struct sockaddr_un *sunname,
364be752283SAl Viro int len, unsigned int hash)
3651da177e4SLinus Torvalds {
3661da177e4SLinus Torvalds struct sock *s;
3671da177e4SLinus Torvalds
368cf2f225eSKuniyuki Iwashima sk_for_each(s, &net->unx.table.buckets[hash]) {
3691da177e4SLinus Torvalds struct unix_sock *u = unix_sk(s);
3701da177e4SLinus Torvalds
3711da177e4SLinus Torvalds if (u->addr->len == len &&
3721da177e4SLinus Torvalds !memcmp(u->addr->name, sunname, len))
3731da177e4SLinus Torvalds return s;
3741da177e4SLinus Torvalds }
375262ce0afSVito Caputo return NULL;
376262ce0afSVito Caputo }
3771da177e4SLinus Torvalds
unix_find_socket_byname(struct net * net,struct sockaddr_un * sunname,int len,unsigned int hash)378097e66c5SDenis V. Lunev static inline struct sock *unix_find_socket_byname(struct net *net,
379097e66c5SDenis V. Lunev struct sockaddr_un *sunname,
380be752283SAl Viro int len, unsigned int hash)
3811da177e4SLinus Torvalds {
3821da177e4SLinus Torvalds struct sock *s;
3831da177e4SLinus Torvalds
38479b05beaSKuniyuki Iwashima spin_lock(&net->unx.table.locks[hash]);
385be752283SAl Viro s = __unix_find_socket_byname(net, sunname, len, hash);
3861da177e4SLinus Torvalds if (s)
3871da177e4SLinus Torvalds sock_hold(s);
38879b05beaSKuniyuki Iwashima spin_unlock(&net->unx.table.locks[hash]);
3891da177e4SLinus Torvalds return s;
3901da177e4SLinus Torvalds }
3911da177e4SLinus Torvalds
unix_find_socket_byinode(struct inode * i)39251bae889SKuniyuki Iwashima static struct sock *unix_find_socket_byinode(struct inode *i)
3931da177e4SLinus Torvalds {
394f452be49SKuniyuki Iwashima unsigned int hash = unix_bsd_hash(i);
3951da177e4SLinus Torvalds struct sock *s;
3961da177e4SLinus Torvalds
39751bae889SKuniyuki Iwashima spin_lock(&bsd_socket_locks[hash]);
39851bae889SKuniyuki Iwashima sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
39940ffe67dSAl Viro struct dentry *dentry = unix_sk(s)->path.dentry;
4001da177e4SLinus Torvalds
401beef5121SMiklos Szeredi if (dentry && d_backing_inode(dentry) == i) {
4021da177e4SLinus Torvalds sock_hold(s);
40351bae889SKuniyuki Iwashima spin_unlock(&bsd_socket_locks[hash]);
4041da177e4SLinus Torvalds return s;
4051da177e4SLinus Torvalds }
406afd20b92SKuniyuki Iwashima }
40751bae889SKuniyuki Iwashima spin_unlock(&bsd_socket_locks[hash]);
408afd20b92SKuniyuki Iwashima return NULL;
409afd20b92SKuniyuki Iwashima }
4101da177e4SLinus Torvalds
4117d267278SRainer Weikusat /* Support code for asymmetrically connected dgram sockets
4127d267278SRainer Weikusat *
4137d267278SRainer Weikusat * If a datagram socket is connected to a socket not itself connected
4147d267278SRainer Weikusat * to the first socket (eg, /dev/log), clients may only enqueue more
4157d267278SRainer Weikusat * messages if the present receive queue of the server socket is not
4167d267278SRainer Weikusat * "too large". This means there's a second writeability condition
4177d267278SRainer Weikusat * poll and sendmsg need to test. The dgram recv code will do a wake
4187d267278SRainer Weikusat * up on the peer_wait wait queue of a socket upon reception of a
4197d267278SRainer Weikusat * datagram which needs to be propagated to sleeping would-be writers
4207d267278SRainer Weikusat * since these might not have sent anything so far. This can't be
4217d267278SRainer Weikusat * accomplished via poll_wait because the lifetime of the server
4227d267278SRainer Weikusat * socket might be less than that of its clients if these break their
4237d267278SRainer Weikusat * association with it or if the server socket is closed while clients
4247d267278SRainer Weikusat * are still connected to it and there's no way to inform "a polling
4257d267278SRainer Weikusat * implementation" that it should let go of a certain wait queue
4267d267278SRainer Weikusat *
427ac6424b9SIngo Molnar * In order to propagate a wake up, a wait_queue_entry_t of the client
4287d267278SRainer Weikusat * socket is enqueued on the peer_wait queue of the server socket
4297d267278SRainer Weikusat * whose wake function does a wake_up on the ordinary client socket
4307d267278SRainer Weikusat * wait queue. This connection is established whenever a write (or
4317d267278SRainer Weikusat * poll for write) hit the flow control condition and broken when the
4327d267278SRainer Weikusat * association to the server socket is dissolved or after a wake up
4337d267278SRainer Weikusat * was relayed.
4347d267278SRainer Weikusat */
4357d267278SRainer Weikusat
/*
 * Wake function of a socket's peer_wake entry.
 *
 * Unhooks the entry from the peer_wait queue of the socket recorded in
 * peer_wake.private, clears that back-pointer, and then forwards the wake
 * up (with the poll key) to the owning socket's own wait queue, if it has
 * one.  @mode and @flags are unused.  Always returns 0.
 */
static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u = container_of(q, struct unix_sock, peer_wake);
	struct unix_sock *u_peer = unix_sk(u->peer_wake.private);
	wait_queue_head_t *u_sleep;

	__remove_wait_queue(&u_peer->peer_wait, q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (!u_sleep)
		return 0;

	wake_up_interruptible_poll(u_sleep, key_to_poll(key));

	return 0;
}
4557d267278SRainer Weikusat
unix_dgram_peer_wake_connect(struct sock * sk,struct sock * other)4567d267278SRainer Weikusat static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
4577d267278SRainer Weikusat {
4587d267278SRainer Weikusat struct unix_sock *u, *u_other;
4597d267278SRainer Weikusat int rc;
4607d267278SRainer Weikusat
4617d267278SRainer Weikusat u = unix_sk(sk);
4627d267278SRainer Weikusat u_other = unix_sk(other);
4637d267278SRainer Weikusat rc = 0;
4647d267278SRainer Weikusat spin_lock(&u_other->peer_wait.lock);
4657d267278SRainer Weikusat
4667d267278SRainer Weikusat if (!u->peer_wake.private) {
4677d267278SRainer Weikusat u->peer_wake.private = other;
4687d267278SRainer Weikusat __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
4697d267278SRainer Weikusat
4707d267278SRainer Weikusat rc = 1;
4717d267278SRainer Weikusat }
4727d267278SRainer Weikusat
4737d267278SRainer Weikusat spin_unlock(&u_other->peer_wait.lock);
4747d267278SRainer Weikusat return rc;
4757d267278SRainer Weikusat }
4767d267278SRainer Weikusat
unix_dgram_peer_wake_disconnect(struct sock * sk,struct sock * other)4777d267278SRainer Weikusat static void unix_dgram_peer_wake_disconnect(struct sock *sk,
4787d267278SRainer Weikusat struct sock *other)
4797d267278SRainer Weikusat {
4807d267278SRainer Weikusat struct unix_sock *u, *u_other;
4817d267278SRainer Weikusat
4827d267278SRainer Weikusat u = unix_sk(sk);
4837d267278SRainer Weikusat u_other = unix_sk(other);
4847d267278SRainer Weikusat spin_lock(&u_other->peer_wait.lock);
4857d267278SRainer Weikusat
4867d267278SRainer Weikusat if (u->peer_wake.private == other) {
4877d267278SRainer Weikusat __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
4887d267278SRainer Weikusat u->peer_wake.private = NULL;
4897d267278SRainer Weikusat }
4907d267278SRainer Weikusat
4917d267278SRainer Weikusat spin_unlock(&u_other->peer_wait.lock);
4927d267278SRainer Weikusat }
4937d267278SRainer Weikusat
unix_dgram_peer_wake_disconnect_wakeup(struct sock * sk,struct sock * other)4947d267278SRainer Weikusat static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
4957d267278SRainer Weikusat struct sock *other)
4967d267278SRainer Weikusat {
4977d267278SRainer Weikusat unix_dgram_peer_wake_disconnect(sk, other);
4987d267278SRainer Weikusat wake_up_interruptible_poll(sk_sleep(sk),
499a9a08845SLinus Torvalds EPOLLOUT |
500a9a08845SLinus Torvalds EPOLLWRNORM |
501a9a08845SLinus Torvalds EPOLLWRBAND);
5027d267278SRainer Weikusat }
5037d267278SRainer Weikusat
5047d267278SRainer Weikusat /* preconditions:
5057d267278SRainer Weikusat * - unix_peer(sk) == other
5067d267278SRainer Weikusat * - association is stable
5077d267278SRainer Weikusat */
unix_dgram_peer_wake_me(struct sock * sk,struct sock * other)5087d267278SRainer Weikusat static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
5097d267278SRainer Weikusat {
5107d267278SRainer Weikusat int connected;
5117d267278SRainer Weikusat
5127d267278SRainer Weikusat connected = unix_dgram_peer_wake_connect(sk, other);
5137d267278SRainer Weikusat
51451f7e951SJason Baron /* If other is SOCK_DEAD, we want to make sure we signal
51551f7e951SJason Baron * POLLOUT, such that a subsequent write() can get a
51651f7e951SJason Baron * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
51751f7e951SJason Baron * to other and its full, we will hang waiting for POLLOUT.
51851f7e951SJason Baron */
519662a8094SKuniyuki Iwashima if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
5207d267278SRainer Weikusat return 1;
5217d267278SRainer Weikusat
5227d267278SRainer Weikusat if (connected)
5237d267278SRainer Weikusat unix_dgram_peer_wake_disconnect(sk, other);
5247d267278SRainer Weikusat
5257d267278SRainer Weikusat return 0;
5267d267278SRainer Weikusat }
5277d267278SRainer Weikusat
unix_writable(const struct sock * sk,unsigned char state)528484e036eSKuniyuki Iwashima static int unix_writable(const struct sock *sk, unsigned char state)
5291da177e4SLinus Torvalds {
530484e036eSKuniyuki Iwashima return state != TCP_LISTEN &&
531996ec22fSKuniyuki Iwashima (refcount_read(&sk->sk_wmem_alloc) << 2) <= READ_ONCE(sk->sk_sndbuf);
5321da177e4SLinus Torvalds }
5331da177e4SLinus Torvalds
unix_write_space(struct sock * sk)5341da177e4SLinus Torvalds static void unix_write_space(struct sock *sk)
5351da177e4SLinus Torvalds {
53643815482SEric Dumazet struct socket_wq *wq;
53743815482SEric Dumazet
53843815482SEric Dumazet rcu_read_lock();
539484e036eSKuniyuki Iwashima if (unix_writable(sk, READ_ONCE(sk->sk_state))) {
54043815482SEric Dumazet wq = rcu_dereference(sk->sk_wq);
5411ce0bf50SHerbert Xu if (skwq_has_sleeper(wq))
54267426b75SEric Dumazet wake_up_interruptible_sync_poll(&wq->wait,
543a9a08845SLinus Torvalds EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
5448d8ad9d7SPavel Emelyanov sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
5451da177e4SLinus Torvalds }
54643815482SEric Dumazet rcu_read_unlock();
5471da177e4SLinus Torvalds }
5481da177e4SLinus Torvalds
5491da177e4SLinus Torvalds /* When dgram socket disconnects (or changes its peer), we clear its receive
5501da177e4SLinus Torvalds * queue of packets arrived from previous peer. First, it allows to do
5511da177e4SLinus Torvalds * flow control based only on wmem_alloc; second, sk connected to peer
5521da177e4SLinus Torvalds * may receive messages only from that peer. */
unix_dgram_disconnected(struct sock * sk,struct sock * other)5531da177e4SLinus Torvalds static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
5541da177e4SLinus Torvalds {
555b03efcfbSDavid S. Miller if (!skb_queue_empty(&sk->sk_receive_queue)) {
5561da177e4SLinus Torvalds skb_queue_purge(&sk->sk_receive_queue);
5571da177e4SLinus Torvalds wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
5581da177e4SLinus Torvalds
5591da177e4SLinus Torvalds /* If one link of bidirectional dgram pipe is disconnected,
5601da177e4SLinus Torvalds * we signal error. Messages are lost. Do not make this,
5611da177e4SLinus Torvalds * when peer was not connected to us.
5621da177e4SLinus Torvalds */
5631da177e4SLinus Torvalds if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
564cc04410aSEric Dumazet WRITE_ONCE(other->sk_err, ECONNRESET);
565e3ae2365SAlexander Aring sk_error_report(other);
5661da177e4SLinus Torvalds }
5671da177e4SLinus Torvalds }
5681da177e4SLinus Torvalds }
5691da177e4SLinus Torvalds
unix_sock_destructor(struct sock * sk)5701da177e4SLinus Torvalds static void unix_sock_destructor(struct sock *sk)
5711da177e4SLinus Torvalds {
5721da177e4SLinus Torvalds struct unix_sock *u = unix_sk(sk);
5731da177e4SLinus Torvalds
5741da177e4SLinus Torvalds skb_queue_purge(&sk->sk_receive_queue);
5751da177e4SLinus Torvalds
576dd29c67dSEric Dumazet DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
577dd29c67dSEric Dumazet DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
578dd29c67dSEric Dumazet DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
5791da177e4SLinus Torvalds if (!sock_flag(sk, SOCK_DEAD)) {
5805cc208beSwangweidong pr_info("Attempt to release alive unix socket: %p\n", sk);
5811da177e4SLinus Torvalds return;
5821da177e4SLinus Torvalds }
5831da177e4SLinus Torvalds
5841da177e4SLinus Torvalds if (u->addr)
5851da177e4SLinus Torvalds unix_release_addr(u->addr);
5861da177e4SLinus Torvalds
587518de9b3SEric Dumazet atomic_long_dec(&unix_nr_socks);
588a8076d8dSEric Dumazet sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
5891da177e4SLinus Torvalds #ifdef UNIX_REFCNT_DEBUG
5905cc208beSwangweidong pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
591518de9b3SEric Dumazet atomic_long_read(&unix_nr_socks));
5921da177e4SLinus Torvalds #endif
5931da177e4SLinus Torvalds }
5941da177e4SLinus Torvalds
unix_release_sock(struct sock * sk,int embrion)595ded34e0fSPaul Moore static void unix_release_sock(struct sock *sk, int embrion)
5961da177e4SLinus Torvalds {
5971da177e4SLinus Torvalds struct unix_sock *u = unix_sk(sk);
5981da177e4SLinus Torvalds struct sock *skpair;
5991da177e4SLinus Torvalds struct sk_buff *skb;
60079b05beaSKuniyuki Iwashima struct path path;
6011da177e4SLinus Torvalds int state;
6021da177e4SLinus Torvalds
60379b05beaSKuniyuki Iwashima unix_remove_socket(sock_net(sk), sk);
60451bae889SKuniyuki Iwashima unix_remove_bsd_socket(sk);
6051da177e4SLinus Torvalds
6061da177e4SLinus Torvalds /* Clear state */
6071c92b4e5SDavid S. Miller unix_state_lock(sk);
6081da177e4SLinus Torvalds sock_orphan(sk);
609e1d09c2cSKuniyuki Iwashima WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
61040ffe67dSAl Viro path = u->path;
61140ffe67dSAl Viro u->path.dentry = NULL;
61240ffe67dSAl Viro u->path.mnt = NULL;
6131da177e4SLinus Torvalds state = sk->sk_state;
61445733e98SKuniyuki Iwashima WRITE_ONCE(sk->sk_state, TCP_CLOSE);
615a494bd64SEric Dumazet
616a494bd64SEric Dumazet skpair = unix_peer(sk);
617a494bd64SEric Dumazet unix_peer(sk) = NULL;
618a494bd64SEric Dumazet
6191c92b4e5SDavid S. Miller unix_state_unlock(sk);
6201da177e4SLinus Torvalds
6217a62ed61SKuniyuki Iwashima #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
6227a62ed61SKuniyuki Iwashima if (u->oob_skb) {
6237a62ed61SKuniyuki Iwashima kfree_skb(u->oob_skb);
6247a62ed61SKuniyuki Iwashima u->oob_skb = NULL;
6257a62ed61SKuniyuki Iwashima }
6267a62ed61SKuniyuki Iwashima #endif
6277a62ed61SKuniyuki Iwashima
6281da177e4SLinus Torvalds wake_up_interruptible_all(&u->peer_wait);
6291da177e4SLinus Torvalds
6301da177e4SLinus Torvalds if (skpair != NULL) {
6311da177e4SLinus Torvalds if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
6321c92b4e5SDavid S. Miller unix_state_lock(skpair);
6331da177e4SLinus Torvalds /* No more writes */
634e1d09c2cSKuniyuki Iwashima WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
635471ec7b7SKuniyuki Iwashima if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || embrion)
636cc04410aSEric Dumazet WRITE_ONCE(skpair->sk_err, ECONNRESET);
6371c92b4e5SDavid S. Miller unix_state_unlock(skpair);
6381da177e4SLinus Torvalds skpair->sk_state_change(skpair);
6398d8ad9d7SPavel Emelyanov sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
6401da177e4SLinus Torvalds }
6417d267278SRainer Weikusat
6427d267278SRainer Weikusat unix_dgram_peer_wake_disconnect(sk, skpair);
6431da177e4SLinus Torvalds sock_put(skpair); /* It may now die */
6441da177e4SLinus Torvalds }
6451da177e4SLinus Torvalds
6461da177e4SLinus Torvalds /* Try to flush out this socket. Throw out buffers at least */
6471da177e4SLinus Torvalds
6481da177e4SLinus Torvalds while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
6491da177e4SLinus Torvalds if (state == TCP_LISTEN)
6501da177e4SLinus Torvalds unix_release_sock(skb->sk, 1);
6511da177e4SLinus Torvalds /* passed fds are erased in the kfree_skb hook */
65273ed5d25SHannes Frederic Sowa UNIXCB(skb).consumed = skb->len;
6531da177e4SLinus Torvalds kfree_skb(skb);
6541da177e4SLinus Torvalds }
6551da177e4SLinus Torvalds
65640ffe67dSAl Viro if (path.dentry)
65740ffe67dSAl Viro path_put(&path);
6581da177e4SLinus Torvalds
6591da177e4SLinus Torvalds sock_put(sk);
6601da177e4SLinus Torvalds
6611da177e4SLinus Torvalds /* ---- Socket is dead now and most probably destroyed ---- */
6621da177e4SLinus Torvalds
6631da177e4SLinus Torvalds /*
664e04dae84SAlan Cox * Fixme: BSD difference: In BSD all sockets connected to us get
6651da177e4SLinus Torvalds * ECONNRESET and we die on the spot. In Linux we behave
6661da177e4SLinus Torvalds * like files and pipes do and wait for the last
6671da177e4SLinus Torvalds * dereference.
6681da177e4SLinus Torvalds *
6691da177e4SLinus Torvalds * Can't we simply set sock->err?
6701da177e4SLinus Torvalds *
6711da177e4SLinus Torvalds * What the above comment does talk about? --ANK(980817)
6721da177e4SLinus Torvalds */
6731da177e4SLinus Torvalds
674ade32bd8SKuniyuki Iwashima if (READ_ONCE(unix_tot_inflight))
6751da177e4SLinus Torvalds unix_gc(); /* Garbage collect fds */
6761da177e4SLinus Torvalds }
6771da177e4SLinus Torvalds
init_peercred(struct sock * sk)678109f6e39SEric W. Biederman static void init_peercred(struct sock *sk)
679109f6e39SEric W. Biederman {
68035306eb2SEric Dumazet const struct cred *old_cred;
68135306eb2SEric Dumazet struct pid *old_pid;
68235306eb2SEric Dumazet
68335306eb2SEric Dumazet spin_lock(&sk->sk_peer_lock);
68435306eb2SEric Dumazet old_pid = sk->sk_peer_pid;
68535306eb2SEric Dumazet old_cred = sk->sk_peer_cred;
686109f6e39SEric W. Biederman sk->sk_peer_pid = get_pid(task_tgid(current));
687109f6e39SEric W. Biederman sk->sk_peer_cred = get_current_cred();
68835306eb2SEric Dumazet spin_unlock(&sk->sk_peer_lock);
68935306eb2SEric Dumazet
69035306eb2SEric Dumazet put_pid(old_pid);
69135306eb2SEric Dumazet put_cred(old_cred);
692109f6e39SEric W. Biederman }
693109f6e39SEric W. Biederman
copy_peercred(struct sock * sk,struct sock * peersk)694109f6e39SEric W. Biederman static void copy_peercred(struct sock *sk, struct sock *peersk)
695109f6e39SEric W. Biederman {
69635306eb2SEric Dumazet if (sk < peersk) {
69735306eb2SEric Dumazet spin_lock(&sk->sk_peer_lock);
69835306eb2SEric Dumazet spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
69935306eb2SEric Dumazet } else {
70035306eb2SEric Dumazet spin_lock(&peersk->sk_peer_lock);
70135306eb2SEric Dumazet spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
70235306eb2SEric Dumazet }
7039c2450cfSKuniyuki Iwashima
704109f6e39SEric W. Biederman sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
705109f6e39SEric W. Biederman sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
70635306eb2SEric Dumazet
70735306eb2SEric Dumazet spin_unlock(&sk->sk_peer_lock);
70835306eb2SEric Dumazet spin_unlock(&peersk->sk_peer_lock);
709109f6e39SEric W. Biederman }
710109f6e39SEric W. Biederman
unix_listen(struct socket * sock,int backlog)7111da177e4SLinus Torvalds static int unix_listen(struct socket *sock, int backlog)
7121da177e4SLinus Torvalds {
7131da177e4SLinus Torvalds int err;
7141da177e4SLinus Torvalds struct sock *sk = sock->sk;
7151da177e4SLinus Torvalds struct unix_sock *u = unix_sk(sk);
7161da177e4SLinus Torvalds
7171da177e4SLinus Torvalds err = -EOPNOTSUPP;
7181da177e4SLinus Torvalds if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
7191da177e4SLinus Torvalds goto out; /* Only stream/seqpacket sockets accept */
7201da177e4SLinus Torvalds err = -EINVAL;
721302fe8ddSKuniyuki Iwashima if (!READ_ONCE(u->addr))
7221da177e4SLinus Torvalds goto out; /* No listens on an unbound socket */
7231c92b4e5SDavid S. Miller unix_state_lock(sk);
7241da177e4SLinus Torvalds if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
7251da177e4SLinus Torvalds goto out_unlock;
7261da177e4SLinus Torvalds if (backlog > sk->sk_max_ack_backlog)
7271da177e4SLinus Torvalds wake_up_interruptible_all(&u->peer_wait);
7281da177e4SLinus Torvalds sk->sk_max_ack_backlog = backlog;
72945733e98SKuniyuki Iwashima WRITE_ONCE(sk->sk_state, TCP_LISTEN);
73045733e98SKuniyuki Iwashima
7311da177e4SLinus Torvalds /* set credentials so connect can copy them */
732109f6e39SEric W. Biederman init_peercred(sk);
7331da177e4SLinus Torvalds err = 0;
7341da177e4SLinus Torvalds
7351da177e4SLinus Torvalds out_unlock:
7361c92b4e5SDavid S. Miller unix_state_unlock(sk);
7371da177e4SLinus Torvalds out:
7381da177e4SLinus Torvalds return err;
7391da177e4SLinus Torvalds }
7401da177e4SLinus Torvalds
7411da177e4SLinus Torvalds static int unix_release(struct socket *);
7421da177e4SLinus Torvalds static int unix_bind(struct socket *, struct sockaddr *, int);
7431da177e4SLinus Torvalds static int unix_stream_connect(struct socket *, struct sockaddr *,
7441da177e4SLinus Torvalds int addr_len, int flags);
7451da177e4SLinus Torvalds static int unix_socketpair(struct socket *, struct socket *);
746cdfbabfbSDavid Howells static int unix_accept(struct socket *, struct socket *, int, bool);
7479b2c45d4SDenys Vlasenko static int unix_getname(struct socket *, struct sockaddr *, int);
748a11e1d43SLinus Torvalds static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
749a11e1d43SLinus Torvalds static __poll_t unix_dgram_poll(struct file *, struct socket *,
750a11e1d43SLinus Torvalds poll_table *);
7511da177e4SLinus Torvalds static int unix_ioctl(struct socket *, unsigned int, unsigned long);
7525f6beb9eSArnd Bergmann #ifdef CONFIG_COMPAT
7535f6beb9eSArnd Bergmann static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
7545f6beb9eSArnd Bergmann #endif
7551da177e4SLinus Torvalds static int unix_shutdown(struct socket *, int);
7561b784140SYing Xue static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
7571b784140SYing Xue static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
7582b514574SHannes Frederic Sowa static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
7592b514574SHannes Frederic Sowa struct pipe_inode_info *, size_t size,
7602b514574SHannes Frederic Sowa unsigned int flags);
7611b784140SYing Xue static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
7621b784140SYing Xue static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
763965b57b4SCong Wang static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
764965b57b4SCong Wang static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
7651da177e4SLinus Torvalds static int unix_dgram_connect(struct socket *, struct sockaddr *,
7661da177e4SLinus Torvalds int, int);
7671b784140SYing Xue static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
7681b784140SYing Xue static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
7691b784140SYing Xue int);
7701da177e4SLinus Torvalds
unix_set_peek_off(struct sock * sk,int val)77112663bfcSSasha Levin static int unix_set_peek_off(struct sock *sk, int val)
772f55bb7f9SPavel Emelyanov {
773f55bb7f9SPavel Emelyanov struct unix_sock *u = unix_sk(sk);
774f55bb7f9SPavel Emelyanov
7756e1ce3c3SLinus Torvalds if (mutex_lock_interruptible(&u->iolock))
77612663bfcSSasha Levin return -EINTR;
77712663bfcSSasha Levin
77811695c6eSEric Dumazet WRITE_ONCE(sk->sk_peek_off, val);
7796e1ce3c3SLinus Torvalds mutex_unlock(&u->iolock);
78012663bfcSSasha Levin
78112663bfcSSasha Levin return 0;
782f55bb7f9SPavel Emelyanov }
783f55bb7f9SPavel Emelyanov
#ifdef CONFIG_PROC_FS
/* Sum scm_stat.nr_fds over the sockets that own the skbs queued on @sk's
 * receive queue.  The queue is walked with its spinlock held.
 */
static int unix_count_nr_fds(struct sock *sk)
{
	struct sk_buff_head *queue = &sk->sk_receive_queue;
	struct sk_buff *skb;
	int total = 0;

	spin_lock(&queue->lock);
	for (skb = skb_peek(queue); skb; skb = skb_peek_next(skb, queue))
		total += atomic_read(&unix_sk(skb->sk)->scm_stat.nr_fds);
	spin_unlock(&queue->lock);

	return total;
}

/* show_fdinfo handler: print the "scm_fds" line for @sock.
 *
 * Nothing is printed when the socket has no struct sock.  Datagram sockets
 * and established sockets report their own scm_stat counter, listening
 * sockets report the sum over their receive queue, anything else reports 0.
 */
static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
{
	struct sock *sk = sock->sk;
	unsigned char s_state;
	int nr_fds = 0;

	if (!sk)
		return;

	s_state = READ_ONCE(sk->sk_state);

	/* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
	 * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
	 * SOCK_DGRAM is ordinary. So, no lock is needed.
	 */
	if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED)
		nr_fds = atomic_read(&unix_sk(sk)->scm_stat.nr_fds);
	else if (s_state == TCP_LISTEN)
		nr_fds = unix_count_nr_fds(sk);

	seq_printf(m, "scm_fds: %u\n", nr_fds);
}
#else
/* Without procfs the proto_ops tables carry no show_fdinfo handler. */
#define unix_show_fdinfo NULL
#endif
829f55bb7f9SPavel Emelyanov
83090ddc4f0SEric Dumazet static const struct proto_ops unix_stream_ops = {
8311da177e4SLinus Torvalds .family = PF_UNIX,
8321da177e4SLinus Torvalds .owner = THIS_MODULE,
8331da177e4SLinus Torvalds .release = unix_release,
8341da177e4SLinus Torvalds .bind = unix_bind,
8351da177e4SLinus Torvalds .connect = unix_stream_connect,
8361da177e4SLinus Torvalds .socketpair = unix_socketpair,
8371da177e4SLinus Torvalds .accept = unix_accept,
8381da177e4SLinus Torvalds .getname = unix_getname,
839a11e1d43SLinus Torvalds .poll = unix_poll,
8401da177e4SLinus Torvalds .ioctl = unix_ioctl,
8415f6beb9eSArnd Bergmann #ifdef CONFIG_COMPAT
8425f6beb9eSArnd Bergmann .compat_ioctl = unix_compat_ioctl,
8435f6beb9eSArnd Bergmann #endif
8441da177e4SLinus Torvalds .listen = unix_listen,
8451da177e4SLinus Torvalds .shutdown = unix_shutdown,
8461da177e4SLinus Torvalds .sendmsg = unix_stream_sendmsg,
8471da177e4SLinus Torvalds .recvmsg = unix_stream_recvmsg,
848965b57b4SCong Wang .read_skb = unix_stream_read_skb,
8491da177e4SLinus Torvalds .mmap = sock_no_mmap,
8502b514574SHannes Frederic Sowa .splice_read = unix_stream_splice_read,
851fc0d7536SPavel Emelyanov .set_peek_off = unix_set_peek_off,
8523c32da19SKirill Tkhai .show_fdinfo = unix_show_fdinfo,
8531da177e4SLinus Torvalds };
8541da177e4SLinus Torvalds
85590ddc4f0SEric Dumazet static const struct proto_ops unix_dgram_ops = {
8561da177e4SLinus Torvalds .family = PF_UNIX,
8571da177e4SLinus Torvalds .owner = THIS_MODULE,
8581da177e4SLinus Torvalds .release = unix_release,
8591da177e4SLinus Torvalds .bind = unix_bind,
8601da177e4SLinus Torvalds .connect = unix_dgram_connect,
8611da177e4SLinus Torvalds .socketpair = unix_socketpair,
8621da177e4SLinus Torvalds .accept = sock_no_accept,
8631da177e4SLinus Torvalds .getname = unix_getname,
864a11e1d43SLinus Torvalds .poll = unix_dgram_poll,
8651da177e4SLinus Torvalds .ioctl = unix_ioctl,
8665f6beb9eSArnd Bergmann #ifdef CONFIG_COMPAT
8675f6beb9eSArnd Bergmann .compat_ioctl = unix_compat_ioctl,
8685f6beb9eSArnd Bergmann #endif
8691da177e4SLinus Torvalds .listen = sock_no_listen,
8701da177e4SLinus Torvalds .shutdown = unix_shutdown,
8711da177e4SLinus Torvalds .sendmsg = unix_dgram_sendmsg,
872965b57b4SCong Wang .read_skb = unix_read_skb,
8731da177e4SLinus Torvalds .recvmsg = unix_dgram_recvmsg,
8741da177e4SLinus Torvalds .mmap = sock_no_mmap,
875f55bb7f9SPavel Emelyanov .set_peek_off = unix_set_peek_off,
8763c32da19SKirill Tkhai .show_fdinfo = unix_show_fdinfo,
8771da177e4SLinus Torvalds };
8781da177e4SLinus Torvalds
87990ddc4f0SEric Dumazet static const struct proto_ops unix_seqpacket_ops = {
8801da177e4SLinus Torvalds .family = PF_UNIX,
8811da177e4SLinus Torvalds .owner = THIS_MODULE,
8821da177e4SLinus Torvalds .release = unix_release,
8831da177e4SLinus Torvalds .bind = unix_bind,
8841da177e4SLinus Torvalds .connect = unix_stream_connect,
8851da177e4SLinus Torvalds .socketpair = unix_socketpair,
8861da177e4SLinus Torvalds .accept = unix_accept,
8871da177e4SLinus Torvalds .getname = unix_getname,
888a11e1d43SLinus Torvalds .poll = unix_dgram_poll,
8891da177e4SLinus Torvalds .ioctl = unix_ioctl,
8905f6beb9eSArnd Bergmann #ifdef CONFIG_COMPAT
8915f6beb9eSArnd Bergmann .compat_ioctl = unix_compat_ioctl,
8925f6beb9eSArnd Bergmann #endif
8931da177e4SLinus Torvalds .listen = unix_listen,
8941da177e4SLinus Torvalds .shutdown = unix_shutdown,
8951da177e4SLinus Torvalds .sendmsg = unix_seqpacket_sendmsg,
896a05d2ad1SEric W. Biederman .recvmsg = unix_seqpacket_recvmsg,
8971da177e4SLinus Torvalds .mmap = sock_no_mmap,
898f55bb7f9SPavel Emelyanov .set_peek_off = unix_set_peek_off,
8993c32da19SKirill Tkhai .show_fdinfo = unix_show_fdinfo,
9001da177e4SLinus Torvalds };
9011da177e4SLinus Torvalds
/* Intentionally empty ->close() handler: a unix socket has no work to do
 * here, the hook is provided merely for sockmap.
 */
static void unix_close(struct sock *sk, long timeout)
{
}
908c7272e15SCong Wang
/* Intentionally empty ->unhash() handler: a unix socket has no work to do
 * here, the hook is provided merely for sockmap.
 */
static void unix_unhash(struct sock *sk)
{
}
91594531cfcSJiang Wang
unix_bpf_bypass_getsockopt(int level,int optname)9167b26952aSAlexander Mikhalitsyn static bool unix_bpf_bypass_getsockopt(int level, int optname)
9177b26952aSAlexander Mikhalitsyn {
9187b26952aSAlexander Mikhalitsyn if (level == SOL_SOCKET) {
9197b26952aSAlexander Mikhalitsyn switch (optname) {
9207b26952aSAlexander Mikhalitsyn case SO_PEERPIDFD:
9217b26952aSAlexander Mikhalitsyn return true;
9227b26952aSAlexander Mikhalitsyn default:
9237b26952aSAlexander Mikhalitsyn return false;
9247b26952aSAlexander Mikhalitsyn }
9257b26952aSAlexander Mikhalitsyn }
9267b26952aSAlexander Mikhalitsyn
9277b26952aSAlexander Mikhalitsyn return false;
9287b26952aSAlexander Mikhalitsyn }
9297b26952aSAlexander Mikhalitsyn
93094531cfcSJiang Wang struct proto unix_dgram_proto = {
9310edf0824SStephen Boyd .name = "UNIX",
9321da177e4SLinus Torvalds .owner = THIS_MODULE,
9331da177e4SLinus Torvalds .obj_size = sizeof(struct unix_sock),
934c7272e15SCong Wang .close = unix_close,
9357b26952aSAlexander Mikhalitsyn .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt,
936c6382918SCong Wang #ifdef CONFIG_BPF_SYSCALL
93794531cfcSJiang Wang .psock_update_sk_prot = unix_dgram_bpf_update_proto,
938c6382918SCong Wang #endif
9391da177e4SLinus Torvalds };
9401da177e4SLinus Torvalds
94194531cfcSJiang Wang struct proto unix_stream_proto = {
94294531cfcSJiang Wang .name = "UNIX-STREAM",
94394531cfcSJiang Wang .owner = THIS_MODULE,
94494531cfcSJiang Wang .obj_size = sizeof(struct unix_sock),
94594531cfcSJiang Wang .close = unix_close,
94694531cfcSJiang Wang .unhash = unix_unhash,
9477b26952aSAlexander Mikhalitsyn .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt,
94894531cfcSJiang Wang #ifdef CONFIG_BPF_SYSCALL
94994531cfcSJiang Wang .psock_update_sk_prot = unix_stream_bpf_update_proto,
95094531cfcSJiang Wang #endif
95194531cfcSJiang Wang };
95294531cfcSJiang Wang
unix_create1(struct net * net,struct socket * sock,int kern,int type)95394531cfcSJiang Wang static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
9541da177e4SLinus Torvalds {
9551da177e4SLinus Torvalds struct unix_sock *u;
956f4bd73b5SKuniyuki Iwashima struct sock *sk;
957f4bd73b5SKuniyuki Iwashima int err;
9581da177e4SLinus Torvalds
959518de9b3SEric Dumazet atomic_long_inc(&unix_nr_socks);
960f4bd73b5SKuniyuki Iwashima if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
961f4bd73b5SKuniyuki Iwashima err = -ENFILE;
962f4bd73b5SKuniyuki Iwashima goto err;
963f4bd73b5SKuniyuki Iwashima }
9641da177e4SLinus Torvalds
96594531cfcSJiang Wang if (type == SOCK_STREAM)
96694531cfcSJiang Wang sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
96794531cfcSJiang Wang else /*dgram and seqpacket */
96894531cfcSJiang Wang sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
96994531cfcSJiang Wang
970f4bd73b5SKuniyuki Iwashima if (!sk) {
971f4bd73b5SKuniyuki Iwashima err = -ENOMEM;
972f4bd73b5SKuniyuki Iwashima goto err;
973f4bd73b5SKuniyuki Iwashima }
9741da177e4SLinus Torvalds
9751da177e4SLinus Torvalds sock_init_data(sock, sk);
9761da177e4SLinus Torvalds
977e6b4b873SKuniyuki Iwashima sk->sk_hash = unix_unbound_hash(sk);
9783aa9799eSVladimir Davydov sk->sk_allocation = GFP_KERNEL_ACCOUNT;
9791da177e4SLinus Torvalds sk->sk_write_space = unix_write_space;
98029fce603SKuniyuki Iwashima sk->sk_max_ack_backlog = READ_ONCE(net->unx.sysctl_max_dgram_qlen);
9811da177e4SLinus Torvalds sk->sk_destruct = unix_sock_destructor;
9821da177e4SLinus Torvalds u = unix_sk(sk);
983301fdbaaSKuniyuki Iwashima u->inflight = 0;
98440ffe67dSAl Viro u->path.dentry = NULL;
98540ffe67dSAl Viro u->path.mnt = NULL;
986fd19f329SBenjamin LaHaise spin_lock_init(&u->lock);
9871fd05ba5SMiklos Szeredi INIT_LIST_HEAD(&u->link);
9886e1ce3c3SLinus Torvalds mutex_init(&u->iolock); /* single task reading lock */
9896e1ce3c3SLinus Torvalds mutex_init(&u->bindlock); /* single task binding lock */
9901da177e4SLinus Torvalds init_waitqueue_head(&u->peer_wait);
9917d267278SRainer Weikusat init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
9923c32da19SKirill Tkhai memset(&u->scm_stat, 0, sizeof(struct scm_stat));
99379b05beaSKuniyuki Iwashima unix_insert_unbound_socket(net, sk);
994f4bd73b5SKuniyuki Iwashima
995340c3d33SKuniyuki Iwashima sock_prot_inuse_add(net, sk->sk_prot, 1);
996f4bd73b5SKuniyuki Iwashima
9971da177e4SLinus Torvalds return sk;
998f4bd73b5SKuniyuki Iwashima
999f4bd73b5SKuniyuki Iwashima err:
1000f4bd73b5SKuniyuki Iwashima atomic_long_dec(&unix_nr_socks);
1001f4bd73b5SKuniyuki Iwashima return ERR_PTR(err);
10021da177e4SLinus Torvalds }
10031da177e4SLinus Torvalds
unix_create(struct net * net,struct socket * sock,int protocol,int kern)10043f378b68SEric Paris static int unix_create(struct net *net, struct socket *sock, int protocol,
10053f378b68SEric Paris int kern)
10061da177e4SLinus Torvalds {
1007f4bd73b5SKuniyuki Iwashima struct sock *sk;
1008f4bd73b5SKuniyuki Iwashima
10091da177e4SLinus Torvalds if (protocol && protocol != PF_UNIX)
10101da177e4SLinus Torvalds return -EPROTONOSUPPORT;
10111da177e4SLinus Torvalds
10121da177e4SLinus Torvalds sock->state = SS_UNCONNECTED;
10131da177e4SLinus Torvalds
10141da177e4SLinus Torvalds switch (sock->type) {
10151da177e4SLinus Torvalds case SOCK_STREAM:
10161da177e4SLinus Torvalds sock->ops = &unix_stream_ops;
10171da177e4SLinus Torvalds break;
10181da177e4SLinus Torvalds /*
10191da177e4SLinus Torvalds * Believe it or not BSD has AF_UNIX, SOCK_RAW though
10201da177e4SLinus Torvalds * nothing uses it.
10211da177e4SLinus Torvalds */
10221da177e4SLinus Torvalds case SOCK_RAW:
10231da177e4SLinus Torvalds sock->type = SOCK_DGRAM;
1024df561f66SGustavo A. R. Silva fallthrough;
10251da177e4SLinus Torvalds case SOCK_DGRAM:
10261da177e4SLinus Torvalds sock->ops = &unix_dgram_ops;
10271da177e4SLinus Torvalds break;
10281da177e4SLinus Torvalds case SOCK_SEQPACKET:
10291da177e4SLinus Torvalds sock->ops = &unix_seqpacket_ops;
10301da177e4SLinus Torvalds break;
10311da177e4SLinus Torvalds default:
10321da177e4SLinus Torvalds return -ESOCKTNOSUPPORT;
10331da177e4SLinus Torvalds }
10341da177e4SLinus Torvalds
1035f4bd73b5SKuniyuki Iwashima sk = unix_create1(net, sock, kern, sock->type);
1036f4bd73b5SKuniyuki Iwashima if (IS_ERR(sk))
1037f4bd73b5SKuniyuki Iwashima return PTR_ERR(sk);
1038f4bd73b5SKuniyuki Iwashima
1039f4bd73b5SKuniyuki Iwashima return 0;
10401da177e4SLinus Torvalds }
10411da177e4SLinus Torvalds
unix_release(struct socket * sock)10421da177e4SLinus Torvalds static int unix_release(struct socket *sock)
10431da177e4SLinus Torvalds {
10441da177e4SLinus Torvalds struct sock *sk = sock->sk;
10451da177e4SLinus Torvalds
10461da177e4SLinus Torvalds if (!sk)
10471da177e4SLinus Torvalds return 0;
10481da177e4SLinus Torvalds
1049c7272e15SCong Wang sk->sk_prot->close(sk, 0);
1050ded34e0fSPaul Moore unix_release_sock(sk, 0);
10511da177e4SLinus Torvalds sock->sk = NULL;
10521da177e4SLinus Torvalds
1053ded34e0fSPaul Moore return 0;
10541da177e4SLinus Torvalds }
10551da177e4SLinus Torvalds
/* Look up the socket bound to the filesystem path in @sunaddr.
 *
 * On success the path's access time is touched and the socket returned by
 * unix_find_socket_byinode() is handed to the caller.  Failures are reported
 * as ERR_PTR(): the error from kern_path() or path_permission(),
 * -ECONNREFUSED when the path is not a socket inode or no socket is found
 * for it, and -EPROTOTYPE when the socket's type differs from @type.
 */
static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
				  int type)
{
	struct inode *inode;
	struct path path;
	struct sock *sk;
	int err;

	unix_mkname_bsd(sunaddr, addr_len);
	err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
	if (err)
		return ERR_PTR(err);

	/* Write permission on the path is required. */
	err = path_permission(&path, MAY_WRITE);
	if (err)
		goto out_path;

	err = -ECONNREFUSED;
	inode = d_backing_inode(path.dentry);
	if (!S_ISSOCK(inode->i_mode))
		goto out_path;

	sk = unix_find_socket_byinode(inode);
	if (!sk)
		goto out_path;

	if (sk->sk_type != type) {
		err = -EPROTOTYPE;
		goto out_sock;
	}

	touch_atime(&path);
	path_put(&path);

	return sk;

out_sock:
	sock_put(sk);
out_path:
	path_put(&path);
	return ERR_PTR(err);
}
1099fa39ef0eSKuniyuki Iwashima
/* Look up a socket by name in @net, using the hash computed by
 * unix_abstract_hash() from the address and @type.
 *
 * Returns the socket found, after touching the access time of its path if it
 * has a dentry, or ERR_PTR(-ECONNREFUSED) when no socket matches.
 */
static struct sock *unix_find_abstract(struct net *net,
				       struct sockaddr_un *sunaddr,
				       int addr_len, int type)
{
	unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type);
	struct sock *found;

	found = unix_find_socket_byname(net, sunaddr, addr_len, hash);
	if (!found)
		return ERR_PTR(-ECONNREFUSED);

	if (unix_sk(found)->path.dentry)
		touch_atime(&unix_sk(found)->path);

	return found;
}
1118fa39ef0eSKuniyuki Iwashima
unix_find_other(struct net * net,struct sockaddr_un * sunaddr,int addr_len,int type)1119fa39ef0eSKuniyuki Iwashima static struct sock *unix_find_other(struct net *net,
1120fa39ef0eSKuniyuki Iwashima struct sockaddr_un *sunaddr,
1121d2d8c9fdSKuniyuki Iwashima int addr_len, int type)
1122fa39ef0eSKuniyuki Iwashima {
1123fa39ef0eSKuniyuki Iwashima struct sock *sk;
1124fa39ef0eSKuniyuki Iwashima
1125fa39ef0eSKuniyuki Iwashima if (sunaddr->sun_path[0])
112651bae889SKuniyuki Iwashima sk = unix_find_bsd(sunaddr, addr_len, type);
1127fa39ef0eSKuniyuki Iwashima else
1128d2d8c9fdSKuniyuki Iwashima sk = unix_find_abstract(net, sunaddr, addr_len, type);
1129fa39ef0eSKuniyuki Iwashima
1130fa39ef0eSKuniyuki Iwashima return sk;
1131fa39ef0eSKuniyuki Iwashima }
1132fa39ef0eSKuniyuki Iwashima
unix_autobind(struct sock * sk)1133f7ed31f4SKuniyuki Iwashima static int unix_autobind(struct sock *sk)
11341da177e4SLinus Torvalds {
11351da177e4SLinus Torvalds struct unix_sock *u = unix_sk(sk);
1136ac325c7fSKuniyuki Iwashima unsigned int new_hash, old_hash;
113779b05beaSKuniyuki Iwashima struct net *net = sock_net(sk);
11381da177e4SLinus Torvalds struct unix_address *addr;
11399acbc584SKuniyuki Iwashima u32 lastnum, ordernum;
1140f7ed31f4SKuniyuki Iwashima int err;
11411da177e4SLinus Torvalds
11426e1ce3c3SLinus Torvalds err = mutex_lock_interruptible(&u->bindlock);
114337ab4fa7SSasha Levin if (err)
114437ab4fa7SSasha Levin return err;
11451da177e4SLinus Torvalds
11461da177e4SLinus Torvalds if (u->addr)
11471da177e4SLinus Torvalds goto out;
11481da177e4SLinus Torvalds
11491da177e4SLinus Torvalds err = -ENOMEM;
1150755662ceSKuniyuki Iwashima addr = kzalloc(sizeof(*addr) +
1151755662ceSKuniyuki Iwashima offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
11521da177e4SLinus Torvalds if (!addr)
11531da177e4SLinus Torvalds goto out;
11541da177e4SLinus Torvalds
11559acbc584SKuniyuki Iwashima addr->len = offsetof(struct sockaddr_un, sun_path) + 6;
11561da177e4SLinus Torvalds addr->name->sun_family = AF_UNIX;
11578c9814b9SReshetova, Elena refcount_set(&addr->refcnt, 1);
11581da177e4SLinus Torvalds
1159ac325c7fSKuniyuki Iwashima old_hash = sk->sk_hash;
1160a251c17aSJason A. Donenfeld ordernum = get_random_u32();
11619acbc584SKuniyuki Iwashima lastnum = ordernum & 0xFFFFF;
11621da177e4SLinus Torvalds retry:
11639acbc584SKuniyuki Iwashima ordernum = (ordernum + 1) & 0xFFFFF;
11649acbc584SKuniyuki Iwashima sprintf(addr->name->sun_path + 1, "%05x", ordernum);
11651da177e4SLinus Torvalds
1166e6b4b873SKuniyuki Iwashima new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
116779b05beaSKuniyuki Iwashima unix_table_double_lock(net, old_hash, new_hash);
11681da177e4SLinus Torvalds
116979b05beaSKuniyuki Iwashima if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
117079b05beaSKuniyuki Iwashima unix_table_double_unlock(net, old_hash, new_hash);
1171afd20b92SKuniyuki Iwashima
11729acbc584SKuniyuki Iwashima /* __unix_find_socket_byname() may take long time if many names
11738df73ff9STetsuo Handa * are already in use.
11748df73ff9STetsuo Handa */
11758df73ff9STetsuo Handa cond_resched();
11769acbc584SKuniyuki Iwashima
11779acbc584SKuniyuki Iwashima if (ordernum == lastnum) {
11788df73ff9STetsuo Handa /* Give up if all names seems to be in use. */
11798df73ff9STetsuo Handa err = -ENOSPC;
11809acbc584SKuniyuki Iwashima unix_release_addr(addr);
11818df73ff9STetsuo Handa goto out;
11828df73ff9STetsuo Handa }
11839acbc584SKuniyuki Iwashima
11841da177e4SLinus Torvalds goto retry;
11851da177e4SLinus Torvalds }
11861da177e4SLinus Torvalds
1187cf2f225eSKuniyuki Iwashima __unix_set_addr_hash(net, sk, addr, new_hash);
118879b05beaSKuniyuki Iwashima unix_table_double_unlock(net, old_hash, new_hash);
11891da177e4SLinus Torvalds err = 0;
11901da177e4SLinus Torvalds
11916e1ce3c3SLinus Torvalds out: mutex_unlock(&u->bindlock);
11921da177e4SLinus Torvalds return err;
11931da177e4SLinus Torvalds }
11941da177e4SLinus Torvalds
unix_bind_bsd(struct sock * sk,struct sockaddr_un * sunaddr,int addr_len)119512f21c49SKuniyuki Iwashima static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
119612f21c49SKuniyuki Iwashima int addr_len)
1197faf02010SAl Viro {
119871e6be6fSAl Viro umode_t mode = S_IFSOCK |
119971e6be6fSAl Viro (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
120012f21c49SKuniyuki Iwashima struct unix_sock *u = unix_sk(sk);
1201ac325c7fSKuniyuki Iwashima unsigned int new_hash, old_hash;
120279b05beaSKuniyuki Iwashima struct net *net = sock_net(sk);
1203abf08576SChristian Brauner struct mnt_idmap *idmap;
120412f21c49SKuniyuki Iwashima struct unix_address *addr;
120538f7bd94SLinus Torvalds struct dentry *dentry;
120612f21c49SKuniyuki Iwashima struct path parent;
120771e6be6fSAl Viro int err;
120871e6be6fSAl Viro
1209ecb4534bSKuniyuki Iwashima addr_len = unix_mkname_bsd(sunaddr, addr_len);
121012f21c49SKuniyuki Iwashima addr = unix_create_addr(sunaddr, addr_len);
121112f21c49SKuniyuki Iwashima if (!addr)
121212f21c49SKuniyuki Iwashima return -ENOMEM;
121312f21c49SKuniyuki Iwashima
121438f7bd94SLinus Torvalds /*
121538f7bd94SLinus Torvalds * Get the parent directory, calculate the hash for last
121638f7bd94SLinus Torvalds * component.
121738f7bd94SLinus Torvalds */
121871e6be6fSAl Viro dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
121912f21c49SKuniyuki Iwashima if (IS_ERR(dentry)) {
122012f21c49SKuniyuki Iwashima err = PTR_ERR(dentry);
122112f21c49SKuniyuki Iwashima goto out;
122212f21c49SKuniyuki Iwashima }
1223faf02010SAl Viro
122438f7bd94SLinus Torvalds /*
122538f7bd94SLinus Torvalds * All right, let's create it.
122638f7bd94SLinus Torvalds */
1227abf08576SChristian Brauner idmap = mnt_idmap(parent.mnt);
122871e6be6fSAl Viro err = security_path_mknod(&parent, dentry, mode, 0);
122956c1731bSAl Viro if (!err)
1230abf08576SChristian Brauner err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0);
1231c0c3b8d3SAl Viro if (err)
123212f21c49SKuniyuki Iwashima goto out_path;
12336e1ce3c3SLinus Torvalds err = mutex_lock_interruptible(&u->bindlock);
1234c0c3b8d3SAl Viro if (err)
1235c0c3b8d3SAl Viro goto out_unlink;
1236c0c3b8d3SAl Viro if (u->addr)
1237c0c3b8d3SAl Viro goto out_unlock;
12381da177e4SLinus Torvalds
1239ac325c7fSKuniyuki Iwashima old_hash = sk->sk_hash;
1240e6b4b873SKuniyuki Iwashima new_hash = unix_bsd_hash(d_backing_inode(dentry));
124179b05beaSKuniyuki Iwashima unix_table_double_lock(net, old_hash, new_hash);
124256c1731bSAl Viro u->path.mnt = mntget(parent.mnt);
124356c1731bSAl Viro u->path.dentry = dget(dentry);
1244cf2f225eSKuniyuki Iwashima __unix_set_addr_hash(net, sk, addr, new_hash);
124579b05beaSKuniyuki Iwashima unix_table_double_unlock(net, old_hash, new_hash);
124651bae889SKuniyuki Iwashima unix_insert_bsd_socket(sk);
1247aee51517SAl Viro mutex_unlock(&u->bindlock);
124856c1731bSAl Viro done_path_create(&parent, dentry);
1249fa42d910SAl Viro return 0;
1250c0c3b8d3SAl Viro
1251c0c3b8d3SAl Viro out_unlock:
1252c0c3b8d3SAl Viro mutex_unlock(&u->bindlock);
1253c0c3b8d3SAl Viro err = -EINVAL;
1254c0c3b8d3SAl Viro out_unlink:
1255c0c3b8d3SAl Viro /* failed after successful mknod? unlink what we'd created... */
1256abf08576SChristian Brauner vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL);
125712f21c49SKuniyuki Iwashima out_path:
1258c0c3b8d3SAl Viro done_path_create(&parent, dentry);
125912f21c49SKuniyuki Iwashima out:
126012f21c49SKuniyuki Iwashima unix_release_addr(addr);
126112f21c49SKuniyuki Iwashima return err == -EEXIST ? -EADDRINUSE : err;
1262fa42d910SAl Viro }
1263fa42d910SAl Viro
unix_bind_abstract(struct sock * sk,struct sockaddr_un * sunaddr,int addr_len)126412f21c49SKuniyuki Iwashima static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
126512f21c49SKuniyuki Iwashima int addr_len)
1266fa42d910SAl Viro {
1267fa42d910SAl Viro struct unix_sock *u = unix_sk(sk);
1268ac325c7fSKuniyuki Iwashima unsigned int new_hash, old_hash;
126979b05beaSKuniyuki Iwashima struct net *net = sock_net(sk);
127012f21c49SKuniyuki Iwashima struct unix_address *addr;
1271fa42d910SAl Viro int err;
1272fa42d910SAl Viro
127312f21c49SKuniyuki Iwashima addr = unix_create_addr(sunaddr, addr_len);
127412f21c49SKuniyuki Iwashima if (!addr)
127512f21c49SKuniyuki Iwashima return -ENOMEM;
127612f21c49SKuniyuki Iwashima
1277aee51517SAl Viro err = mutex_lock_interruptible(&u->bindlock);
1278aee51517SAl Viro if (err)
127912f21c49SKuniyuki Iwashima goto out;
1280aee51517SAl Viro
1281aee51517SAl Viro if (u->addr) {
128212f21c49SKuniyuki Iwashima err = -EINVAL;
128312f21c49SKuniyuki Iwashima goto out_mutex;
1284aee51517SAl Viro }
1285aee51517SAl Viro
1286ac325c7fSKuniyuki Iwashima old_hash = sk->sk_hash;
1287e6b4b873SKuniyuki Iwashima new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
128879b05beaSKuniyuki Iwashima unix_table_double_lock(net, old_hash, new_hash);
128912f21c49SKuniyuki Iwashima
129079b05beaSKuniyuki Iwashima if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
129112f21c49SKuniyuki Iwashima goto out_spin;
129212f21c49SKuniyuki Iwashima
1293cf2f225eSKuniyuki Iwashima __unix_set_addr_hash(net, sk, addr, new_hash);
129479b05beaSKuniyuki Iwashima unix_table_double_unlock(net, old_hash, new_hash);
1295aee51517SAl Viro mutex_unlock(&u->bindlock);
1296fa42d910SAl Viro return 0;
129712f21c49SKuniyuki Iwashima
129812f21c49SKuniyuki Iwashima out_spin:
129979b05beaSKuniyuki Iwashima unix_table_double_unlock(net, old_hash, new_hash);
130012f21c49SKuniyuki Iwashima err = -EADDRINUSE;
130112f21c49SKuniyuki Iwashima out_mutex:
130212f21c49SKuniyuki Iwashima mutex_unlock(&u->bindlock);
130312f21c49SKuniyuki Iwashima out:
130412f21c49SKuniyuki Iwashima unix_release_addr(addr);
130512f21c49SKuniyuki Iwashima return err;
1306aee51517SAl Viro }
1307fa42d910SAl Viro
unix_bind(struct socket * sock,struct sockaddr * uaddr,int addr_len)1308fa42d910SAl Viro static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1309fa42d910SAl Viro {
1310fa42d910SAl Viro struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
13115c32a3edSKuniyuki Iwashima struct sock *sk = sock->sk;
13125c32a3edSKuniyuki Iwashima int err;
1313fa42d910SAl Viro
1314b8a58aa6SKuniyuki Iwashima if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
1315b8a58aa6SKuniyuki Iwashima sunaddr->sun_family == AF_UNIX)
1316f7ed31f4SKuniyuki Iwashima return unix_autobind(sk);
1317fa42d910SAl Viro
1318b8a58aa6SKuniyuki Iwashima err = unix_validate_addr(sunaddr, addr_len);
1319b8a58aa6SKuniyuki Iwashima if (err)
1320b8a58aa6SKuniyuki Iwashima return err;
1321b8a58aa6SKuniyuki Iwashima
132212f21c49SKuniyuki Iwashima if (sunaddr->sun_path[0])
132312f21c49SKuniyuki Iwashima err = unix_bind_bsd(sk, sunaddr, addr_len);
1324fa42d910SAl Viro else
132512f21c49SKuniyuki Iwashima err = unix_bind_abstract(sk, sunaddr, addr_len);
132612f21c49SKuniyuki Iwashima
132712f21c49SKuniyuki Iwashima return err;
13281da177e4SLinus Torvalds }
13291da177e4SLinus Torvalds
unix_state_double_lock(struct sock * sk1,struct sock * sk2)1330278a3de5SDavid S. Miller static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1331278a3de5SDavid S. Miller {
1332278a3de5SDavid S. Miller if (unlikely(sk1 == sk2) || !sk2) {
1333278a3de5SDavid S. Miller unix_state_lock(sk1);
1334278a3de5SDavid S. Miller return;
1335278a3de5SDavid S. Miller }
13365e7f3e03SEric Dumazet if (sk1 > sk2)
13375e7f3e03SEric Dumazet swap(sk1, sk2);
13385e7f3e03SEric Dumazet
1339278a3de5SDavid S. Miller unix_state_lock(sk1);
13405e7f3e03SEric Dumazet unix_state_lock_nested(sk2, U_LOCK_SECOND);
1341278a3de5SDavid S. Miller }
1342278a3de5SDavid S. Miller
unix_state_double_unlock(struct sock * sk1,struct sock * sk2)1343278a3de5SDavid S. Miller static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1344278a3de5SDavid S. Miller {
1345278a3de5SDavid S. Miller if (unlikely(sk1 == sk2) || !sk2) {
1346278a3de5SDavid S. Miller unix_state_unlock(sk1);
1347278a3de5SDavid S. Miller return;
1348278a3de5SDavid S. Miller }
1349278a3de5SDavid S. Miller unix_state_unlock(sk1);
1350278a3de5SDavid S. Miller unix_state_unlock(sk2);
1351278a3de5SDavid S. Miller }
1352278a3de5SDavid S. Miller
unix_dgram_connect(struct socket * sock,struct sockaddr * addr,int alen,int flags)13531da177e4SLinus Torvalds static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
13541da177e4SLinus Torvalds int alen, int flags)
13551da177e4SLinus Torvalds {
13561da177e4SLinus Torvalds struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1357340c3d33SKuniyuki Iwashima struct sock *sk = sock->sk;
13581da177e4SLinus Torvalds struct sock *other;
13591da177e4SLinus Torvalds int err;
13601da177e4SLinus Torvalds
1361defbcf2dSMateusz Jurczyk err = -EINVAL;
1362defbcf2dSMateusz Jurczyk if (alen < offsetofend(struct sockaddr, sa_family))
1363defbcf2dSMateusz Jurczyk goto out;
1364defbcf2dSMateusz Jurczyk
13651da177e4SLinus Torvalds if (addr->sa_family != AF_UNSPEC) {
1366b8a58aa6SKuniyuki Iwashima err = unix_validate_addr(sunaddr, alen);
1367b8a58aa6SKuniyuki Iwashima if (err)
1368b8a58aa6SKuniyuki Iwashima goto out;
1369b8a58aa6SKuniyuki Iwashima
13705e2ff670SAlexander Mikhalitsyn if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
13715e2ff670SAlexander Mikhalitsyn test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
1372302fe8ddSKuniyuki Iwashima !READ_ONCE(unix_sk(sk)->addr)) {
1373f7ed31f4SKuniyuki Iwashima err = unix_autobind(sk);
1374f7ed31f4SKuniyuki Iwashima if (err)
13751da177e4SLinus Torvalds goto out;
1376f7ed31f4SKuniyuki Iwashima }
13771da177e4SLinus Torvalds
1378278a3de5SDavid S. Miller restart:
1379340c3d33SKuniyuki Iwashima other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type);
1380aed26f55SKuniyuki Iwashima if (IS_ERR(other)) {
1381aed26f55SKuniyuki Iwashima err = PTR_ERR(other);
13821da177e4SLinus Torvalds goto out;
1383aed26f55SKuniyuki Iwashima }
13841da177e4SLinus Torvalds
1385278a3de5SDavid S. Miller unix_state_double_lock(sk, other);
1386278a3de5SDavid S. Miller
1387278a3de5SDavid S. Miller /* Apparently VFS overslept socket death. Retry. */
1388278a3de5SDavid S. Miller if (sock_flag(other, SOCK_DEAD)) {
1389278a3de5SDavid S. Miller unix_state_double_unlock(sk, other);
1390278a3de5SDavid S. Miller sock_put(other);
1391278a3de5SDavid S. Miller goto restart;
1392278a3de5SDavid S. Miller }
13931da177e4SLinus Torvalds
13941da177e4SLinus Torvalds err = -EPERM;
13951da177e4SLinus Torvalds if (!unix_may_send(sk, other))
13961da177e4SLinus Torvalds goto out_unlock;
13971da177e4SLinus Torvalds
13981da177e4SLinus Torvalds err = security_unix_may_send(sk->sk_socket, other->sk_socket);
13991da177e4SLinus Torvalds if (err)
14001da177e4SLinus Torvalds goto out_unlock;
14011da177e4SLinus Torvalds
140245733e98SKuniyuki Iwashima WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
140345733e98SKuniyuki Iwashima WRITE_ONCE(other->sk_state, TCP_ESTABLISHED);
14041da177e4SLinus Torvalds } else {
14051da177e4SLinus Torvalds /*
14061da177e4SLinus Torvalds * 1003.1g breaking connected state with AF_UNSPEC
14071da177e4SLinus Torvalds */
14081da177e4SLinus Torvalds other = NULL;
1409278a3de5SDavid S. Miller unix_state_double_lock(sk, other);
14101da177e4SLinus Torvalds }
14111da177e4SLinus Torvalds
14121da177e4SLinus Torvalds /*
14131da177e4SLinus Torvalds * If it was connected, reconnect.
14141da177e4SLinus Torvalds */
14151da177e4SLinus Torvalds if (unix_peer(sk)) {
14161da177e4SLinus Torvalds struct sock *old_peer = unix_peer(sk);
1417dc56ad70SEric Dumazet
14181da177e4SLinus Torvalds unix_peer(sk) = other;
1419dc56ad70SEric Dumazet if (!other)
142045733e98SKuniyuki Iwashima WRITE_ONCE(sk->sk_state, TCP_CLOSE);
14217d267278SRainer Weikusat unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
14227d267278SRainer Weikusat
1423278a3de5SDavid S. Miller unix_state_double_unlock(sk, other);
14241da177e4SLinus Torvalds
14258003545cSKuniyuki Iwashima if (other != old_peer) {
14261da177e4SLinus Torvalds unix_dgram_disconnected(sk, old_peer);
14278003545cSKuniyuki Iwashima
14288003545cSKuniyuki Iwashima unix_state_lock(old_peer);
14298003545cSKuniyuki Iwashima if (!unix_peer(old_peer))
14308003545cSKuniyuki Iwashima WRITE_ONCE(old_peer->sk_state, TCP_CLOSE);
14318003545cSKuniyuki Iwashima unix_state_unlock(old_peer);
14328003545cSKuniyuki Iwashima }
14338003545cSKuniyuki Iwashima
14341da177e4SLinus Torvalds sock_put(old_peer);
14351da177e4SLinus Torvalds } else {
14361da177e4SLinus Torvalds unix_peer(sk) = other;
1437278a3de5SDavid S. Miller unix_state_double_unlock(sk, other);
14381da177e4SLinus Torvalds }
143983301b53SCong Wang
14401da177e4SLinus Torvalds return 0;
14411da177e4SLinus Torvalds
14421da177e4SLinus Torvalds out_unlock:
1443278a3de5SDavid S. Miller unix_state_double_unlock(sk, other);
14441da177e4SLinus Torvalds sock_put(other);
14451da177e4SLinus Torvalds out:
14461da177e4SLinus Torvalds return err;
14471da177e4SLinus Torvalds }
14481da177e4SLinus Torvalds
unix_wait_for_peer(struct sock * other,long timeo)14491da177e4SLinus Torvalds static long unix_wait_for_peer(struct sock *other, long timeo)
145048851e9eSJules Irenge __releases(&unix_sk(other)->lock)
14511da177e4SLinus Torvalds {
14521da177e4SLinus Torvalds struct unix_sock *u = unix_sk(other);
14531da177e4SLinus Torvalds int sched;
14541da177e4SLinus Torvalds DEFINE_WAIT(wait);
14551da177e4SLinus Torvalds
14561da177e4SLinus Torvalds prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
14571da177e4SLinus Torvalds
14581da177e4SLinus Torvalds sched = !sock_flag(other, SOCK_DEAD) &&
14591da177e4SLinus Torvalds !(other->sk_shutdown & RCV_SHUTDOWN) &&
1460679ed006SKuniyuki Iwashima unix_recvq_full_lockless(other);
14611da177e4SLinus Torvalds
14621c92b4e5SDavid S. Miller unix_state_unlock(other);
14631da177e4SLinus Torvalds
14641da177e4SLinus Torvalds if (sched)
14651da177e4SLinus Torvalds timeo = schedule_timeout(timeo);
14661da177e4SLinus Torvalds
14671da177e4SLinus Torvalds finish_wait(&u->peer_wait, &wait);
14681da177e4SLinus Torvalds return timeo;
14691da177e4SLinus Torvalds }
14701da177e4SLinus Torvalds
/*
 * connect() for connection-oriented sockets.
 *
 * A new socket (the future accepted end) and a small skb are allocated
 * up front.  The listener is then looked up, the new socket is wired to
 * @sock's socket as its peer, and the skb is queued on the listener's
 * receive queue.
 *
 * Returns 0 on success.  Errors include -ENOMEM, -ECONNREFUSED (target is
 * not listening or has shut down receiving), -EAGAIN (listener queue full
 * and no timeout left), -EISCONN / -EINVAL (this socket is not in
 * TCP_CLOSE), plus whatever address validation, autobind, the lookup,
 * sock_intr_errno() or the security hook report.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
	struct unix_sock *u = unix_sk(sk), *new_u, *listen_u;
	struct net *net = sock_net(sk);
	struct sk_buff *req_skb = NULL;
	unsigned char cur_state;
	long wait_left;
	int err;

	err = unix_validate_addr(sunaddr, addr_len);
	if (err)
		goto out;

	if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
	     test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
	    !READ_ONCE(u->addr)) {
		err = unix_autobind(sk);
		if (err)
			goto out;
	}

	wait_left = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/*
	 * Allocate resources first: if this were done after the state is
	 * locked, everything would have to be rechecked anyway.
	 */

	/* New sock for the completed connection. */
	newsk = unix_create1(net, NULL, 0, sock->type);
	if (IS_ERR(newsk)) {
		err = PTR_ERR(newsk);
		newsk = NULL;
		goto out;
	}

	/* skb that will be sent to the listening sock. */
	req_skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (!req_skb) {
		err = -ENOMEM;
		goto out;
	}

	for (;;) {
		/* Find the listening sock. */
		other = unix_find_other(net, sunaddr, addr_len, sk->sk_type);
		if (IS_ERR(other)) {
			err = PTR_ERR(other);
			other = NULL;
			goto out;
		}

		unix_state_lock(other);

		/* The socket died after it was found: look again. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_unlock(other);
			sock_put(other);
			continue;
		}

		if (other->sk_state != TCP_LISTEN ||
		    (other->sk_shutdown & RCV_SHUTDOWN)) {
			err = -ECONNREFUSED;
			goto out_unlock;
		}

		if (!unix_recvq_full_lockless(other))
			break;

		if (!wait_left) {
			err = -EAGAIN;
			goto out_unlock;
		}

		/* unix_wait_for_peer() drops other's state lock. */
		wait_left = unix_wait_for_peer(other, wait_left);

		err = sock_intr_errno(wait_left);
		if (signal_pending(current))
			goto out;
		sock_put(other);
	}

	/*
	 * Self connect and simultaneous connect are eliminated by rejecting
	 * a TCP_LISTEN socket, which avoids a deadlock.
	 */
	cur_state = READ_ONCE(sk->sk_state);
	if (unlikely(cur_state != TCP_CLOSE)) {
		if (cur_state == TCP_ESTABLISHED)
			err = -EISCONN;
		else
			err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk, U_LOCK_SECOND);

	/* Recheck now that sk's own state lock is held. */
	if (unlikely(sk->sk_state != TCP_CLOSE)) {
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EINVAL;
		unix_state_unlock(sk);
		goto out_unlock;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open: set up the new sock. */

	sock_hold(sk);
	unix_peer(newsk) = sk;
	newsk->sk_state = TCP_ESTABLISHED;
	newsk->sk_type = sk->sk_type;
	init_peercred(newsk);
	new_u = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &new_u->peer_wq);
	listen_u = unix_sk(other);

	/*
	 * Copy the listener's address information to the new sock.
	 *
	 * *(listen_u->addr) and listen_u->path are seen fully set up here:
	 * the listener was found in the hash under the chain's lock, and it
	 * had been inserted in an earlier critical section under that same
	 * lock, which is also where the address contents, the path and the
	 * addr pointer itself were stored.
	 *
	 * Setting new_u->addr with smp_store_release() is enough to make
	 * those stores, and the store to new_u->path, visible to anyone who
	 * gets new_u->addr with smp_load_acquire() - the same guarantee as
	 * for socks bound in unix_bind() or unix_autobind().
	 */
	if (listen_u->path.dentry) {
		path_get(&listen_u->path);
		new_u->path = listen_u->path;
	}
	refcount_inc(&listen_u->addr->refcnt);
	smp_store_release(&new_u->addr, listen_u->addr);

	/* Set credentials. */
	copy_peercred(sk, other);

	sock->state = SS_CONNECTED;
	WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk) = newsk;

	unix_state_unlock(sk);

	/* Queue the skb on the listening sock and notify it. */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, req_skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(req_skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
16461da177e4SLinus Torvalds
unix_socketpair(struct socket * socka,struct socket * sockb)16471da177e4SLinus Torvalds static int unix_socketpair(struct socket *socka, struct socket *sockb)
16481da177e4SLinus Torvalds {
16491da177e4SLinus Torvalds struct sock *ska = socka->sk, *skb = sockb->sk;
16501da177e4SLinus Torvalds
16511da177e4SLinus Torvalds /* Join our sockets back to back */
16521da177e4SLinus Torvalds sock_hold(ska);
16531da177e4SLinus Torvalds sock_hold(skb);
16541da177e4SLinus Torvalds unix_peer(ska) = skb;
16551da177e4SLinus Torvalds unix_peer(skb) = ska;
1656109f6e39SEric W. Biederman init_peercred(ska);
1657109f6e39SEric W. Biederman init_peercred(skb);
16581da177e4SLinus Torvalds
16591da177e4SLinus Torvalds ska->sk_state = TCP_ESTABLISHED;
16601da177e4SLinus Torvalds skb->sk_state = TCP_ESTABLISHED;
16611da177e4SLinus Torvalds socka->state = SS_CONNECTED;
16621da177e4SLinus Torvalds sockb->state = SS_CONNECTED;
16631da177e4SLinus Torvalds return 0;
16641da177e4SLinus Torvalds }
16651da177e4SLinus Torvalds
unix_sock_inherit_flags(const struct socket * old,struct socket * new)166690c6bd34SDaniel Borkmann static void unix_sock_inherit_flags(const struct socket *old,
166790c6bd34SDaniel Borkmann struct socket *new)
166890c6bd34SDaniel Borkmann {
166990c6bd34SDaniel Borkmann if (test_bit(SOCK_PASSCRED, &old->flags))
167090c6bd34SDaniel Borkmann set_bit(SOCK_PASSCRED, &new->flags);
16715e2ff670SAlexander Mikhalitsyn if (test_bit(SOCK_PASSPIDFD, &old->flags))
16725e2ff670SAlexander Mikhalitsyn set_bit(SOCK_PASSPIDFD, &new->flags);
167390c6bd34SDaniel Borkmann if (test_bit(SOCK_PASSSEC, &old->flags))
167490c6bd34SDaniel Borkmann set_bit(SOCK_PASSSEC, &new->flags);
167590c6bd34SDaniel Borkmann }
167690c6bd34SDaniel Borkmann
/*
 * accept(): take one queued skb off the listening sock and attach the
 * sock it carries to @newsock.
 *
 * Returns 0 on success, -EOPNOTSUPP for socket types other than
 * SOCK_STREAM and SOCK_SEQPACKET, -EINVAL if the sock is not in
 * TCP_LISTEN or if no skb was returned without an error being set, or
 * the error reported by skb_recv_datagram().
 */
static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
		       bool kern)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	struct sock *tsk;
	int err;

	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		return -EOPNOTSUPP;

	if (READ_ONCE(sk->sk_state) != TCP_LISTEN)
		return -EINVAL;

	/*
	 * Once the state is TCP_LISTEN it cannot change (for now...), so
	 * no locks are necessary.
	 */
	err = -EINVAL;
	skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
				&err);
	if (!skb) {
		/* No skb and no error means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		return err;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* Attach the accepted sock to the new socket. */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);

	return 0;
}
17211da177e4SLinus Torvalds
17221da177e4SLinus Torvalds
unix_getname(struct socket * sock,struct sockaddr * uaddr,int peer)17239b2c45d4SDenys Vlasenko static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
17241da177e4SLinus Torvalds {
17251da177e4SLinus Torvalds struct sock *sk = sock->sk;
1726ae3b5641SAl Viro struct unix_address *addr;
172713cfa97bSCyrill Gorcunov DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
17281da177e4SLinus Torvalds int err = 0;
17291da177e4SLinus Torvalds
17301da177e4SLinus Torvalds if (peer) {
17311da177e4SLinus Torvalds sk = unix_peer_get(sk);
17321da177e4SLinus Torvalds
17331da177e4SLinus Torvalds err = -ENOTCONN;
17341da177e4SLinus Torvalds if (!sk)
17351da177e4SLinus Torvalds goto out;
17361da177e4SLinus Torvalds err = 0;
17371da177e4SLinus Torvalds } else {
17381da177e4SLinus Torvalds sock_hold(sk);
17391da177e4SLinus Torvalds }
17401da177e4SLinus Torvalds
1741ae3b5641SAl Viro addr = smp_load_acquire(&unix_sk(sk)->addr);
1742ae3b5641SAl Viro if (!addr) {
17431da177e4SLinus Torvalds sunaddr->sun_family = AF_UNIX;
17441da177e4SLinus Torvalds sunaddr->sun_path[0] = 0;
1745755662ceSKuniyuki Iwashima err = offsetof(struct sockaddr_un, sun_path);
17461da177e4SLinus Torvalds } else {
17479b2c45d4SDenys Vlasenko err = addr->len;
17489b2c45d4SDenys Vlasenko memcpy(sunaddr, addr->name, addr->len);
17491da177e4SLinus Torvalds }
17501da177e4SLinus Torvalds sock_put(sk);
17511da177e4SLinus Torvalds out:
17521da177e4SLinus Torvalds return err;
17531da177e4SLinus Torvalds }
17541da177e4SLinus Torvalds
unix_peek_fds(struct scm_cookie * scm,struct sk_buff * skb)1755cbcf0112SMiklos Szeredi static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1756cbcf0112SMiklos Szeredi {
1757cbcf0112SMiklos Szeredi scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1758cbcf0112SMiklos Szeredi
1759cbcf0112SMiklos Szeredi /*
1760cbcf0112SMiklos Szeredi * Garbage collection of unix sockets starts by selecting a set of
1761cbcf0112SMiklos Szeredi * candidate sockets which have reference only from being in flight
1762cbcf0112SMiklos Szeredi * (total_refs == inflight_refs). This condition is checked once during
1763cbcf0112SMiklos Szeredi * the candidate collection phase, and candidates are marked as such, so
1764cbcf0112SMiklos Szeredi * that non-candidates can later be ignored. While inflight_refs is
1765cbcf0112SMiklos Szeredi * protected by unix_gc_lock, total_refs (file count) is not, hence this
1766cbcf0112SMiklos Szeredi * is an instantaneous decision.
1767cbcf0112SMiklos Szeredi *
1768cbcf0112SMiklos Szeredi * Once a candidate, however, the socket must not be reinstalled into a
1769cbcf0112SMiklos Szeredi * file descriptor while the garbage collection is in progress.
1770cbcf0112SMiklos Szeredi *
1771cbcf0112SMiklos Szeredi * If the above conditions are met, then the directed graph of
1772cbcf0112SMiklos Szeredi * candidates (*) does not change while unix_gc_lock is held.
1773cbcf0112SMiklos Szeredi *
1774cbcf0112SMiklos Szeredi * Any operations that changes the file count through file descriptors
1775cbcf0112SMiklos Szeredi * (dup, close, sendmsg) does not change the graph since candidates are
1776cbcf0112SMiklos Szeredi * not installed in fds.
1777cbcf0112SMiklos Szeredi *
1778cbcf0112SMiklos Szeredi * Dequeing a candidate via recvmsg would install it into an fd, but
1779cbcf0112SMiklos Szeredi * that takes unix_gc_lock to decrement the inflight count, so it's
1780cbcf0112SMiklos Szeredi * serialized with garbage collection.
1781cbcf0112SMiklos Szeredi *
1782cbcf0112SMiklos Szeredi * MSG_PEEK is special in that it does not change the inflight count,
1783cbcf0112SMiklos Szeredi * yet does install the socket into an fd. The following lock/unlock
1784cbcf0112SMiklos Szeredi * pair is to ensure serialization with garbage collection. It must be
1785cbcf0112SMiklos Szeredi * done between incrementing the file count and installing the file into
1786cbcf0112SMiklos Szeredi * an fd.
1787cbcf0112SMiklos Szeredi *
1788cbcf0112SMiklos Szeredi * If garbage collection starts after the barrier provided by the
1789cbcf0112SMiklos Szeredi * lock/unlock, then it will see the elevated refcount and not mark this
1790cbcf0112SMiklos Szeredi * as a candidate. If a garbage collection is already in progress
1791cbcf0112SMiklos Szeredi * before the file count was incremented, then the lock/unlock pair will
1792cbcf0112SMiklos Szeredi * ensure that garbage collection is finished before progressing to
1793cbcf0112SMiklos Szeredi * installing the fd.
1794cbcf0112SMiklos Szeredi *
1795cbcf0112SMiklos Szeredi * (*) A -> B where B is on the queue of A or B is on the queue of C
1796cbcf0112SMiklos Szeredi * which is on the queue of listening socket A.
1797cbcf0112SMiklos Szeredi */
1798cbcf0112SMiklos Szeredi spin_lock(&unix_gc_lock);
1799cbcf0112SMiklos Szeredi spin_unlock(&unix_gc_lock);
1800cbcf0112SMiklos Szeredi }
1801cbcf0112SMiklos Szeredi
/*
 * Copy the sender's ancillary state from @scm into the control block of @skb.
 *
 * A pid reference is taken with get_pid(), uid/gid are copied from
 * scm->creds, and security data is filled in through unix_get_secdata().
 * The fd list is attached with unix_attach_fds() only when @send_fds is true
 * and @scm actually carries one; otherwise UNIXCB(skb).fp stays NULL.
 *
 * unix_destruct_scm is installed as the skb destructor in every case, also
 * when attaching the fds failed.
 *
 * Returns 0, or whatever unix_attach_fds() returned.
 */
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
	int ret;

	UNIXCB(skb).pid = get_pid(scm->pid);
	UNIXCB(skb).uid = scm->creds.uid;
	UNIXCB(skb).gid = scm->creds.gid;
	UNIXCB(skb).fp = NULL;
	unix_get_secdata(scm, skb);

	ret = (send_fds && scm->fp) ? unix_attach_fds(scm, skb) : 0;

	skb->destructor = unix_destruct_scm;
	return ret;
}
18177361c36cSEric W. Biederman
unix_passcred_enabled(const struct socket * sock,const struct sock * other)18189490f886SHannes Frederic Sowa static bool unix_passcred_enabled(const struct socket *sock,
18199490f886SHannes Frederic Sowa const struct sock *other)
18209490f886SHannes Frederic Sowa {
18219490f886SHannes Frederic Sowa return test_bit(SOCK_PASSCRED, &sock->flags) ||
18225e2ff670SAlexander Mikhalitsyn test_bit(SOCK_PASSPIDFD, &sock->flags) ||
18239490f886SHannes Frederic Sowa !other->sk_socket ||
18245e2ff670SAlexander Mikhalitsyn test_bit(SOCK_PASSCRED, &other->sk_socket->flags) ||
18255e2ff670SAlexander Mikhalitsyn test_bit(SOCK_PASSPIDFD, &other->sk_socket->flags);
18269490f886SHannes Frederic Sowa }
18279490f886SHannes Frederic Sowa
18281da177e4SLinus Torvalds /*
182916e57262SEric Dumazet * Some apps rely on write() giving SCM_CREDENTIALS
183016e57262SEric Dumazet * We include credentials if source or destination socket
183116e57262SEric Dumazet * asserted SOCK_PASSCRED.
183216e57262SEric Dumazet */
maybe_add_creds(struct sk_buff * skb,const struct socket * sock,const struct sock * other)183316e57262SEric Dumazet static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
183416e57262SEric Dumazet const struct sock *other)
183516e57262SEric Dumazet {
18366b0ee8c0SEric W. Biederman if (UNIXCB(skb).pid)
183716e57262SEric Dumazet return;
18389490f886SHannes Frederic Sowa if (unix_passcred_enabled(sock, other)) {
183916e57262SEric Dumazet UNIXCB(skb).pid = get_pid(task_tgid(current));
18406e0895c2SDavid S. Miller current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
184116e57262SEric Dumazet }
184216e57262SEric Dumazet }
184316e57262SEric Dumazet
unix_skb_scm_eq(struct sk_buff * skb,struct scm_cookie * scm)18449490f886SHannes Frederic Sowa static bool unix_skb_scm_eq(struct sk_buff *skb,
18459490f886SHannes Frederic Sowa struct scm_cookie *scm)
18469490f886SHannes Frederic Sowa {
1847b146cbf2SKees Cook return UNIXCB(skb).pid == scm->pid &&
1848b146cbf2SKees Cook uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
1849b146cbf2SKees Cook gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
18509490f886SHannes Frederic Sowa unix_secdata_eq(scm, skb);
18519490f886SHannes Frederic Sowa }
18529490f886SHannes Frederic Sowa
scm_stat_add(struct sock * sk,struct sk_buff * skb)18533c32da19SKirill Tkhai static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
18543c32da19SKirill Tkhai {
18553c32da19SKirill Tkhai struct scm_fp_list *fp = UNIXCB(skb).fp;
18563c32da19SKirill Tkhai struct unix_sock *u = unix_sk(sk);
18573c32da19SKirill Tkhai
18583c32da19SKirill Tkhai if (unlikely(fp && fp->count))
18597782040bSPaolo Abeni atomic_add(fp->count, &u->scm_stat.nr_fds);
18603c32da19SKirill Tkhai }
18613c32da19SKirill Tkhai
scm_stat_del(struct sock * sk,struct sk_buff * skb)18623c32da19SKirill Tkhai static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
18633c32da19SKirill Tkhai {
18643c32da19SKirill Tkhai struct scm_fp_list *fp = UNIXCB(skb).fp;
18653c32da19SKirill Tkhai struct unix_sock *u = unix_sk(sk);
18663c32da19SKirill Tkhai
18673c32da19SKirill Tkhai if (unlikely(fp && fp->count))
18687782040bSPaolo Abeni atomic_sub(fp->count, &u->scm_stat.nr_fds);
18693c32da19SKirill Tkhai }
18703c32da19SKirill Tkhai
187116e57262SEric Dumazet /*
18721da177e4SLinus Torvalds * Send AF_UNIX data.
18731da177e4SLinus Torvalds */
18741da177e4SLinus Torvalds
/*
 * unix_dgram_sendmsg - send one datagram on an AF_UNIX socket.
 *
 * Also called by unix_seqpacket_sendmsg(), which is why the body tests
 * sk->sk_type against SOCK_SEQPACKET in two places.
 *
 * Destination: msg->msg_name when msg->msg_namelen is non-zero (validated
 * with unix_validate_addr(), resolved later with unix_find_other()),
 * otherwise the peer returned by unix_peer_get().
 *
 * Returns len on success.  len is also returned when sk_filter() on the
 * receiver rejects the packet; the skb is dropped and no error is reported.
 * Otherwise returns a negative errno.  Values assigned in this function are
 * -EOPNOTSUPP (MSG_OOB), -ENOTCONN, -EMSGSIZE, -ECONNRESET, -EPERM, -EPIPE,
 * -ECONNREFUSED and -EAGAIN; other errors come straight from callees.
 *
 * Every exit after scm_send() succeeded runs scm_destroy(&scm).
 * The cleanup labels fall through: out_unlock -> out_free -> out.
 */
unix_dgram_sendmsg(struct socket * sock,struct msghdr * msg,size_t len)18751b784140SYing Xue static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
18761b784140SYing Xue size_t len)
18771da177e4SLinus Torvalds {
1878342dfc30SSteffen Hurrle DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1879340c3d33SKuniyuki Iwashima struct sock *sk = sock->sk, *other = NULL;
1880340c3d33SKuniyuki Iwashima struct unix_sock *u = unix_sk(sk);
18817cc05662SChristoph Hellwig struct scm_cookie scm;
1882340c3d33SKuniyuki Iwashima struct sk_buff *skb;
1883eb6a2481SEric Dumazet int data_len = 0;
/* Non-zero while sk's state lock is held in addition to other's. */
18847d267278SRainer Weikusat int sk_locked;
1885340c3d33SKuniyuki Iwashima long timeo;
1886340c3d33SKuniyuki Iwashima int err;
18871da177e4SLinus Torvalds
18885f23b734Sdann frazier wait_for_unix_gc();
18897cc05662SChristoph Hellwig err = scm_send(sock, msg, &scm, false);
18901da177e4SLinus Torvalds if (err < 0)
18911da177e4SLinus Torvalds return err;
18921da177e4SLinus Torvalds
18931da177e4SLinus Torvalds err = -EOPNOTSUPP;
18941da177e4SLinus Torvalds if (msg->msg_flags&MSG_OOB)
18951da177e4SLinus Torvalds goto out;
18961da177e4SLinus Torvalds
/*
 * Explicit address: only validate it here, the lookup happens at restart.
 * No address: use the connected peer, or fail with -ENOTCONN.
 */
18971da177e4SLinus Torvalds if (msg->msg_namelen) {
1898b8a58aa6SKuniyuki Iwashima err = unix_validate_addr(sunaddr, msg->msg_namelen);
1899b8a58aa6SKuniyuki Iwashima if (err)
1900b8a58aa6SKuniyuki Iwashima goto out;
19011da177e4SLinus Torvalds } else {
19021da177e4SLinus Torvalds sunaddr = NULL;
19031da177e4SLinus Torvalds err = -ENOTCONN;
19041da177e4SLinus Torvalds other = unix_peer_get(sk);
19051da177e4SLinus Torvalds if (!other)
19061da177e4SLinus Torvalds goto out;
19071da177e4SLinus Torvalds }
19081da177e4SLinus Torvalds
/*
 * A sender with SOCK_PASSCRED or SOCK_PASSPIDFD set but no address yet
 * is autobound before sending.
 */
19095e2ff670SAlexander Mikhalitsyn if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1910302fe8ddSKuniyuki Iwashima test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
1911302fe8ddSKuniyuki Iwashima !READ_ONCE(u->addr)) {
1912f7ed31f4SKuniyuki Iwashima err = unix_autobind(sk);
1913f7ed31f4SKuniyuki Iwashima if (err)
19141da177e4SLinus Torvalds goto out;
1915f7ed31f4SKuniyuki Iwashima }
19161da177e4SLinus Torvalds
/* A datagram must fit in the send buffer, less 32 bytes. */
19171da177e4SLinus Torvalds err = -EMSGSIZE;
1918996ec22fSKuniyuki Iwashima if (len > READ_ONCE(sk->sk_sndbuf) - 32)
19191da177e4SLinus Torvalds goto out;
19201da177e4SLinus Torvalds
/*
 * For messages above SKB_MAX_ALLOC, data_len is the part placed in paged
 * data: the excess over SKB_MAX_ALLOC, capped at MAX_SKB_FRAGS pages and
 * rounded up to a page multiple.  The linear part is len - data_len.
 */
192131ff6aa5SKirill Tkhai if (len > SKB_MAX_ALLOC) {
1922eb6a2481SEric Dumazet data_len = min_t(size_t,
1923eb6a2481SEric Dumazet len - SKB_MAX_ALLOC,
1924eb6a2481SEric Dumazet MAX_SKB_FRAGS * PAGE_SIZE);
192531ff6aa5SKirill Tkhai data_len = PAGE_ALIGN(data_len);
192631ff6aa5SKirill Tkhai
192731ff6aa5SKirill Tkhai BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
192831ff6aa5SKirill Tkhai }
1929eb6a2481SEric Dumazet
1930eb6a2481SEric Dumazet skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
193128d64271SEric Dumazet msg->msg_flags & MSG_DONTWAIT, &err,
193228d64271SEric Dumazet PAGE_ALLOC_COSTLY_ORDER);
19331da177e4SLinus Torvalds if (skb == NULL)
19341da177e4SLinus Torvalds goto out;
19351da177e4SLinus Torvalds
/* Datagrams always carry the fds (send_fds == true). */
19367cc05662SChristoph Hellwig err = unix_scm_to_skb(&scm, skb, true);
193725888e30SEric Dumazet if (err < 0)
19386209344fSMiklos Szeredi goto out_free;
1939877ce7c1SCatherine Zhang
1940eb6a2481SEric Dumazet skb_put(skb, len - data_len);
1941eb6a2481SEric Dumazet skb->data_len = data_len;
1942eb6a2481SEric Dumazet skb->len = len;
1943c0371da6SAl Viro err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
19441da177e4SLinus Torvalds if (err)
19451da177e4SLinus Torvalds goto out_free;
19461da177e4SLinus Torvalds
19471da177e4SLinus Torvalds timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
19481da177e4SLinus Torvalds
/*
 * restart: entered with no locks held.  other is NULL on the first pass of
 * an addressed send, and again after a dead receiver was dropped below.
 * With no address to look up, that case ends in -ECONNRESET.
 */
19491da177e4SLinus Torvalds restart:
19501da177e4SLinus Torvalds if (!other) {
19511da177e4SLinus Torvalds err = -ECONNRESET;
19521da177e4SLinus Torvalds if (sunaddr == NULL)
19531da177e4SLinus Torvalds goto out_free;
19541da177e4SLinus Torvalds
1955340c3d33SKuniyuki Iwashima other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen,
1956d2d8c9fdSKuniyuki Iwashima sk->sk_type);
1957aed26f55SKuniyuki Iwashima if (IS_ERR(other)) {
1958aed26f55SKuniyuki Iwashima err = PTR_ERR(other);
/* Reset so the out: label does not sock_put() an error pointer. */
1959aed26f55SKuniyuki Iwashima other = NULL;
19601da177e4SLinus Torvalds goto out_free;
19611da177e4SLinus Torvalds }
1962aed26f55SKuniyuki Iwashima }
19631da177e4SLinus Torvalds
1964d6ae3baeSAlban Crequy if (sk_filter(other, skb) < 0) {
1965d6ae3baeSAlban Crequy /* Toss the packet but do not return any error to the sender */
1966d6ae3baeSAlban Crequy err = len;
1967d6ae3baeSAlban Crequy goto out_free;
1968d6ae3baeSAlban Crequy }
1969d6ae3baeSAlban Crequy
19707d267278SRainer Weikusat sk_locked = 0;
19711c92b4e5SDavid S. Miller unix_state_lock(other);
/*
 * restart_locked: other's state lock is held.  When reached through the
 * goto in the queue-full branch below, sk's state lock is held as well.
 */
19727d267278SRainer Weikusat restart_locked:
19731da177e4SLinus Torvalds err = -EPERM;
19741da177e4SLinus Torvalds if (!unix_may_send(sk, other))
19751da177e4SLinus Torvalds goto out_unlock;
19761da177e4SLinus Torvalds
/*
 * Receiver is dead.  Drop its lock and our reference, then decide under
 * sk's state lock: SEQPACKET fails with -EPIPE; if it was still our
 * connected peer, disconnect and fail with -ECONNREFUSED; otherwise err
 * stays 0 and the send is retried from restart.
 */
19777d267278SRainer Weikusat if (unlikely(sock_flag(other, SOCK_DEAD))) {
19781da177e4SLinus Torvalds /*
19791da177e4SLinus Torvalds * Check with 1003.1g - what should
19801da177e4SLinus Torvalds * datagram error
19811da177e4SLinus Torvalds */
19821c92b4e5SDavid S. Miller unix_state_unlock(other);
19831da177e4SLinus Torvalds sock_put(other);
19841da177e4SLinus Torvalds
19857d267278SRainer Weikusat if (!sk_locked)
19861c92b4e5SDavid S. Miller unix_state_lock(sk);
19877d267278SRainer Weikusat
19887d267278SRainer Weikusat err = 0;
19893ff8bff7SKirill Tkhai if (sk->sk_type == SOCK_SEQPACKET) {
19903ff8bff7SKirill Tkhai /* We are here only when racing with unix_release_sock()
19913ff8bff7SKirill Tkhai * is clearing @other. Never change state to TCP_CLOSE
19923ff8bff7SKirill Tkhai * unlike SOCK_DGRAM wants.
19933ff8bff7SKirill Tkhai */
19943ff8bff7SKirill Tkhai unix_state_unlock(sk);
19953ff8bff7SKirill Tkhai err = -EPIPE;
19963ff8bff7SKirill Tkhai } else if (unix_peer(sk) == other) {
19971da177e4SLinus Torvalds unix_peer(sk) = NULL;
19987d267278SRainer Weikusat unix_dgram_peer_wake_disconnect_wakeup(sk, other);
19997d267278SRainer Weikusat
200045733e98SKuniyuki Iwashima WRITE_ONCE(sk->sk_state, TCP_CLOSE);
20011c92b4e5SDavid S. Miller unix_state_unlock(sk);
20021da177e4SLinus Torvalds
20031da177e4SLinus Torvalds unix_dgram_disconnected(sk, other);
/* NOTE(review): second put; presumably the reference the peer link held. Confirm. */
20041da177e4SLinus Torvalds sock_put(other);
20051da177e4SLinus Torvalds err = -ECONNREFUSED;
20061da177e4SLinus Torvalds } else {
20071c92b4e5SDavid S. Miller unix_state_unlock(sk);
20081da177e4SLinus Torvalds }
20091da177e4SLinus Torvalds
/* All references to other are gone; out: must not put it again. */
20101da177e4SLinus Torvalds other = NULL;
20111da177e4SLinus Torvalds if (err)
20121da177e4SLinus Torvalds goto out_free;
20131da177e4SLinus Torvalds goto restart;
20141da177e4SLinus Torvalds }
20151da177e4SLinus Torvalds
20161da177e4SLinus Torvalds err = -EPIPE;
20171da177e4SLinus Torvalds if (other->sk_shutdown & RCV_SHUTDOWN)
20181da177e4SLinus Torvalds goto out_unlock;
20191da177e4SLinus Torvalds
/* The security hook is skipped for SOCK_SEQPACKET. */
20201da177e4SLinus Torvalds if (sk->sk_type != SOCK_SEQPACKET) {
20211da177e4SLinus Torvalds err = security_unix_may_send(sk->sk_socket, other->sk_socket);
20221da177e4SLinus Torvalds if (err)
20231da177e4SLinus Torvalds goto out_unlock;
20241da177e4SLinus Torvalds }
20251da177e4SLinus Torvalds
2026a5527ddaSRainer Weikusat /* other == sk && unix_peer(other) != sk if
2027a5527ddaSRainer Weikusat * - unix_peer(sk) == NULL, destination address bound to sk
2028a5527ddaSRainer Weikusat * - unix_peer(sk) == sk by time of get but disconnected before lock
2029a5527ddaSRainer Weikusat */
/*
 * Back-pressure: the receiver is not us, is not connected back to us, and
 * unix_recvq_full_lockless() reports its queue as full.
 */
2030a5527ddaSRainer Weikusat if (other != sk &&
203186b18aaaSQian Cai unlikely(unix_peer(other) != sk &&
203286b18aaaSQian Cai unix_recvq_full_lockless(other))) {
/*
 * Blocking send: wait, then redo everything from restart.
 * NOTE(review): both exits below bypass out_unlock, so
 * unix_wait_for_peer() evidently returns with other's state lock
 * released. Confirm against its definition.
 */
20337d267278SRainer Weikusat if (timeo) {
20341da177e4SLinus Torvalds timeo = unix_wait_for_peer(other, timeo);
20351da177e4SLinus Torvalds
20361da177e4SLinus Torvalds err = sock_intr_errno(timeo);
20371da177e4SLinus Torvalds if (signal_pending(current))
20381da177e4SLinus Torvalds goto out_free;
20391da177e4SLinus Torvalds
20401da177e4SLinus Torvalds goto restart;
20411da177e4SLinus Torvalds }
20421da177e4SLinus Torvalds
/*
 * Non-blocking send: both state locks are needed.  other's lock is
 * dropped and both are taken with unix_state_double_lock().
 */
20437d267278SRainer Weikusat if (!sk_locked) {
20447d267278SRainer Weikusat unix_state_unlock(other);
20457d267278SRainer Weikusat unix_state_double_lock(sk, other);
20467d267278SRainer Weikusat }
20477d267278SRainer Weikusat
/*
 * Both locks are held on every path reaching this test, so sk_locked
 * is set before jumping to out_unlock to get sk unlocked there too.
 */
20487d267278SRainer Weikusat if (unix_peer(sk) != other ||
20497d267278SRainer Weikusat unix_dgram_peer_wake_me(sk, other)) {
20507d267278SRainer Weikusat err = -EAGAIN;
20517d267278SRainer Weikusat sk_locked = 1;
20527d267278SRainer Weikusat goto out_unlock;
20537d267278SRainer Weikusat }
20547d267278SRainer Weikusat
/*
 * other's lock was released while re-locking, so repeat all checks
 * once with both locks held.
 */
20557d267278SRainer Weikusat if (!sk_locked) {
20567d267278SRainer Weikusat sk_locked = 1;
20577d267278SRainer Weikusat goto restart_locked;
20587d267278SRainer Weikusat }
20597d267278SRainer Weikusat }
20607d267278SRainer Weikusat
20617d267278SRainer Weikusat if (unlikely(sk_locked))
20627d267278SRainer Weikusat unix_state_unlock(sk);
20637d267278SRainer Weikusat
/* Deliver: queue the skb on the receiver while its state lock is held. */
20643f66116eSAlban Crequy if (sock_flag(other, SOCK_RCVTSTAMP))
20653f66116eSAlban Crequy __net_timestamp(skb);
206616e57262SEric Dumazet maybe_add_creds(skb, sock, other);
20673c32da19SKirill Tkhai scm_stat_add(other, skb);
20687782040bSPaolo Abeni skb_queue_tail(&other->sk_receive_queue, skb);
20691c92b4e5SDavid S. Miller unix_state_unlock(other);
2070676d2369SDavid S. Miller other->sk_data_ready(other);
20711da177e4SLinus Torvalds sock_put(other);
20727cc05662SChristoph Hellwig scm_destroy(&scm);
20731da177e4SLinus Torvalds return len;
20741da177e4SLinus Torvalds
/* Error exits; each label falls through into the next. */
20751da177e4SLinus Torvalds out_unlock:
20767d267278SRainer Weikusat if (sk_locked)
20777d267278SRainer Weikusat unix_state_unlock(sk);
20781c92b4e5SDavid S. Miller unix_state_unlock(other);
20791da177e4SLinus Torvalds out_free:
20801da177e4SLinus Torvalds kfree_skb(skb);
20811da177e4SLinus Torvalds out:
20821da177e4SLinus Torvalds if (other)
20831da177e4SLinus Torvalds sock_put(other);
20847cc05662SChristoph Hellwig scm_destroy(&scm);
20851da177e4SLinus Torvalds return err;
20861da177e4SLinus Torvalds }
20871da177e4SLinus Torvalds
2088e370a723SEric Dumazet /* We use paged skbs for stream sockets, and limit occupancy to 32768
2089d4e9a408STobias Klauser * bytes, and a minimum of a full page.
2090e370a723SEric Dumazet */
/*
 * unix_stream_sendmsg() uses this value twice: as the paged part of the
 * per-skb size cap (SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ), and through
 * get_order(UNIX_SKB_FRAGS_SZ) as the last argument of
 * sock_alloc_send_pskb().
 */
2091e370a723SEric Dumazet #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
20921da177e4SLinus Torvalds
20934edf21aaSKuniyuki Iwashima #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
queue_oob(struct socket * sock,struct msghdr * msg,struct sock * other,struct scm_cookie * scm,bool fds_sent)20942aab4b96SEric Dumazet static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other,
20952aab4b96SEric Dumazet struct scm_cookie *scm, bool fds_sent)
2096314001f0SRao Shoaib {
2097314001f0SRao Shoaib struct unix_sock *ousk = unix_sk(other);
2098314001f0SRao Shoaib struct sk_buff *skb;
2099314001f0SRao Shoaib int err = 0;
2100314001f0SRao Shoaib
2101314001f0SRao Shoaib skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
2102314001f0SRao Shoaib
2103314001f0SRao Shoaib if (!skb)
2104314001f0SRao Shoaib return err;
2105314001f0SRao Shoaib
21062aab4b96SEric Dumazet err = unix_scm_to_skb(scm, skb, !fds_sent);
21072aab4b96SEric Dumazet if (err < 0) {
21082aab4b96SEric Dumazet kfree_skb(skb);
21092aab4b96SEric Dumazet return err;
21102aab4b96SEric Dumazet }
2111314001f0SRao Shoaib skb_put(skb, 1);
2112314001f0SRao Shoaib err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
2113314001f0SRao Shoaib
2114314001f0SRao Shoaib if (err) {
2115314001f0SRao Shoaib kfree_skb(skb);
2116314001f0SRao Shoaib return err;
2117314001f0SRao Shoaib }
2118314001f0SRao Shoaib
2119314001f0SRao Shoaib unix_state_lock(other);
212019eed721SRao Shoaib
212119eed721SRao Shoaib if (sock_flag(other, SOCK_DEAD) ||
212219eed721SRao Shoaib (other->sk_shutdown & RCV_SHUTDOWN)) {
212319eed721SRao Shoaib unix_state_unlock(other);
212419eed721SRao Shoaib kfree_skb(skb);
212519eed721SRao Shoaib return -EPIPE;
212619eed721SRao Shoaib }
212719eed721SRao Shoaib
2128314001f0SRao Shoaib maybe_add_creds(skb, sock, other);
2129314001f0SRao Shoaib skb_get(skb);
2130314001f0SRao Shoaib
2131d59ae931SKuniyuki Iwashima scm_stat_add(other, skb);
2132d59ae931SKuniyuki Iwashima
2133d59ae931SKuniyuki Iwashima spin_lock(&other->sk_receive_queue.lock);
2134314001f0SRao Shoaib if (ousk->oob_skb)
213519eed721SRao Shoaib consume_skb(ousk->oob_skb);
2136e82025c6SKuniyuki Iwashima WRITE_ONCE(ousk->oob_skb, skb);
2137d59ae931SKuniyuki Iwashima __skb_queue_tail(&other->sk_receive_queue, skb);
2138d59ae931SKuniyuki Iwashima spin_unlock(&other->sk_receive_queue.lock);
2139314001f0SRao Shoaib
2140314001f0SRao Shoaib sk_send_sigurg(other);
2141314001f0SRao Shoaib unix_state_unlock(other);
2142314001f0SRao Shoaib other->sk_data_ready(other);
2143314001f0SRao Shoaib
2144314001f0SRao Shoaib return err;
2145314001f0SRao Shoaib }
2146314001f0SRao Shoaib #endif
2147314001f0SRao Shoaib
/*
 * unix_stream_sendmsg - send bytes on a connected AF_UNIX stream socket.
 *
 * The payload is cut into chunks, one skb per loop iteration, and each skb
 * is queued directly on the peer's sk_receive_queue.  Ancillary fds from
 * scm are attached to the first skb only (tracked by fds_sent).
 *
 * With MSG_OOB and CONFIG_AF_UNIX_OOB enabled, the final byte is held back
 * from the loop and sent through queue_oob() afterwards.
 *
 * Returns the number of bytes sent if anything was queued, even when a
 * later chunk failed ("return sent ? : err").  Otherwise returns a negative
 * errno.  On the pipe_err paths SIGPIPE is raised only when nothing was
 * sent and MSG_NOSIGNAL is not set.
 */
unix_stream_sendmsg(struct socket * sock,struct msghdr * msg,size_t len)21481b784140SYing Xue static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
21491b784140SYing Xue size_t len)
21501da177e4SLinus Torvalds {
21511da177e4SLinus Torvalds struct sock *sk = sock->sk;
21521da177e4SLinus Torvalds struct sock *other = NULL;
21531da177e4SLinus Torvalds int err, size;
2154f78a5fdaSDavid S. Miller struct sk_buff *skb;
21551da177e4SLinus Torvalds int sent = 0;
21567cc05662SChristoph Hellwig struct scm_cookie scm;
21578ba69ba6SMiklos Szeredi bool fds_sent = false;
2158e370a723SEric Dumazet int data_len;
21591da177e4SLinus Torvalds
21605f23b734Sdann frazier wait_for_unix_gc();
21617cc05662SChristoph Hellwig err = scm_send(sock, msg, &scm, false);
21621da177e4SLinus Torvalds if (err < 0)
21631da177e4SLinus Torvalds return err;
21641da177e4SLinus Torvalds
/*
 * MSG_OOB: with OOB support compiled in, hold back the last byte for
 * queue_oob() (len--).  A zero-length OOB send, or any OOB send without
 * CONFIG_AF_UNIX_OOB, fails with -EOPNOTSUPP.
 */
21651da177e4SLinus Torvalds err = -EOPNOTSUPP;
2166314001f0SRao Shoaib if (msg->msg_flags & MSG_OOB) {
21674edf21aaSKuniyuki Iwashima #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2168314001f0SRao Shoaib if (len)
2169314001f0SRao Shoaib len--;
2170314001f0SRao Shoaib else
2171314001f0SRao Shoaib #endif
21721da177e4SLinus Torvalds goto out_err;
2173314001f0SRao Shoaib }
21741da177e4SLinus Torvalds
/*
 * An explicit destination address is never accepted here: -EISCONN when
 * the socket is established, -EOPNOTSUPP otherwise.
 */
21751da177e4SLinus Torvalds if (msg->msg_namelen) {
2176776fcc45SKuniyuki Iwashima err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
21771da177e4SLinus Torvalds goto out_err;
21781da177e4SLinus Torvalds } else {
21791da177e4SLinus Torvalds err = -ENOTCONN;
/* Plain unix_peer(): this function contains no matching sock_put(other). */
2180830a1e5cSBenjamin LaHaise other = unix_peer(sk);
21811da177e4SLinus Torvalds if (!other)
21821da177e4SLinus Torvalds goto out_err;
21831da177e4SLinus Torvalds }
21841da177e4SLinus Torvalds
21850688d4e4SBreno Leitao if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
21861da177e4SLinus Torvalds goto pipe_err;
21871da177e4SLinus Torvalds
21886eba6a37SEric Dumazet while (sent < len) {
21891da177e4SLinus Torvalds size = len - sent;
21901da177e4SLinus Torvalds
/*
 * Splice path: allocate an skb with no linear or paged space of its own;
 * the pages are attached by skb_splice_from_iter() further down.
 */
2191a0dbf5f8SDavid Howells if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2192a0dbf5f8SDavid Howells skb = sock_alloc_send_pskb(sk, 0, 0,
2193a0dbf5f8SDavid Howells msg->msg_flags & MSG_DONTWAIT,
2194a0dbf5f8SDavid Howells &err, 0);
2195a0dbf5f8SDavid Howells } else {
21961da177e4SLinus Torvalds /* Keep two messages in the pipe so it schedules better */
2197996ec22fSKuniyuki Iwashima size = min_t(int, size, (READ_ONCE(sk->sk_sndbuf) >> 1) - 64);
21981da177e4SLinus Torvalds
2199e370a723SEric Dumazet /* allow fallback to order-0 allocations */
2200e370a723SEric Dumazet size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
22011da177e4SLinus Torvalds
/* Whatever exceeds SKB_MAX_HEAD(0) goes into paged data ... */
2202e370a723SEric Dumazet data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
22031da177e4SLinus Torvalds
/* ... rounded up to a page multiple, but never more than size itself. */
220431ff6aa5SKirill Tkhai data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
220531ff6aa5SKirill Tkhai
2206e370a723SEric Dumazet skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
220728d64271SEric Dumazet msg->msg_flags & MSG_DONTWAIT, &err,
220828d64271SEric Dumazet get_order(UNIX_SKB_FRAGS_SZ));
2209a0dbf5f8SDavid Howells }
2210e370a723SEric Dumazet if (!skb)
22111da177e4SLinus Torvalds goto out_err;
22121da177e4SLinus Torvalds
2213f78a5fdaSDavid S. Miller /* Only send the fds in the first buffer */
22147cc05662SChristoph Hellwig err = unix_scm_to_skb(&scm, skb, !fds_sent);
221525888e30SEric Dumazet if (err < 0) {
22166209344fSMiklos Szeredi kfree_skb(skb);
2217f78a5fdaSDavid S. Miller goto out_err;
22186209344fSMiklos Szeredi }
22198ba69ba6SMiklos Szeredi fds_sent = true;
22201da177e4SLinus Torvalds
/*
 * Fill the skb.  On the splice path the chunk size becomes whatever
 * skb_splice_from_iter() returned, and that amount is added to
 * sk->sk_wmem_alloc by hand.  Otherwise the data is copied in.
 */
2221a0dbf5f8SDavid Howells if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2222*bc6d8cc2SFrederik Deweerdt skb->ip_summed = CHECKSUM_UNNECESSARY;
2223a0dbf5f8SDavid Howells err = skb_splice_from_iter(skb, &msg->msg_iter, size,
2224a0dbf5f8SDavid Howells sk->sk_allocation);
2225a0dbf5f8SDavid Howells if (err < 0) {
2226a0dbf5f8SDavid Howells kfree_skb(skb);
2227a0dbf5f8SDavid Howells goto out_err;
2228a0dbf5f8SDavid Howells }
2229a0dbf5f8SDavid Howells size = err;
2230a0dbf5f8SDavid Howells refcount_add(size, &sk->sk_wmem_alloc);
2231a0dbf5f8SDavid Howells } else {
2232e370a723SEric Dumazet skb_put(skb, size - data_len);
2233e370a723SEric Dumazet skb->data_len = data_len;
2234e370a723SEric Dumazet skb->len = size;
2235c0371da6SAl Viro err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
22366eba6a37SEric Dumazet if (err) {
22371da177e4SLinus Torvalds kfree_skb(skb);
2238f78a5fdaSDavid S. Miller goto out_err;
22391da177e4SLinus Torvalds }
2240a0dbf5f8SDavid Howells }
22411da177e4SLinus Torvalds
/* Queue on the peer under its state lock, after checking it can still receive. */
22421c92b4e5SDavid S. Miller unix_state_lock(other);
22431da177e4SLinus Torvalds
22441da177e4SLinus Torvalds if (sock_flag(other, SOCK_DEAD) ||
22451da177e4SLinus Torvalds (other->sk_shutdown & RCV_SHUTDOWN))
22461da177e4SLinus Torvalds goto pipe_err_free;
22471da177e4SLinus Torvalds
224816e57262SEric Dumazet maybe_add_creds(skb, sock, other);
22493c32da19SKirill Tkhai scm_stat_add(other, skb);
22507782040bSPaolo Abeni skb_queue_tail(&other->sk_receive_queue, skb);
22511c92b4e5SDavid S. Miller unix_state_unlock(other);
2252676d2369SDavid S. Miller other->sk_data_ready(other);
22531da177e4SLinus Torvalds sent += size;
22541da177e4SLinus Torvalds }
22551da177e4SLinus Torvalds
/* Send the byte held back above as out-of-band data and count it. */
22564edf21aaSKuniyuki Iwashima #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2257314001f0SRao Shoaib if (msg->msg_flags & MSG_OOB) {
22582aab4b96SEric Dumazet err = queue_oob(sock, msg, other, &scm, fds_sent);
2259314001f0SRao Shoaib if (err)
2260314001f0SRao Shoaib goto out_err;
2261314001f0SRao Shoaib sent++;
2262314001f0SRao Shoaib }
2263314001f0SRao Shoaib #endif
2264314001f0SRao Shoaib
22657cc05662SChristoph Hellwig scm_destroy(&scm);
22661da177e4SLinus Torvalds
22671da177e4SLinus Torvalds return sent;
22681da177e4SLinus Torvalds
/*
 * Error exits fall through: pipe_err_free (peer locked, skb not queued)
 * -> pipe_err (optional SIGPIPE, err = -EPIPE) -> out_err.
 */
22691da177e4SLinus Torvalds pipe_err_free:
22701c92b4e5SDavid S. Miller unix_state_unlock(other);
22711da177e4SLinus Torvalds kfree_skb(skb);
22721da177e4SLinus Torvalds pipe_err:
22731da177e4SLinus Torvalds if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
22741da177e4SLinus Torvalds send_sig(SIGPIPE, current, 0);
22751da177e4SLinus Torvalds err = -EPIPE;
22761da177e4SLinus Torvalds out_err:
22777cc05662SChristoph Hellwig scm_destroy(&scm);
/* A partial send reports the byte count; the error is dropped. */
22781da177e4SLinus Torvalds return sent ? : err;
22791da177e4SLinus Torvalds }
22801da177e4SLinus Torvalds
/*
 * Send on a SOCK_SEQPACKET socket.
 *
 * A pending socket error is returned first.  The socket must be in
 * TCP_ESTABLISHED, otherwise -ENOTCONN is returned.  Any destination address
 * supplied by the caller is ignored by zeroing msg->msg_namelen, and the
 * rest of the work is done by unix_dgram_sendmsg().
 */
static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
				  size_t len)
{
	struct sock *sk = sock->sk;
	int pending = sock_error(sk);

	if (pending)
		return pending;

	if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
		return -ENOTCONN;

	/* Drop any caller-supplied address; the connected peer is used. */
	if (msg->msg_namelen)
		msg->msg_namelen = 0;

	return unix_dgram_sendmsg(sock, msg, len);
}
22991da177e4SLinus Torvalds
/*
 * Receive on a SOCK_SEQPACKET socket: only allowed in TCP_ESTABLISHED
 * (-ENOTCONN otherwise); the actual work is done by unix_dgram_recvmsg().
 */
static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
				  size_t size, int flags)
{
	if (READ_ONCE(sock->sk->sk_state) == TCP_ESTABLISHED)
		return unix_dgram_recvmsg(sock, msg, size, flags);

	return -ENOTCONN;
}
2310a05d2ad1SEric W. Biederman
unix_copy_addr(struct msghdr * msg,struct sock * sk)23111da177e4SLinus Torvalds static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
23121da177e4SLinus Torvalds {
2313ae3b5641SAl Viro struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
23141da177e4SLinus Torvalds
2315ae3b5641SAl Viro if (addr) {
2316ae3b5641SAl Viro msg->msg_namelen = addr->len;
2317ae3b5641SAl Viro memcpy(msg->msg_name, addr->name, addr->len);
23181da177e4SLinus Torvalds }
23191da177e4SLinus Torvalds }
23201da177e4SLinus Torvalds
/*
 * __unix_dgram_recvmsg - receive one datagram from sk's receive queue.
 *
 * Returns the number of bytes copied to msg.  If MSG_TRUNC was passed in
 * flags, it returns the datagram's full remaining length (skb->len - skip)
 * instead.  Returns 0 for a non-blocking read on a SOCK_SEQPACKET socket
 * that is shut down for receive, and a negative errno otherwise
 * (-EOPNOTSUPP for MSG_OOB).
 *
 * u->iolock is held from a successful dequeue until the skb is released at
 * out_free.  When no skb was obtained the mutex has already been dropped.
 */
__unix_dgram_recvmsg(struct sock * sk,struct msghdr * msg,size_t size,int flags)23219825d866SCong Wang int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
23229825d866SCong Wang int flags)
23231da177e4SLinus Torvalds {
23247cc05662SChristoph Hellwig struct scm_cookie scm;
23259825d866SCong Wang struct socket *sock = sk->sk_socket;
23261da177e4SLinus Torvalds struct unix_sock *u = unix_sk(sk);
232764874280SRainer Weikusat struct sk_buff *skb, *last;
232864874280SRainer Weikusat long timeo;
/* Offset into the datagram at which this read starts (peek offset). */
2329fd69c399SPaolo Abeni int skip;
23301da177e4SLinus Torvalds int err;
23311da177e4SLinus Torvalds
23321da177e4SLinus Torvalds err = -EOPNOTSUPP;
23331da177e4SLinus Torvalds if (flags&MSG_OOB)
23341da177e4SLinus Torvalds goto out;
23351da177e4SLinus Torvalds
233664874280SRainer Weikusat timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
233764874280SRainer Weikusat
/*
 * Try to dequeue under iolock.  On success leave the loop with the mutex
 * still held.  On -EAGAIN, wait for more packets while timeo allows; any
 * other error ends the loop.
 */
233864874280SRainer Weikusat do {
23396e1ce3c3SLinus Torvalds mutex_lock(&u->iolock);
23401da177e4SLinus Torvalds
2341f55bb7f9SPavel Emelyanov skip = sk_peek_offset(sk, flags);
2342b50b0580SSabrina Dubroca skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2343e427cad6SPaolo Abeni &skip, &err, &last);
2344e427cad6SPaolo Abeni if (skb) {
/* A real (non-peek) dequeue removes the skb's fds from the fd accounting. */
2345e427cad6SPaolo Abeni if (!(flags & MSG_PEEK))
2346e427cad6SPaolo Abeni scm_stat_del(sk, skb);
234764874280SRainer Weikusat break;
2348e427cad6SPaolo Abeni }
2349f55bb7f9SPavel Emelyanov
23506e1ce3c3SLinus Torvalds mutex_unlock(&u->iolock);
235164874280SRainer Weikusat
235264874280SRainer Weikusat if (err != -EAGAIN)
235364874280SRainer Weikusat break;
235464874280SRainer Weikusat } while (timeo &&
2355b50b0580SSabrina Dubroca !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2356b50b0580SSabrina Dubroca &err, &timeo, last));
235764874280SRainer Weikusat
23586e1ce3c3SLinus Torvalds if (!skb) { /* implies iolock unlocked */
23590a112258SFlorian Zumbiehl unix_state_lock(sk);
23600a112258SFlorian Zumbiehl /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
23610a112258SFlorian Zumbiehl if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
23620a112258SFlorian Zumbiehl (sk->sk_shutdown & RCV_SHUTDOWN))
23630a112258SFlorian Zumbiehl err = 0;
23640a112258SFlorian Zumbiehl unix_state_unlock(sk);
236564874280SRainer Weikusat goto out;
23660a112258SFlorian Zumbiehl }
23671da177e4SLinus Torvalds
/* Wake anyone sleeping on u->peer_wait, signalling writability. */
236877b75f4dSRainer Weikusat if (wq_has_sleeper(&u->peer_wait))
236967426b75SEric Dumazet wake_up_interruptible_sync_poll(&u->peer_wait,
2370a9a08845SLinus Torvalds EPOLLOUT | EPOLLWRNORM |
2371a9a08845SLinus Torvalds EPOLLWRBAND);
23721da177e4SLinus Torvalds
/* Report the address of the socket recorded in skb->sk, if the caller wants it. */
23731da177e4SLinus Torvalds if (msg->msg_name)
23741da177e4SLinus Torvalds unix_copy_addr(msg, skb->sk);
23751da177e4SLinus Torvalds
/*
 * Clamp the copy to what remains past skip; flag MSG_TRUNC when the
 * caller's buffer is smaller than that.
 */
2376f55bb7f9SPavel Emelyanov if (size > skb->len - skip)
2377f55bb7f9SPavel Emelyanov size = skb->len - skip;
2378f55bb7f9SPavel Emelyanov else if (size < skb->len - skip)
23791da177e4SLinus Torvalds msg->msg_flags |= MSG_TRUNC;
23801da177e4SLinus Torvalds
238151f3d02bSDavid S. Miller err = skb_copy_datagram_msg(skb, skip, msg, size);
23821da177e4SLinus Torvalds if (err)
23831da177e4SLinus Torvalds goto out_free;
23841da177e4SLinus Torvalds
23853f66116eSAlban Crequy if (sock_flag(sk, SOCK_RCVTSTAMP))
23863f66116eSAlban Crequy __sock_recv_timestamp(msg, sk, skb);
23873f66116eSAlban Crequy
/* Build the ancillary data from the credentials stored in the skb. */
23887cc05662SChristoph Hellwig memset(&scm, 0, sizeof(scm));
23897cc05662SChristoph Hellwig
23907cc05662SChristoph Hellwig scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
23917cc05662SChristoph Hellwig unix_set_secdata(&scm, skb);
23921da177e4SLinus Torvalds
/*
 * Real receive: move the fds out of the skb and move the peek offset
 * back by the whole datagram length.
 * Peek: advance the peek offset by the bytes copied and hand out a
 * duplicate of the fds via unix_peek_fds().
 */
23936eba6a37SEric Dumazet if (!(flags & MSG_PEEK)) {
23941da177e4SLinus Torvalds if (UNIXCB(skb).fp)
23957cc05662SChristoph Hellwig unix_detach_fds(&scm, skb);
2396f55bb7f9SPavel Emelyanov
2397f55bb7f9SPavel Emelyanov sk_peek_offset_bwd(sk, skb->len);
23986eba6a37SEric Dumazet } else {
23991da177e4SLinus Torvalds /* It is questionable: on PEEK we could:
24001da177e4SLinus Torvalds - do not return fds - good, but too simple 8)
24011da177e4SLinus Torvalds - return fds, and do not return them on read (old strategy,
24021da177e4SLinus Torvalds apparently wrong)
24031da177e4SLinus Torvalds - clone fds (I chose it for now, it is the most universal
24041da177e4SLinus Torvalds solution)
24051da177e4SLinus Torvalds
24061da177e4SLinus Torvalds POSIX 1003.1g does not actually define this clearly
24071da177e4SLinus Torvalds at all. POSIX 1003.1g doesn't define a lot of things
24081da177e4SLinus Torvalds clearly however!
24091da177e4SLinus Torvalds
24101da177e4SLinus Torvalds */
2411f55bb7f9SPavel Emelyanov
2412f55bb7f9SPavel Emelyanov sk_peek_offset_fwd(sk, size);
2413f55bb7f9SPavel Emelyanov
24141da177e4SLinus Torvalds if (UNIXCB(skb).fp)
2415cbcf0112SMiklos Szeredi unix_peek_fds(&scm, skb);
24161da177e4SLinus Torvalds }
/* MSG_TRUNC in the caller's flags asks for the full remaining length. */
24179f6f9af7SEric Dumazet err = (flags & MSG_TRUNC) ? skb->len - skip : size;
24181da177e4SLinus Torvalds
2419a9c49cc2SAlexander Mikhalitsyn scm_recv_unix(sock, msg, &scm, flags);
24201da177e4SLinus Torvalds
/* Reached on success too: release the skb and drop iolock. */
24211da177e4SLinus Torvalds out_free:
24221da177e4SLinus Torvalds skb_free_datagram(sk, skb);
24236e1ce3c3SLinus Torvalds mutex_unlock(&u->iolock);
24241da177e4SLinus Torvalds out:
24251da177e4SLinus Torvalds return err;
24261da177e4SLinus Torvalds }
24271da177e4SLinus Torvalds
/*
 * recvmsg entry point for datagram sockets.
 *
 * With CONFIG_BPF_SYSCALL, sk->sk_prot is sampled once; if it is no longer
 * &unix_dgram_proto the call is forwarded to that proto's ->recvmsg()
 * (presumably a BPF/sockmap replacement - confirm against the proto setup
 * code).  In every other case the native __unix_dgram_recvmsg() is used.
 *
 * Returns whatever the selected receive routine returns.
 */
static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
			      size_t size, int flags)
{
	struct sock *sk = sock->sk;

#ifdef CONFIG_BPF_SYSCALL
	const struct proto *cur_prot = READ_ONCE(sk->sk_prot);

	if (cur_prot == &unix_dgram_proto)
		return __unix_dgram_recvmsg(sk, msg, size, flags);

	return cur_prot->recvmsg(sk, msg, size, flags, NULL);
#else
	return __unix_dgram_recvmsg(sk, msg, size, flags);
#endif
}
24419825d866SCong Wang
/*
 * Dequeue one skb without blocking and hand it to @recv_actor.
 *
 * The dequeue is done under u->iolock; the actor is invoked after the
 * mutex has been released.  Returns the actor's result, or the error
 * reported by skb_recv_datagram() when nothing could be dequeued.
 */
static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
	struct unix_sock *usk = unix_sk(sk);
	struct sk_buff *pkt;
	int rc;

	mutex_lock(&usk->iolock);
	pkt = skb_recv_datagram(sk, MSG_DONTWAIT, &rc);
	mutex_unlock(&usk->iolock);

	return pkt ? recv_actor(sk, pkt) : rc;
}
245629df44faSCong Wang
24571da177e4SLinus Torvalds /*
245879f632c7SBenjamin Poirier * Sleep until more data has arrived. But check for races..
24591da177e4SLinus Torvalds */
unix_stream_data_wait(struct sock * sk,long timeo,struct sk_buff * last,unsigned int last_len,bool freezable)246079f632c7SBenjamin Poirier static long unix_stream_data_wait(struct sock *sk, long timeo,
246106a77b07SWANG Cong struct sk_buff *last, unsigned int last_len,
246206a77b07SWANG Cong bool freezable)
24631da177e4SLinus Torvalds {
2464f5d39b02SPeter Zijlstra unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
24652b514574SHannes Frederic Sowa struct sk_buff *tail;
24661da177e4SLinus Torvalds DEFINE_WAIT(wait);
24671da177e4SLinus Torvalds
24681c92b4e5SDavid S. Miller unix_state_lock(sk);
24691da177e4SLinus Torvalds
24701da177e4SLinus Torvalds for (;;) {
2471f5d39b02SPeter Zijlstra prepare_to_wait(sk_sleep(sk), &wait, state);
24721da177e4SLinus Torvalds
24732b514574SHannes Frederic Sowa tail = skb_peek_tail(&sk->sk_receive_queue);
24742b514574SHannes Frederic Sowa if (tail != last ||
24752b514574SHannes Frederic Sowa (tail && tail->len != last_len) ||
24761da177e4SLinus Torvalds sk->sk_err ||
24771da177e4SLinus Torvalds (sk->sk_shutdown & RCV_SHUTDOWN) ||
24781da177e4SLinus Torvalds signal_pending(current) ||
24791da177e4SLinus Torvalds !timeo)
24801da177e4SLinus Torvalds break;
24811da177e4SLinus Torvalds
24829cd3e072SEric Dumazet sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
24831c92b4e5SDavid S. Miller unix_state_unlock(sk);
248406a77b07SWANG Cong timeo = schedule_timeout(timeo);
24851c92b4e5SDavid S. Miller unix_state_lock(sk);
2486b48732e4SMark Salyzyn
2487b48732e4SMark Salyzyn if (sock_flag(sk, SOCK_DEAD))
2488b48732e4SMark Salyzyn break;
2489b48732e4SMark Salyzyn
24909cd3e072SEric Dumazet sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
24911da177e4SLinus Torvalds }
24921da177e4SLinus Torvalds
2493aa395145SEric Dumazet finish_wait(sk_sleep(sk), &wait);
24941c92b4e5SDavid S. Miller unix_state_unlock(sk);
24951da177e4SLinus Torvalds return timeo;
24961da177e4SLinus Torvalds }
24971da177e4SLinus Torvalds
unix_skb_len(const struct sk_buff * skb)2498e370a723SEric Dumazet static unsigned int unix_skb_len(const struct sk_buff *skb)
2499e370a723SEric Dumazet {
2500e370a723SEric Dumazet return skb->len - UNIXCB(skb).consumed;
2501e370a723SEric Dumazet }
2502e370a723SEric Dumazet
25032b514574SHannes Frederic Sowa struct unix_stream_read_state {
25042b514574SHannes Frederic Sowa int (*recv_actor)(struct sk_buff *, int, int,
25052b514574SHannes Frederic Sowa struct unix_stream_read_state *);
25062b514574SHannes Frederic Sowa struct socket *socket;
25072b514574SHannes Frederic Sowa struct msghdr *msg;
25082b514574SHannes Frederic Sowa struct pipe_inode_info *pipe;
25092b514574SHannes Frederic Sowa size_t size;
25102b514574SHannes Frederic Sowa int flags;
25112b514574SHannes Frederic Sowa unsigned int splice_flags;
25122b514574SHannes Frederic Sowa };
25132b514574SHannes Frederic Sowa
2514314001f0SRao Shoaib #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
unix_stream_recv_urg(struct unix_stream_read_state * state)2515314001f0SRao Shoaib static int unix_stream_recv_urg(struct unix_stream_read_state *state)
2516314001f0SRao Shoaib {
2517314001f0SRao Shoaib struct socket *sock = state->socket;
2518314001f0SRao Shoaib struct sock *sk = sock->sk;
2519314001f0SRao Shoaib struct unix_sock *u = unix_sk(sk);
2520314001f0SRao Shoaib int chunk = 1;
2521876c14adSRao Shoaib struct sk_buff *oob_skb;
2522314001f0SRao Shoaib
2523876c14adSRao Shoaib mutex_lock(&u->iolock);
2524876c14adSRao Shoaib unix_state_lock(sk);
2525d59ae931SKuniyuki Iwashima spin_lock(&sk->sk_receive_queue.lock);
2526876c14adSRao Shoaib
2527876c14adSRao Shoaib if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
2528d59ae931SKuniyuki Iwashima spin_unlock(&sk->sk_receive_queue.lock);
2529876c14adSRao Shoaib unix_state_unlock(sk);
2530876c14adSRao Shoaib mutex_unlock(&u->iolock);
2531314001f0SRao Shoaib return -EINVAL;
2532876c14adSRao Shoaib }
2533314001f0SRao Shoaib
2534876c14adSRao Shoaib oob_skb = u->oob_skb;
2535876c14adSRao Shoaib
2536e82025c6SKuniyuki Iwashima if (!(state->flags & MSG_PEEK))
2537e82025c6SKuniyuki Iwashima WRITE_ONCE(u->oob_skb, NULL);
2538069a3ec3SEric Dumazet else
2539069a3ec3SEric Dumazet skb_get(oob_skb);
2540d59ae931SKuniyuki Iwashima
2541d59ae931SKuniyuki Iwashima spin_unlock(&sk->sk_receive_queue.lock);
2542876c14adSRao Shoaib unix_state_unlock(sk);
2543876c14adSRao Shoaib
2544876c14adSRao Shoaib chunk = state->recv_actor(oob_skb, 0, chunk, state);
2545876c14adSRao Shoaib
2546069a3ec3SEric Dumazet if (!(state->flags & MSG_PEEK))
2547876c14adSRao Shoaib UNIXCB(oob_skb).consumed += 1;
2548069a3ec3SEric Dumazet
2549069a3ec3SEric Dumazet consume_skb(oob_skb);
2550876c14adSRao Shoaib
2551876c14adSRao Shoaib mutex_unlock(&u->iolock);
2552876c14adSRao Shoaib
2553314001f0SRao Shoaib if (chunk < 0)
2554314001f0SRao Shoaib return -EFAULT;
2555314001f0SRao Shoaib
2556314001f0SRao Shoaib state->msg->msg_flags |= MSG_OOB;
2557314001f0SRao Shoaib return 1;
2558314001f0SRao Shoaib }
2559314001f0SRao Shoaib
manage_oob(struct sk_buff * skb,struct sock * sk,int flags,int copied)2560314001f0SRao Shoaib static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
2561314001f0SRao Shoaib int flags, int copied)
2562314001f0SRao Shoaib {
2563314001f0SRao Shoaib struct unix_sock *u = unix_sk(sk);
2564314001f0SRao Shoaib
2565314001f0SRao Shoaib if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
2566314001f0SRao Shoaib skb_unlink(skb, &sk->sk_receive_queue);
2567314001f0SRao Shoaib consume_skb(skb);
2568314001f0SRao Shoaib skb = NULL;
2569314001f0SRao Shoaib } else {
2570d59ae931SKuniyuki Iwashima struct sk_buff *unlinked_skb = NULL;
2571d59ae931SKuniyuki Iwashima
2572d59ae931SKuniyuki Iwashima spin_lock(&sk->sk_receive_queue.lock);
2573d59ae931SKuniyuki Iwashima
2574314001f0SRao Shoaib if (skb == u->oob_skb) {
2575314001f0SRao Shoaib if (copied) {
2576314001f0SRao Shoaib skb = NULL;
2577185c72f6SRao Shoaib } else if (!(flags & MSG_PEEK)) {
2578185c72f6SRao Shoaib if (sock_flag(sk, SOCK_URGINLINE)) {
2579e82025c6SKuniyuki Iwashima WRITE_ONCE(u->oob_skb, NULL);
2580314001f0SRao Shoaib consume_skb(skb);
2581022d81a7SKuniyuki Iwashima } else {
2582d59ae931SKuniyuki Iwashima __skb_unlink(skb, &sk->sk_receive_queue);
2583601a89eaSKuniyuki Iwashima WRITE_ONCE(u->oob_skb, NULL);
2584d59ae931SKuniyuki Iwashima unlinked_skb = skb;
2585314001f0SRao Shoaib skb = skb_peek(&sk->sk_receive_queue);
2586314001f0SRao Shoaib }
2587185c72f6SRao Shoaib } else if (!sock_flag(sk, SOCK_URGINLINE)) {
2588185c72f6SRao Shoaib skb = skb_peek_next(skb, &sk->sk_receive_queue);
2589185c72f6SRao Shoaib }
2590314001f0SRao Shoaib }
2591d59ae931SKuniyuki Iwashima
2592d59ae931SKuniyuki Iwashima spin_unlock(&sk->sk_receive_queue.lock);
2593d59ae931SKuniyuki Iwashima
2594d59ae931SKuniyuki Iwashima if (unlinked_skb) {
2595d59ae931SKuniyuki Iwashima WARN_ON_ONCE(skb_unref(unlinked_skb));
2596d59ae931SKuniyuki Iwashima kfree_skb(unlinked_skb);
2597d59ae931SKuniyuki Iwashima }
2598314001f0SRao Shoaib }
2599314001f0SRao Shoaib return skb;
2600314001f0SRao Shoaib }
2601314001f0SRao Shoaib #endif
2602314001f0SRao Shoaib
unix_stream_read_skb(struct sock * sk,skb_read_actor_t recv_actor)2603965b57b4SCong Wang static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
260477462de1SJiang Wang {
2605772f9c31SMichal Luczaj struct unix_sock *u = unix_sk(sk);
2606772f9c31SMichal Luczaj struct sk_buff *skb;
2607772f9c31SMichal Luczaj int err;
2608772f9c31SMichal Luczaj
26090ede400cSKuniyuki Iwashima if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED))
261077462de1SJiang Wang return -ENOTCONN;
261177462de1SJiang Wang
2612772f9c31SMichal Luczaj mutex_lock(&u->iolock);
2613772f9c31SMichal Luczaj skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
2614772f9c31SMichal Luczaj mutex_unlock(&u->iolock);
2615772f9c31SMichal Luczaj if (!skb)
2616772f9c31SMichal Luczaj return err;
2617772f9c31SMichal Luczaj
2618772f9c31SMichal Luczaj #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2619772f9c31SMichal Luczaj if (unlikely(skb == READ_ONCE(u->oob_skb))) {
2620772f9c31SMichal Luczaj bool drop = false;
2621772f9c31SMichal Luczaj
2622772f9c31SMichal Luczaj unix_state_lock(sk);
2623772f9c31SMichal Luczaj
2624772f9c31SMichal Luczaj if (sock_flag(sk, SOCK_DEAD)) {
2625772f9c31SMichal Luczaj unix_state_unlock(sk);
2626772f9c31SMichal Luczaj kfree_skb(skb);
2627772f9c31SMichal Luczaj return -ECONNRESET;
2628772f9c31SMichal Luczaj }
2629772f9c31SMichal Luczaj
2630772f9c31SMichal Luczaj spin_lock(&sk->sk_receive_queue.lock);
2631772f9c31SMichal Luczaj if (likely(skb == u->oob_skb)) {
2632772f9c31SMichal Luczaj WRITE_ONCE(u->oob_skb, NULL);
2633772f9c31SMichal Luczaj drop = true;
2634772f9c31SMichal Luczaj }
2635772f9c31SMichal Luczaj spin_unlock(&sk->sk_receive_queue.lock);
2636772f9c31SMichal Luczaj
2637772f9c31SMichal Luczaj unix_state_unlock(sk);
2638772f9c31SMichal Luczaj
2639772f9c31SMichal Luczaj if (drop) {
2640772f9c31SMichal Luczaj WARN_ON_ONCE(skb_unref(skb));
2641772f9c31SMichal Luczaj kfree_skb(skb);
2642772f9c31SMichal Luczaj return -EAGAIN;
2643772f9c31SMichal Luczaj }
2644772f9c31SMichal Luczaj }
2645772f9c31SMichal Luczaj #endif
2646772f9c31SMichal Luczaj
2647772f9c31SMichal Luczaj return recv_actor(sk, skb);
264877462de1SJiang Wang }
264977462de1SJiang Wang
unix_stream_read_generic(struct unix_stream_read_state * state,bool freezable)265006a77b07SWANG Cong static int unix_stream_read_generic(struct unix_stream_read_state *state,
265106a77b07SWANG Cong bool freezable)
26521da177e4SLinus Torvalds {
26537cc05662SChristoph Hellwig struct scm_cookie scm;
26542b514574SHannes Frederic Sowa struct socket *sock = state->socket;
26551da177e4SLinus Torvalds struct sock *sk = sock->sk;
26561da177e4SLinus Torvalds struct unix_sock *u = unix_sk(sk);
26571da177e4SLinus Torvalds int copied = 0;
26582b514574SHannes Frederic Sowa int flags = state->flags;
2659de144391SEric Dumazet int noblock = flags & MSG_DONTWAIT;
26602b514574SHannes Frederic Sowa bool check_creds = false;
26611da177e4SLinus Torvalds int target;
26621da177e4SLinus Torvalds int err = 0;
26631da177e4SLinus Torvalds long timeo;
2664fc0d7536SPavel Emelyanov int skip;
26652b514574SHannes Frederic Sowa size_t size = state->size;
26662b514574SHannes Frederic Sowa unsigned int last_len;
26671da177e4SLinus Torvalds
2668776fcc45SKuniyuki Iwashima if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) {
26691da177e4SLinus Torvalds err = -EINVAL;
26701da177e4SLinus Torvalds goto out;
26711b92ee3dSRainer Weikusat }
26721da177e4SLinus Torvalds
26731b92ee3dSRainer Weikusat if (unlikely(flags & MSG_OOB)) {
26741da177e4SLinus Torvalds err = -EOPNOTSUPP;
2675314001f0SRao Shoaib #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2676314001f0SRao Shoaib err = unix_stream_recv_urg(state);
2677314001f0SRao Shoaib #endif
26781da177e4SLinus Torvalds goto out;
26791b92ee3dSRainer Weikusat }
26801da177e4SLinus Torvalds
26811da177e4SLinus Torvalds target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2682de144391SEric Dumazet timeo = sock_rcvtimeo(sk, noblock);
26831da177e4SLinus Torvalds
26842b514574SHannes Frederic Sowa memset(&scm, 0, sizeof(scm));
26852b514574SHannes Frederic Sowa
26861da177e4SLinus Torvalds /* Lock the socket to prevent queue disordering
26871da177e4SLinus Torvalds * while sleeps in memcpy_tomsg
26881da177e4SLinus Torvalds */
26896e1ce3c3SLinus Torvalds mutex_lock(&u->iolock);
26901da177e4SLinus Torvalds
2691a0917e0bSMatthew Dawson skip = max(sk_peek_offset(sk, flags), 0);
2692e9193d60SAndrey Vagin
26936eba6a37SEric Dumazet do {
26941da177e4SLinus Torvalds int chunk;
269573ed5d25SHannes Frederic Sowa bool drop_skb;
269679f632c7SBenjamin Poirier struct sk_buff *skb, *last;
26971da177e4SLinus Torvalds
269818eceb81SRainer Weikusat redo:
26993c0d2f37SMiklos Szeredi unix_state_lock(sk);
2700b48732e4SMark Salyzyn if (sock_flag(sk, SOCK_DEAD)) {
2701b48732e4SMark Salyzyn err = -ECONNRESET;
2702b48732e4SMark Salyzyn goto unlock;
2703b48732e4SMark Salyzyn }
270479f632c7SBenjamin Poirier last = skb = skb_peek(&sk->sk_receive_queue);
27052b514574SHannes Frederic Sowa last_len = last ? last->len : 0;
2706314001f0SRao Shoaib
2707aea3cb8cSKuniyuki Iwashima again:
2708314001f0SRao Shoaib #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2709314001f0SRao Shoaib if (skb) {
2710314001f0SRao Shoaib skb = manage_oob(skb, sk, flags, copied);
2711022d81a7SKuniyuki Iwashima if (!skb && copied) {
2712314001f0SRao Shoaib unix_state_unlock(sk);
2713314001f0SRao Shoaib break;
2714314001f0SRao Shoaib }
2715314001f0SRao Shoaib }
2716314001f0SRao Shoaib #endif
27176eba6a37SEric Dumazet if (skb == NULL) {
27181da177e4SLinus Torvalds if (copied >= target)
27193c0d2f37SMiklos Szeredi goto unlock;
27201da177e4SLinus Torvalds
27211da177e4SLinus Torvalds /*
27221da177e4SLinus Torvalds * POSIX 1003.1g mandates this order.
27231da177e4SLinus Torvalds */
27241da177e4SLinus Torvalds
27256eba6a37SEric Dumazet err = sock_error(sk);
27266eba6a37SEric Dumazet if (err)
27273c0d2f37SMiklos Szeredi goto unlock;
27281da177e4SLinus Torvalds if (sk->sk_shutdown & RCV_SHUTDOWN)
27293c0d2f37SMiklos Szeredi goto unlock;
27303c0d2f37SMiklos Szeredi
27313c0d2f37SMiklos Szeredi unix_state_unlock(sk);
27321b92ee3dSRainer Weikusat if (!timeo) {
27331da177e4SLinus Torvalds err = -EAGAIN;
27341da177e4SLinus Torvalds break;
27351b92ee3dSRainer Weikusat }
27361b92ee3dSRainer Weikusat
27376e1ce3c3SLinus Torvalds mutex_unlock(&u->iolock);
27381da177e4SLinus Torvalds
27392b514574SHannes Frederic Sowa timeo = unix_stream_data_wait(sk, timeo, last,
274006a77b07SWANG Cong last_len, freezable);
27411da177e4SLinus Torvalds
27423822b5c2SRainer Weikusat if (signal_pending(current)) {
27431da177e4SLinus Torvalds err = sock_intr_errno(timeo);
2744fa0dc04dSEric Dumazet scm_destroy(&scm);
27451da177e4SLinus Torvalds goto out;
27461da177e4SLinus Torvalds }
2747b3ca9b02SRainer Weikusat
27486e1ce3c3SLinus Torvalds mutex_lock(&u->iolock);
274918eceb81SRainer Weikusat goto redo;
27503c0d2f37SMiklos Szeredi unlock:
27513c0d2f37SMiklos Szeredi unix_state_unlock(sk);
27523c0d2f37SMiklos Szeredi break;
27531da177e4SLinus Torvalds }
2754fc0d7536SPavel Emelyanov
2755e370a723SEric Dumazet while (skip >= unix_skb_len(skb)) {
2756e370a723SEric Dumazet skip -= unix_skb_len(skb);
275779f632c7SBenjamin Poirier last = skb;
27582b514574SHannes Frederic Sowa last_len = skb->len;
2759fc0d7536SPavel Emelyanov skb = skb_peek_next(skb, &sk->sk_receive_queue);
276079f632c7SBenjamin Poirier if (!skb)
2761fc0d7536SPavel Emelyanov goto again;
2762fc0d7536SPavel Emelyanov }
2763fc0d7536SPavel Emelyanov
27643c0d2f37SMiklos Szeredi unix_state_unlock(sk);
27651da177e4SLinus Torvalds
27661da177e4SLinus Torvalds if (check_creds) {
27671da177e4SLinus Torvalds /* Never glue messages from different writers */
27689490f886SHannes Frederic Sowa if (!unix_skb_scm_eq(skb, &scm))
27691da177e4SLinus Torvalds break;
27705e2ff670SAlexander Mikhalitsyn } else if (test_bit(SOCK_PASSCRED, &sock->flags) ||
27715e2ff670SAlexander Mikhalitsyn test_bit(SOCK_PASSPIDFD, &sock->flags)) {
27721da177e4SLinus Torvalds /* Copy credentials */
27737cc05662SChristoph Hellwig scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
277437a9a8dfSStephen Smalley unix_set_secdata(&scm, skb);
27752b514574SHannes Frederic Sowa check_creds = true;
27761da177e4SLinus Torvalds }
27771da177e4SLinus Torvalds
27781da177e4SLinus Torvalds /* Copy address just once */
27792b514574SHannes Frederic Sowa if (state->msg && state->msg->msg_name) {
27802b514574SHannes Frederic Sowa DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
27812b514574SHannes Frederic Sowa state->msg->msg_name);
27822b514574SHannes Frederic Sowa unix_copy_addr(state->msg, skb->sk);
27831da177e4SLinus Torvalds sunaddr = NULL;
27841da177e4SLinus Torvalds }
27851da177e4SLinus Torvalds
2786e370a723SEric Dumazet chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
278773ed5d25SHannes Frederic Sowa skb_get(skb);
27882b514574SHannes Frederic Sowa chunk = state->recv_actor(skb, skip, chunk, state);
278973ed5d25SHannes Frederic Sowa drop_skb = !unix_skb_len(skb);
279073ed5d25SHannes Frederic Sowa /* skb is only safe to use if !drop_skb */
279173ed5d25SHannes Frederic Sowa consume_skb(skb);
27922b514574SHannes Frederic Sowa if (chunk < 0) {
27931da177e4SLinus Torvalds if (copied == 0)
27941da177e4SLinus Torvalds copied = -EFAULT;
27951da177e4SLinus Torvalds break;
27961da177e4SLinus Torvalds }
27971da177e4SLinus Torvalds copied += chunk;
27981da177e4SLinus Torvalds size -= chunk;
27991da177e4SLinus Torvalds
280073ed5d25SHannes Frederic Sowa if (drop_skb) {
280173ed5d25SHannes Frederic Sowa /* the skb was touched by a concurrent reader;
280273ed5d25SHannes Frederic Sowa * we should not expect anything from this skb
280373ed5d25SHannes Frederic Sowa * anymore and assume it invalid - we can be
280473ed5d25SHannes Frederic Sowa * sure it was dropped from the socket queue
280573ed5d25SHannes Frederic Sowa *
280673ed5d25SHannes Frederic Sowa * let's report a short read
280773ed5d25SHannes Frederic Sowa */
280873ed5d25SHannes Frederic Sowa err = 0;
280973ed5d25SHannes Frederic Sowa break;
281073ed5d25SHannes Frederic Sowa }
281173ed5d25SHannes Frederic Sowa
28121da177e4SLinus Torvalds /* Mark read part of skb as used */
28136eba6a37SEric Dumazet if (!(flags & MSG_PEEK)) {
2814e370a723SEric Dumazet UNIXCB(skb).consumed += chunk;
28151da177e4SLinus Torvalds
2816fc0d7536SPavel Emelyanov sk_peek_offset_bwd(sk, chunk);
2817fc0d7536SPavel Emelyanov
28183c32da19SKirill Tkhai if (UNIXCB(skb).fp) {
28193c32da19SKirill Tkhai scm_stat_del(sk, skb);
28207cc05662SChristoph Hellwig unix_detach_fds(&scm, skb);
28213c32da19SKirill Tkhai }
28221da177e4SLinus Torvalds
2823e370a723SEric Dumazet if (unix_skb_len(skb))
28241da177e4SLinus Torvalds break;
28251da177e4SLinus Torvalds
28266f01fd6eSEric Dumazet skb_unlink(skb, &sk->sk_receive_queue);
282770d4bf6dSNeil Horman consume_skb(skb);
28281da177e4SLinus Torvalds
28297cc05662SChristoph Hellwig if (scm.fp)
28301da177e4SLinus Torvalds break;
28316eba6a37SEric Dumazet } else {
28321da177e4SLinus Torvalds /* It is questionable, see note in unix_dgram_recvmsg.
28331da177e4SLinus Torvalds */
28341da177e4SLinus Torvalds if (UNIXCB(skb).fp)
2835cbcf0112SMiklos Szeredi unix_peek_fds(&scm, skb);
28361da177e4SLinus Torvalds
2837fc0d7536SPavel Emelyanov sk_peek_offset_fwd(sk, chunk);
2838fc0d7536SPavel Emelyanov
28399f389e35SAaron Conole if (UNIXCB(skb).fp)
28409f389e35SAaron Conole break;
28419f389e35SAaron Conole
2842e9193d60SAndrey Vagin skip = 0;
28439f389e35SAaron Conole last = skb;
28449f389e35SAaron Conole last_len = skb->len;
28459f389e35SAaron Conole unix_state_lock(sk);
28469f389e35SAaron Conole skb = skb_peek_next(skb, &sk->sk_receive_queue);
28479f389e35SAaron Conole if (skb)
28489f389e35SAaron Conole goto again;
28499f389e35SAaron Conole unix_state_unlock(sk);
28501da177e4SLinus Torvalds break;
28511da177e4SLinus Torvalds }
28521da177e4SLinus Torvalds } while (size);
28531da177e4SLinus Torvalds
28546e1ce3c3SLinus Torvalds mutex_unlock(&u->iolock);
28559d797ee2SKuniyuki Iwashima if (state->msg)
2856a9c49cc2SAlexander Mikhalitsyn scm_recv_unix(sock, state->msg, &scm, flags);
28572b514574SHannes Frederic Sowa else
28582b514574SHannes Frederic Sowa scm_destroy(&scm);
28591da177e4SLinus Torvalds out:
28601da177e4SLinus Torvalds return copied ? : err;
28611da177e4SLinus Torvalds }
28621da177e4SLinus Torvalds
unix_stream_read_actor(struct sk_buff * skb,int skip,int chunk,struct unix_stream_read_state * state)28632b514574SHannes Frederic Sowa static int unix_stream_read_actor(struct sk_buff *skb,
28642b514574SHannes Frederic Sowa int skip, int chunk,
28652b514574SHannes Frederic Sowa struct unix_stream_read_state *state)
28662b514574SHannes Frederic Sowa {
28672b514574SHannes Frederic Sowa int ret;
28682b514574SHannes Frederic Sowa
28692b514574SHannes Frederic Sowa ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
28702b514574SHannes Frederic Sowa state->msg, chunk);
28712b514574SHannes Frederic Sowa return ret ?: chunk;
28722b514574SHannes Frederic Sowa }
28732b514574SHannes Frederic Sowa
/*
 * Stream receive that starts from a struct sock and goes straight to
 * unix_stream_read_generic(), with no sk_prot check.  The socket is taken
 * from sk->sk_socket and the wait is freezable.
 */
int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
			  size_t size, int flags)
{
	struct unix_stream_read_state rd = {
		.socket		= sk->sk_socket,
		.msg		= msg,
		.size		= size,
		.flags		= flags,
		.recv_actor	= unix_stream_read_actor,
	};

	return unix_stream_read_generic(&rd, true);
}
288794531cfcSJiang Wang
/*
 * recvmsg entry point for stream sockets.
 *
 * With CONFIG_BPF_SYSCALL, a socket whose sk_prot is no longer
 * &unix_stream_proto is served by that proto's ->recvmsg(); otherwise the
 * generic stream read loop runs with the copy-to-msghdr actor and a
 * freezable wait.
 */
static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
			       size_t size, int flags)
{
	struct unix_stream_read_state rd = {
		.socket		= sock,
		.msg		= msg,
		.size		= size,
		.flags		= flags,
		.recv_actor	= unix_stream_read_actor,
	};

#ifdef CONFIG_BPF_SYSCALL
	struct sock *sk = sock->sk;
	const struct proto *cur_prot = READ_ONCE(sk->sk_prot);

	if (cur_prot != &unix_stream_proto)
		return cur_prot->recvmsg(sk, msg, size, flags, NULL);
#endif
	return unix_stream_read_generic(&rd, true);
}
29082b514574SHannes Frederic Sowa
unix_stream_splice_actor(struct sk_buff * skb,int skip,int chunk,struct unix_stream_read_state * state)29092b514574SHannes Frederic Sowa static int unix_stream_splice_actor(struct sk_buff *skb,
29102b514574SHannes Frederic Sowa int skip, int chunk,
29112b514574SHannes Frederic Sowa struct unix_stream_read_state *state)
29122b514574SHannes Frederic Sowa {
29132b514574SHannes Frederic Sowa return skb_splice_bits(skb, state->socket->sk,
29142b514574SHannes Frederic Sowa UNIXCB(skb).consumed + skip,
291525869262SAl Viro state->pipe, chunk, state->splice_flags);
29162b514574SHannes Frederic Sowa }
29172b514574SHannes Frederic Sowa
unix_stream_splice_read(struct socket * sock,loff_t * ppos,struct pipe_inode_info * pipe,size_t size,unsigned int flags)29182b514574SHannes Frederic Sowa static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
29192b514574SHannes Frederic Sowa struct pipe_inode_info *pipe,
29202b514574SHannes Frederic Sowa size_t size, unsigned int flags)
29212b514574SHannes Frederic Sowa {
29222b514574SHannes Frederic Sowa struct unix_stream_read_state state = {
29232b514574SHannes Frederic Sowa .recv_actor = unix_stream_splice_actor,
29242b514574SHannes Frederic Sowa .socket = sock,
29252b514574SHannes Frederic Sowa .pipe = pipe,
29262b514574SHannes Frederic Sowa .size = size,
29272b514574SHannes Frederic Sowa .splice_flags = flags,
29282b514574SHannes Frederic Sowa };
29292b514574SHannes Frederic Sowa
29302b514574SHannes Frederic Sowa if (unlikely(*ppos))
29312b514574SHannes Frederic Sowa return -ESPIPE;
29322b514574SHannes Frederic Sowa
29332b514574SHannes Frederic Sowa if (sock->file->f_flags & O_NONBLOCK ||
29342b514574SHannes Frederic Sowa flags & SPLICE_F_NONBLOCK)
29352b514574SHannes Frederic Sowa state.flags = MSG_DONTWAIT;
29362b514574SHannes Frederic Sowa
293706a77b07SWANG Cong return unix_stream_read_generic(&state, false);
29382b514574SHannes Frederic Sowa }
29392b514574SHannes Frederic Sowa
unix_shutdown(struct socket * sock,int mode)29401da177e4SLinus Torvalds static int unix_shutdown(struct socket *sock, int mode)
29411da177e4SLinus Torvalds {
29421da177e4SLinus Torvalds struct sock *sk = sock->sk;
29431da177e4SLinus Torvalds struct sock *other;
29441da177e4SLinus Torvalds
2945fc61b928SXi Wang if (mode < SHUT_RD || mode > SHUT_RDWR)
2946fc61b928SXi Wang return -EINVAL;
2947fc61b928SXi Wang /* This maps:
2948fc61b928SXi Wang * SHUT_RD (0) -> RCV_SHUTDOWN (1)
2949fc61b928SXi Wang * SHUT_WR (1) -> SEND_SHUTDOWN (2)
2950fc61b928SXi Wang * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2951fc61b928SXi Wang */
2952fc61b928SXi Wang ++mode;
29537180a031SAlban Crequy
29541c92b4e5SDavid S. Miller unix_state_lock(sk);
2955e1d09c2cSKuniyuki Iwashima WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
29561da177e4SLinus Torvalds other = unix_peer(sk);
29571da177e4SLinus Torvalds if (other)
29581da177e4SLinus Torvalds sock_hold(other);
29591c92b4e5SDavid S. Miller unix_state_unlock(sk);
29601da177e4SLinus Torvalds sk->sk_state_change(sk);
29611da177e4SLinus Torvalds
29621da177e4SLinus Torvalds if (other &&
29631da177e4SLinus Torvalds (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
29641da177e4SLinus Torvalds
29651da177e4SLinus Torvalds int peer_mode = 0;
296694531cfcSJiang Wang const struct proto *prot = READ_ONCE(other->sk_prot);
29671da177e4SLinus Torvalds
2968d359902dSJiang Wang if (prot->unhash)
296994531cfcSJiang Wang prot->unhash(other);
29701da177e4SLinus Torvalds if (mode&RCV_SHUTDOWN)
29711da177e4SLinus Torvalds peer_mode |= SEND_SHUTDOWN;
29721da177e4SLinus Torvalds if (mode&SEND_SHUTDOWN)
29731da177e4SLinus Torvalds peer_mode |= RCV_SHUTDOWN;
29741c92b4e5SDavid S. Miller unix_state_lock(other);
2975e1d09c2cSKuniyuki Iwashima WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
29761c92b4e5SDavid S. Miller unix_state_unlock(other);
29771da177e4SLinus Torvalds other->sk_state_change(other);
2978d0c6416bSJiang Wang if (peer_mode == SHUTDOWN_MASK)
29798d8ad9d7SPavel Emelyanov sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2980d0c6416bSJiang Wang else if (peer_mode & RCV_SHUTDOWN)
29818d8ad9d7SPavel Emelyanov sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
29821da177e4SLinus Torvalds }
29831da177e4SLinus Torvalds if (other)
29841da177e4SLinus Torvalds sock_put(other);
29857180a031SAlban Crequy
29861da177e4SLinus Torvalds return 0;
29871da177e4SLinus Torvalds }
29881da177e4SLinus Torvalds
unix_inq_len(struct sock * sk)2989885ee74dSPavel Emelyanov long unix_inq_len(struct sock *sk)
29901da177e4SLinus Torvalds {
29911da177e4SLinus Torvalds struct sk_buff *skb;
2992885ee74dSPavel Emelyanov long amount = 0;
29931da177e4SLinus Torvalds
29944e38d6c0SKuniyuki Iwashima if (READ_ONCE(sk->sk_state) == TCP_LISTEN)
2995885ee74dSPavel Emelyanov return -EINVAL;
29961da177e4SLinus Torvalds
29971da177e4SLinus Torvalds spin_lock(&sk->sk_receive_queue.lock);
29981da177e4SLinus Torvalds if (sk->sk_type == SOCK_STREAM ||
29991da177e4SLinus Torvalds sk->sk_type == SOCK_SEQPACKET) {
30001da177e4SLinus Torvalds skb_queue_walk(&sk->sk_receive_queue, skb)
3001e370a723SEric Dumazet amount += unix_skb_len(skb);
30021da177e4SLinus Torvalds } else {
30031da177e4SLinus Torvalds skb = skb_peek(&sk->sk_receive_queue);
30041da177e4SLinus Torvalds if (skb)
30051da177e4SLinus Torvalds amount = skb->len;
30061da177e4SLinus Torvalds }
30071da177e4SLinus Torvalds spin_unlock(&sk->sk_receive_queue.lock);
3008885ee74dSPavel Emelyanov
3009885ee74dSPavel Emelyanov return amount;
3010885ee74dSPavel Emelyanov }
3011885ee74dSPavel Emelyanov EXPORT_SYMBOL_GPL(unix_inq_len);
3012885ee74dSPavel Emelyanov
/* Outgoing queue size, as reported by sk_wmem_alloc_get(). */
long unix_outq_len(struct sock *sk)
{
	long queued = sk_wmem_alloc_get(sk);

	return queued;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
3018885ee74dSPavel Emelyanov
unix_open_file(struct sock * sk)3019ba94f308SAndrey Vagin static int unix_open_file(struct sock *sk)
3020ba94f308SAndrey Vagin {
3021ba94f308SAndrey Vagin struct path path;
3022ba94f308SAndrey Vagin struct file *f;
3023ba94f308SAndrey Vagin int fd;
3024ba94f308SAndrey Vagin
3025ba94f308SAndrey Vagin if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
3026ba94f308SAndrey Vagin return -EPERM;
3027ba94f308SAndrey Vagin
3028ae3b5641SAl Viro if (!smp_load_acquire(&unix_sk(sk)->addr))
3029ba94f308SAndrey Vagin return -ENOENT;
3030ae3b5641SAl Viro
3031ae3b5641SAl Viro path = unix_sk(sk)->path;
3032ae3b5641SAl Viro if (!path.dentry)
3033ae3b5641SAl Viro return -ENOENT;
3034ba94f308SAndrey Vagin
3035ba94f308SAndrey Vagin path_get(&path);
3036ba94f308SAndrey Vagin
3037ba94f308SAndrey Vagin fd = get_unused_fd_flags(O_CLOEXEC);
3038ba94f308SAndrey Vagin if (fd < 0)
3039ba94f308SAndrey Vagin goto out;
3040ba94f308SAndrey Vagin
3041ba94f308SAndrey Vagin f = dentry_open(&path, O_PATH, current_cred());
3042ba94f308SAndrey Vagin if (IS_ERR(f)) {
3043ba94f308SAndrey Vagin put_unused_fd(fd);
3044ba94f308SAndrey Vagin fd = PTR_ERR(f);
3045ba94f308SAndrey Vagin goto out;
3046ba94f308SAndrey Vagin }
3047ba94f308SAndrey Vagin
3048ba94f308SAndrey Vagin fd_install(fd, f);
3049ba94f308SAndrey Vagin out:
3050ba94f308SAndrey Vagin path_put(&path);
3051ba94f308SAndrey Vagin
3052ba94f308SAndrey Vagin return fd;
3053ba94f308SAndrey Vagin }
3054ba94f308SAndrey Vagin
unix_ioctl(struct socket * sock,unsigned int cmd,unsigned long arg)3055885ee74dSPavel Emelyanov static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3056885ee74dSPavel Emelyanov {
3057885ee74dSPavel Emelyanov struct sock *sk = sock->sk;
3058885ee74dSPavel Emelyanov long amount = 0;
3059885ee74dSPavel Emelyanov int err;
3060885ee74dSPavel Emelyanov
3061885ee74dSPavel Emelyanov switch (cmd) {
3062885ee74dSPavel Emelyanov case SIOCOUTQ:
3063885ee74dSPavel Emelyanov amount = unix_outq_len(sk);
30641da177e4SLinus Torvalds err = put_user(amount, (int __user *)arg);
30651da177e4SLinus Torvalds break;
3066885ee74dSPavel Emelyanov case SIOCINQ:
3067885ee74dSPavel Emelyanov amount = unix_inq_len(sk);
3068885ee74dSPavel Emelyanov if (amount < 0)
3069885ee74dSPavel Emelyanov err = amount;
3070885ee74dSPavel Emelyanov else
3071885ee74dSPavel Emelyanov err = put_user(amount, (int __user *)arg);
3072885ee74dSPavel Emelyanov break;
3073ba94f308SAndrey Vagin case SIOCUNIXFILE:
3074ba94f308SAndrey Vagin err = unix_open_file(sk);
3075ba94f308SAndrey Vagin break;
3076314001f0SRao Shoaib #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3077314001f0SRao Shoaib case SIOCATMARK:
3078314001f0SRao Shoaib {
3079314001f0SRao Shoaib struct sk_buff *skb;
3080314001f0SRao Shoaib int answ = 0;
3081314001f0SRao Shoaib
3082314001f0SRao Shoaib skb = skb_peek(&sk->sk_receive_queue);
3083e82025c6SKuniyuki Iwashima if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
3084314001f0SRao Shoaib answ = 1;
3085314001f0SRao Shoaib err = put_user(answ, (int __user *)arg);
3086314001f0SRao Shoaib }
3087314001f0SRao Shoaib break;
3088314001f0SRao Shoaib #endif
30891da177e4SLinus Torvalds default:
3090b5e5fa5eSChristoph Hellwig err = -ENOIOCTLCMD;
30911da177e4SLinus Torvalds break;
30921da177e4SLinus Torvalds }
30931da177e4SLinus Torvalds return err;
30941da177e4SLinus Torvalds }
30951da177e4SLinus Torvalds
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point: convert @arg with compat_ptr() and forward
 * the request to unix_ioctl().
 */
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	unsigned long native_arg = (unsigned long)compat_ptr(arg);

	return unix_ioctl(sock, cmd, native_arg);
}
#endif
31025f6beb9eSArnd Bergmann
unix_poll(struct file * file,struct socket * sock,poll_table * wait)3103a11e1d43SLinus Torvalds static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
31041da177e4SLinus Torvalds {
31051da177e4SLinus Torvalds struct sock *sk = sock->sk;
3106484e036eSKuniyuki Iwashima unsigned char state;
3107a11e1d43SLinus Torvalds __poll_t mask;
3108e1d09c2cSKuniyuki Iwashima u8 shutdown;
3109a11e1d43SLinus Torvalds
311089ab066dSKarsten Graul sock_poll_wait(file, sock, wait);
3111a11e1d43SLinus Torvalds mask = 0;
3112e1d09c2cSKuniyuki Iwashima shutdown = READ_ONCE(sk->sk_shutdown);
3113484e036eSKuniyuki Iwashima state = READ_ONCE(sk->sk_state);
31141da177e4SLinus Torvalds
31151da177e4SLinus Torvalds /* exceptional events? */
3116cc04410aSEric Dumazet if (READ_ONCE(sk->sk_err))
3117a9a08845SLinus Torvalds mask |= EPOLLERR;
3118e1d09c2cSKuniyuki Iwashima if (shutdown == SHUTDOWN_MASK)
3119a9a08845SLinus Torvalds mask |= EPOLLHUP;
3120e1d09c2cSKuniyuki Iwashima if (shutdown & RCV_SHUTDOWN)
3121a9a08845SLinus Torvalds mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
31221da177e4SLinus Torvalds
31231da177e4SLinus Torvalds /* readable? */
31243ef7cf57SEric Dumazet if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3125a9a08845SLinus Torvalds mask |= EPOLLIN | EPOLLRDNORM;
3126af493388SCong Wang if (sk_is_readable(sk))
3127af493388SCong Wang mask |= EPOLLIN | EPOLLRDNORM;
3128d9a232d4SKuniyuki Iwashima #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3129d9a232d4SKuniyuki Iwashima if (READ_ONCE(unix_sk(sk)->oob_skb))
3130d9a232d4SKuniyuki Iwashima mask |= EPOLLPRI;
3131d9a232d4SKuniyuki Iwashima #endif
31321da177e4SLinus Torvalds
31331da177e4SLinus Torvalds /* Connection-based need to check for termination and startup */
31346eba6a37SEric Dumazet if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
3135484e036eSKuniyuki Iwashima state == TCP_CLOSE)
3136a9a08845SLinus Torvalds mask |= EPOLLHUP;
31371da177e4SLinus Torvalds
31381da177e4SLinus Torvalds /*
31391da177e4SLinus Torvalds * we set writable also when the other side has shut down the
31401da177e4SLinus Torvalds * connection. This prevents stuck sockets.
31411da177e4SLinus Torvalds */
3142484e036eSKuniyuki Iwashima if (unix_writable(sk, state))
3143a9a08845SLinus Torvalds mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
31441da177e4SLinus Torvalds
31451da177e4SLinus Torvalds return mask;
31461da177e4SLinus Torvalds }
31471da177e4SLinus Torvalds
unix_dgram_poll(struct file * file,struct socket * sock,poll_table * wait)3148a11e1d43SLinus Torvalds static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
3149a11e1d43SLinus Torvalds poll_table *wait)
31503c73419cSRainer Weikusat {
3151ec0d215fSRainer Weikusat struct sock *sk = sock->sk, *other;
3152a11e1d43SLinus Torvalds unsigned int writable;
3153484e036eSKuniyuki Iwashima unsigned char state;
3154a11e1d43SLinus Torvalds __poll_t mask;
3155e1d09c2cSKuniyuki Iwashima u8 shutdown;
3156a11e1d43SLinus Torvalds
315789ab066dSKarsten Graul sock_poll_wait(file, sock, wait);
3158a11e1d43SLinus Torvalds mask = 0;
3159e1d09c2cSKuniyuki Iwashima shutdown = READ_ONCE(sk->sk_shutdown);
3160484e036eSKuniyuki Iwashima state = READ_ONCE(sk->sk_state);
31613c73419cSRainer Weikusat
31623c73419cSRainer Weikusat /* exceptional events? */
3163cc04410aSEric Dumazet if (READ_ONCE(sk->sk_err) ||
3164cc04410aSEric Dumazet !skb_queue_empty_lockless(&sk->sk_error_queue))
3165a9a08845SLinus Torvalds mask |= EPOLLERR |
3166a9a08845SLinus Torvalds (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
31677d4c04fcSKeller, Jacob E
3168e1d09c2cSKuniyuki Iwashima if (shutdown & RCV_SHUTDOWN)
3169a9a08845SLinus Torvalds mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3170e1d09c2cSKuniyuki Iwashima if (shutdown == SHUTDOWN_MASK)
3171a9a08845SLinus Torvalds mask |= EPOLLHUP;
31723c73419cSRainer Weikusat
31733c73419cSRainer Weikusat /* readable? */
31743ef7cf57SEric Dumazet if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3175a9a08845SLinus Torvalds mask |= EPOLLIN | EPOLLRDNORM;
3176af493388SCong Wang if (sk_is_readable(sk))
3177af493388SCong Wang mask |= EPOLLIN | EPOLLRDNORM;
31783c73419cSRainer Weikusat
31793c73419cSRainer Weikusat /* Connection-based need to check for termination and startup */
3180484e036eSKuniyuki Iwashima if (sk->sk_type == SOCK_SEQPACKET && state == TCP_CLOSE)
3181a9a08845SLinus Torvalds mask |= EPOLLHUP;
31823c73419cSRainer Weikusat
3183973a34aaSEric Dumazet /* No write status requested, avoid expensive OUT tests. */
3184a11e1d43SLinus Torvalds if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
3185973a34aaSEric Dumazet return mask;
3186973a34aaSEric Dumazet
3187484e036eSKuniyuki Iwashima writable = unix_writable(sk, state);
31887d267278SRainer Weikusat if (writable) {
31897d267278SRainer Weikusat unix_state_lock(sk);
31907d267278SRainer Weikusat
31917d267278SRainer Weikusat other = unix_peer(sk);
31927d267278SRainer Weikusat if (other && unix_peer(other) != sk &&
319304f08eb4SEric Dumazet unix_recvq_full_lockless(other) &&
31947d267278SRainer Weikusat unix_dgram_peer_wake_me(sk, other))
3195ec0d215fSRainer Weikusat writable = 0;
31967d267278SRainer Weikusat
31977d267278SRainer Weikusat unix_state_unlock(sk);
3198ec0d215fSRainer Weikusat }
3199ec0d215fSRainer Weikusat
3200ec0d215fSRainer Weikusat if (writable)
3201a9a08845SLinus Torvalds mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
32023c73419cSRainer Weikusat else
32039cd3e072SEric Dumazet sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
32043c73419cSRainer Weikusat
32053c73419cSRainer Weikusat return mask;
32063c73419cSRainer Weikusat }
32071da177e4SLinus Torvalds
32081da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
3209a53eb3feSPavel Emelyanov
/*
 * A seq_file position packs the hash bucket index into the bits above
 * BUCKET_SPACE and the (1-based) offset inside that bucket into the low
 * BUCKET_SPACE bits.
 */
#define BUCKET_SPACE		(BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x)		((x) >> BUCKET_SPACE)
#define get_offset(x)		((x) & ((1UL << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o)	(((b) << BUCKET_SPACE) | (o))
3215a53eb3feSPavel Emelyanov
/*
 * Walk the hash bucket encoded in *@pos and return the socket sitting at
 * the (1-based) offset encoded in *@pos, or NULL when the chain is
 * shorter than that.  No locking is done here.
 */
static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
{
	unsigned long want = get_offset(*pos);
	unsigned long idx = get_bucket(*pos);
	unsigned long seen = 0;
	struct sock *sk;

	sk = sk_head(&seq_file_net(seq)->unx.table.buckets[idx]);
	while (sk) {
		seen++;
		if (seen == want)
			break;
		sk = sk_next(sk);
	}

	return sk;
}
32317123aaa3SEric Dumazet
/*
 * Starting at the bucket/offset encoded in *@pos, find the first socket
 * to show.  Buckets with nothing at the requested offset are skipped and
 * *@pos is moved to offset 1 of the following bucket.
 *
 * On success the socket is returned with its bucket lock still held; NULL
 * is returned, with no lock held, once every bucket has been tried.
 */
static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	unsigned long idx;
	struct sock *sk;

	for (idx = get_bucket(*pos); idx < UNIX_HASH_SIZE; ) {
		spinlock_t *lock = &net->unx.table.locks[idx];

		spin_lock(lock);

		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;	/* bucket lock stays held */

		spin_unlock(lock);

		idx++;
		*pos = set_bucket_offset(idx, 1);
	}

	return NULL;
}
32521da177e4SLinus Torvalds
/*
 * Step from @sk to the next socket to show.  While the current chain has
 * more entries the successor is returned directly.  Otherwise the lock
 * of the bucket encoded in *@pos is dropped, *@pos is moved to the start
 * of the following bucket and the search continues in unix_get_first().
 */
static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
				  loff_t *pos)
{
	unsigned long idx = get_bucket(*pos);
	struct sock *succ = sk_next(sk);

	if (succ)
		return succ;

	spin_unlock(&seq_file_net(seq)->unx.table.locks[idx]);

	idx++;
	*pos = set_bucket_offset(idx, 1);

	return unix_get_first(seq, pos);
}
32694408d55aSKuniyuki Iwashima
/*
 * seq_file ->start: position 0 stands for the header line
 * (SEQ_START_TOKEN); any other position is resolved to a socket by
 * unix_get_first().
 */
static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (*pos)
		return unix_get_first(seq, pos);

	return SEQ_START_TOKEN;
}
32771da177e4SLinus Torvalds
/*
 * seq_file ->next: bump *@pos, then either look up the first socket
 * (when @v was the header token) or move on from the socket @v.
 */
static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	(*pos)++;

	if (v != SEQ_START_TOKEN)
		return unix_get_next(seq, v, pos);

	return unix_get_first(seq, pos);
}
32871da177e4SLinus Torvalds
unix_seq_stop(struct seq_file * seq,void * v)32881da177e4SLinus Torvalds static void unix_seq_stop(struct seq_file *seq, void *v)
32891da177e4SLinus Torvalds {
3290afd20b92SKuniyuki Iwashima struct sock *sk = v;
3291afd20b92SKuniyuki Iwashima
32922f7ca90aSKuniyuki Iwashima if (sk)
329379b05beaSKuniyuki Iwashima spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]);
329479b05beaSKuniyuki Iwashima }
32951da177e4SLinus Torvalds
/*
 * seq_file ->show: emit the header line for SEQ_START_TOKEN, otherwise
 * one line describing the socket @v.  Always returns 0.
 */
unix_seq_show(struct seq_file * seq,void * v)32961da177e4SLinus Torvalds static int unix_seq_show(struct seq_file *seq, void *v)
32971da177e4SLinus Torvalds {
32981da177e4SLinus Torvalds
3299b9f3124fSJoe Perches if (v == SEQ_START_TOKEN)
/*
 * NOTE(review): the column padding inside this header literal has to line
 * up with the field widths of the seq_printf() below - confirm the exact
 * spacing against the pristine source before touching it.
 */
33001da177e4SLinus Torvalds seq_puts(seq, "Num RefCount Protocol Flags Type St "
33011da177e4SLinus Torvalds "Inode Path\n");
33021da177e4SLinus Torvalds else {
33031da177e4SLinus Torvalds struct sock *s = v;
33041da177e4SLinus Torvalds struct unix_sock *u = unix_sk(s);
/* The socket's state lock is held while its fields are printed. */
33051c92b4e5SDavid S. Miller unix_state_lock(s);
33061da177e4SLinus Torvalds
/*
 * Fixed columns: socket pointer (%pK), refcount, a constant 0, the
 * __SO_ACCEPTCON flag for listeners, sk_type, an SS_* state derived from
 * sk_socket and sk_state, and the inode number.
 */
330771338aa7SDan Rosenberg seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
33081da177e4SLinus Torvalds s,
330941c6d650SReshetova, Elena refcount_read(&s->sk_refcnt),
33101da177e4SLinus Torvalds 0,
33111da177e4SLinus Torvalds s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
33121da177e4SLinus Torvalds s->sk_type,
33131da177e4SLinus Torvalds s->sk_socket ?
33141da177e4SLinus Torvalds (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
33151da177e4SLinus Torvalds (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
33161da177e4SLinus Torvalds sock_i_ino(s));
33171da177e4SLinus Torvalds
33182f7ca90aSKuniyuki Iwashima if (u->addr) { // under a hash table lock here
33191da177e4SLinus Torvalds int i, len;
33201da177e4SLinus Torvalds seq_putc(seq, ' ');
33211da177e4SLinus Torvalds
33221da177e4SLinus Torvalds i = 0;
/* Number of sun_path bytes covered by the stored address length. */
3323755662ceSKuniyuki Iwashima len = u->addr->len -
3324755662ceSKuniyuki Iwashima offsetof(struct sockaddr_un, sun_path);
/*
 * A name whose first byte is non-zero is printed one byte short
 * (presumably dropping a trailing NUL - verify against bind()); a name
 * starting with a zero byte gets '@' in place of that first byte.
 */
33255ce7ab49SKuniyuki Iwashima if (u->addr->name->sun_path[0]) {
33261da177e4SLinus Torvalds len--;
33275ce7ab49SKuniyuki Iwashima } else {
33281da177e4SLinus Torvalds seq_putc(seq, '@');
33291da177e4SLinus Torvalds i++;
33301da177e4SLinus Torvalds }
/* Remaining bytes are printed verbatim; zero bytes show up as '@'. */
33311da177e4SLinus Torvalds for ( ; i < len; i++)
3332e7947ea7SIsaac Boukris seq_putc(seq, u->addr->name->sun_path[i] ?:
3333e7947ea7SIsaac Boukris '@');
33341da177e4SLinus Torvalds }
33351c92b4e5SDavid S. Miller unix_state_unlock(s);
33361da177e4SLinus Torvalds seq_putc(seq, '\n');
33371da177e4SLinus Torvalds }
33381da177e4SLinus Torvalds
33391da177e4SLinus Torvalds return 0;
33401da177e4SLinus Torvalds }
33411da177e4SLinus Torvalds
334256b3d975SPhilippe De Muyter static const struct seq_operations unix_seq_ops = {
33431da177e4SLinus Torvalds .start = unix_seq_start,
33441da177e4SLinus Torvalds .next = unix_seq_next,
33451da177e4SLinus Torvalds .stop = unix_seq_stop,
33461da177e4SLinus Torvalds .show = unix_seq_show,
33471da177e4SLinus Torvalds };
33482c860a43SKuniyuki Iwashima
33492c860a43SKuniyuki Iwashima #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
3350855d8e77SKuniyuki Iwashima struct bpf_unix_iter_state {
3351855d8e77SKuniyuki Iwashima struct seq_net_private p;
3352855d8e77SKuniyuki Iwashima unsigned int cur_sk;
3353855d8e77SKuniyuki Iwashima unsigned int end_sk;
3354855d8e77SKuniyuki Iwashima unsigned int max_sk;
3355855d8e77SKuniyuki Iwashima struct sock **batch;
3356855d8e77SKuniyuki Iwashima bool st_bucket_done;
3357855d8e77SKuniyuki Iwashima };
3358855d8e77SKuniyuki Iwashima
33592c860a43SKuniyuki Iwashima struct bpf_iter__unix {
33602c860a43SKuniyuki Iwashima __bpf_md_ptr(struct bpf_iter_meta *, meta);
33612c860a43SKuniyuki Iwashima __bpf_md_ptr(struct unix_sock *, unix_sk);
33622c860a43SKuniyuki Iwashima uid_t uid __aligned(8);
33632c860a43SKuniyuki Iwashima };
33642c860a43SKuniyuki Iwashima
/*
 * Fill a bpf_iter__unix context from the arguments and run @prog on it,
 * returning whatever bpf_iter_run_prog() returns.  @meta->seq_num is
 * decremented first so that the SEQ_START_TOKEN step is not counted.
 */
static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
			      struct unix_sock *unix_sk, uid_t uid)
{
	struct bpf_iter__unix ctx;

	/* Do not count the SEQ_START_TOKEN step. */
	meta->seq_num -= 1;

	ctx.meta = meta;
	ctx.unix_sk = unix_sk;
	ctx.uid = uid;

	return bpf_iter_run_prog(prog, &ctx);
}
33762c860a43SKuniyuki Iwashima
bpf_iter_unix_hold_batch(struct seq_file * seq,struct sock * start_sk)3377855d8e77SKuniyuki Iwashima static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
3378855d8e77SKuniyuki Iwashima
3379855d8e77SKuniyuki Iwashima {
3380855d8e77SKuniyuki Iwashima struct bpf_unix_iter_state *iter = seq->private;
3381855d8e77SKuniyuki Iwashima unsigned int expected = 1;
3382855d8e77SKuniyuki Iwashima struct sock *sk;
3383855d8e77SKuniyuki Iwashima
3384855d8e77SKuniyuki Iwashima sock_hold(start_sk);
3385855d8e77SKuniyuki Iwashima iter->batch[iter->end_sk++] = start_sk;
3386855d8e77SKuniyuki Iwashima
3387855d8e77SKuniyuki Iwashima for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
3388855d8e77SKuniyuki Iwashima if (iter->end_sk < iter->max_sk) {
3389855d8e77SKuniyuki Iwashima sock_hold(sk);
3390855d8e77SKuniyuki Iwashima iter->batch[iter->end_sk++] = sk;
3391855d8e77SKuniyuki Iwashima }
3392855d8e77SKuniyuki Iwashima
3393855d8e77SKuniyuki Iwashima expected++;
3394855d8e77SKuniyuki Iwashima }
3395855d8e77SKuniyuki Iwashima
3396cf2f225eSKuniyuki Iwashima spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]);
3397855d8e77SKuniyuki Iwashima
3398855d8e77SKuniyuki Iwashima return expected;
3399855d8e77SKuniyuki Iwashima }
3400855d8e77SKuniyuki Iwashima
bpf_iter_unix_put_batch(struct bpf_unix_iter_state * iter)3401855d8e77SKuniyuki Iwashima static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
3402855d8e77SKuniyuki Iwashima {
3403855d8e77SKuniyuki Iwashima while (iter->cur_sk < iter->end_sk)
3404855d8e77SKuniyuki Iwashima sock_put(iter->batch[iter->cur_sk++]);
3405855d8e77SKuniyuki Iwashima }
3406855d8e77SKuniyuki Iwashima
bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state * iter,unsigned int new_batch_sz)3407855d8e77SKuniyuki Iwashima static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
3408855d8e77SKuniyuki Iwashima unsigned int new_batch_sz)
3409855d8e77SKuniyuki Iwashima {
3410855d8e77SKuniyuki Iwashima struct sock **new_batch;
3411855d8e77SKuniyuki Iwashima
3412855d8e77SKuniyuki Iwashima new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
3413855d8e77SKuniyuki Iwashima GFP_USER | __GFP_NOWARN);
3414855d8e77SKuniyuki Iwashima if (!new_batch)
3415855d8e77SKuniyuki Iwashima return -ENOMEM;
3416855d8e77SKuniyuki Iwashima
3417855d8e77SKuniyuki Iwashima bpf_iter_unix_put_batch(iter);
3418855d8e77SKuniyuki Iwashima kvfree(iter->batch);
3419855d8e77SKuniyuki Iwashima iter->batch = new_batch;
3420855d8e77SKuniyuki Iwashima iter->max_sk = new_batch_sz;
3421855d8e77SKuniyuki Iwashima
3422855d8e77SKuniyuki Iwashima return 0;
3423855d8e77SKuniyuki Iwashima }
3424855d8e77SKuniyuki Iwashima
/*
 * Load the next batch of sockets for the BPF iterator and return its
 * first socket, or NULL when no socket is left.
 *
 * If the previous bucket was batched completely, *@pos first moves on to
 * the next bucket.  When a bucket does not fit into the batch array, the
 * array is grown once to 1.5 times the number of sockets seen and the
 * bucket is read again; if growing fails, or the retry still does not
 * fit, the partial batch is used as it is.
 */
static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
					loff_t *pos)
{
	struct bpf_unix_iter_state *iter = seq->private;
	bool resized = false;
	unsigned int expected;
	struct sock *sk;

	if (iter->st_bucket_done)
		*pos = set_bucket_offset(get_bucket(*pos) + 1, 1);

	for (;;) {
		/* Start a new, empty batch. */
		iter->cur_sk = 0;
		iter->end_sk = 0;

		sk = unix_get_first(seq, pos);
		if (!sk)
			return NULL; /* nothing left */

		expected = bpf_iter_unix_hold_batch(seq, sk);

		if (iter->end_sk == expected) {
			iter->st_bucket_done = true;
			return sk;
		}

		/* Only one resize attempt; otherwise keep the partial batch. */
		if (resized ||
		    bpf_iter_unix_realloc_batch(iter, expected * 3 / 2))
			return sk;

		resized = true;
	}
}
3459855d8e77SKuniyuki Iwashima
/*
 * seq_file ->start for the BPF iterator: position 0 yields the start
 * token, anything else loads a batch.
 */
static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
{
	/* bpf iter does not support lseek, so iteration always resumes
	 * from the point where it was last stop()-ped.
	 */
	if (*pos)
		return bpf_iter_unix_batch(seq, pos);

	return SEQ_START_TOKEN;
}
3470855d8e77SKuniyuki Iwashima
/*
 * seq_file ->next for the BPF iterator: release the batch entry that was
 * just shown, bump *@pos and return the next batched socket, loading a
 * new batch when the current one is used up.
 */
static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_unix_iter_state *iter = seq->private;

	/* By the time seq_next() runs, seq_show() is finished with the
	 * entry at iter->cur_sk, so drop it and step to the next one.
	 */
	if (iter->cur_sk < iter->end_sk)
		sock_put(iter->batch[iter->cur_sk++]);

	++*pos;

	if (iter->cur_sk >= iter->end_sk)
		return bpf_iter_unix_batch(seq, pos);

	return iter->batch[iter->cur_sk];
}
3492855d8e77SKuniyuki Iwashima
bpf_iter_unix_seq_show(struct seq_file * seq,void * v)34932c860a43SKuniyuki Iwashima static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
34942c860a43SKuniyuki Iwashima {
34952c860a43SKuniyuki Iwashima struct bpf_iter_meta meta;
34962c860a43SKuniyuki Iwashima struct bpf_prog *prog;
34972c860a43SKuniyuki Iwashima struct sock *sk = v;
34982c860a43SKuniyuki Iwashima uid_t uid;
3499855d8e77SKuniyuki Iwashima bool slow;
3500855d8e77SKuniyuki Iwashima int ret;
35012c860a43SKuniyuki Iwashima
35022c860a43SKuniyuki Iwashima if (v == SEQ_START_TOKEN)
35032c860a43SKuniyuki Iwashima return 0;
35042c860a43SKuniyuki Iwashima
3505855d8e77SKuniyuki Iwashima slow = lock_sock_fast(sk);
3506855d8e77SKuniyuki Iwashima
3507855d8e77SKuniyuki Iwashima if (unlikely(sk_unhashed(sk))) {
3508855d8e77SKuniyuki Iwashima ret = SEQ_SKIP;
3509855d8e77SKuniyuki Iwashima goto unlock;
3510855d8e77SKuniyuki Iwashima }
3511855d8e77SKuniyuki Iwashima
35122c860a43SKuniyuki Iwashima uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
35132c860a43SKuniyuki Iwashima meta.seq = seq;
35142c860a43SKuniyuki Iwashima prog = bpf_iter_get_info(&meta, false);
3515855d8e77SKuniyuki Iwashima ret = unix_prog_seq_show(prog, &meta, v, uid);
3516855d8e77SKuniyuki Iwashima unlock:
3517855d8e77SKuniyuki Iwashima unlock_sock_fast(sk, slow);
3518855d8e77SKuniyuki Iwashima return ret;
35192c860a43SKuniyuki Iwashima }
35202c860a43SKuniyuki Iwashima
bpf_iter_unix_seq_stop(struct seq_file * seq,void * v)35212c860a43SKuniyuki Iwashima static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
35222c860a43SKuniyuki Iwashima {
3523855d8e77SKuniyuki Iwashima struct bpf_unix_iter_state *iter = seq->private;
35242c860a43SKuniyuki Iwashima struct bpf_iter_meta meta;
35252c860a43SKuniyuki Iwashima struct bpf_prog *prog;
35262c860a43SKuniyuki Iwashima
35272c860a43SKuniyuki Iwashima if (!v) {
35282c860a43SKuniyuki Iwashima meta.seq = seq;
35292c860a43SKuniyuki Iwashima prog = bpf_iter_get_info(&meta, true);
35302c860a43SKuniyuki Iwashima if (prog)
35312c860a43SKuniyuki Iwashima (void)unix_prog_seq_show(prog, &meta, v, 0);
35322c860a43SKuniyuki Iwashima }
35332c860a43SKuniyuki Iwashima
3534855d8e77SKuniyuki Iwashima if (iter->cur_sk < iter->end_sk)
3535855d8e77SKuniyuki Iwashima bpf_iter_unix_put_batch(iter);
35362c860a43SKuniyuki Iwashima }
35372c860a43SKuniyuki Iwashima
35382c860a43SKuniyuki Iwashima static const struct seq_operations bpf_iter_unix_seq_ops = {
3539855d8e77SKuniyuki Iwashima .start = bpf_iter_unix_seq_start,
3540855d8e77SKuniyuki Iwashima .next = bpf_iter_unix_seq_next,
35412c860a43SKuniyuki Iwashima .stop = bpf_iter_unix_seq_stop,
35422c860a43SKuniyuki Iwashima .show = bpf_iter_unix_seq_show,
35432c860a43SKuniyuki Iwashima };
35442c860a43SKuniyuki Iwashima #endif
35451da177e4SLinus Torvalds #endif
35461da177e4SLinus Torvalds
3547ec1b4cf7SStephen Hemminger static const struct net_proto_family unix_family_ops = {
35481da177e4SLinus Torvalds .family = PF_UNIX,
35491da177e4SLinus Torvalds .create = unix_create,
35501da177e4SLinus Torvalds .owner = THIS_MODULE,
35511da177e4SLinus Torvalds };
35521da177e4SLinus Torvalds
3553097e66c5SDenis V. Lunev
unix_net_init(struct net * net)35542c8c1e72SAlexey Dobriyan static int __net_init unix_net_init(struct net *net)
3555097e66c5SDenis V. Lunev {
3556b6e81138SKuniyuki Iwashima int i;
3557097e66c5SDenis V. Lunev
3558a0a53c8bSDenis V. Lunev net->unx.sysctl_max_dgram_qlen = 10;
35591597fbc0SPavel Emelyanov if (unix_sysctl_register(net))
35601597fbc0SPavel Emelyanov goto out;
3561d392e497SPavel Emelyanov
3562097e66c5SDenis V. Lunev #ifdef CONFIG_PROC_FS
3563c3506372SChristoph Hellwig if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
3564b6e81138SKuniyuki Iwashima sizeof(struct seq_net_private)))
3565b6e81138SKuniyuki Iwashima goto err_sysctl;
3566097e66c5SDenis V. Lunev #endif
3567b6e81138SKuniyuki Iwashima
3568b6e81138SKuniyuki Iwashima net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE,
3569b6e81138SKuniyuki Iwashima sizeof(spinlock_t), GFP_KERNEL);
3570b6e81138SKuniyuki Iwashima if (!net->unx.table.locks)
3571b6e81138SKuniyuki Iwashima goto err_proc;
3572b6e81138SKuniyuki Iwashima
3573b6e81138SKuniyuki Iwashima net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE,
3574b6e81138SKuniyuki Iwashima sizeof(struct hlist_head),
3575b6e81138SKuniyuki Iwashima GFP_KERNEL);
3576b6e81138SKuniyuki Iwashima if (!net->unx.table.buckets)
3577b6e81138SKuniyuki Iwashima goto free_locks;
3578b6e81138SKuniyuki Iwashima
3579b6e81138SKuniyuki Iwashima for (i = 0; i < UNIX_HASH_SIZE; i++) {
3580b6e81138SKuniyuki Iwashima spin_lock_init(&net->unx.table.locks[i]);
3581b6e81138SKuniyuki Iwashima INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
3582b6e81138SKuniyuki Iwashima }
3583b6e81138SKuniyuki Iwashima
3584b6e81138SKuniyuki Iwashima return 0;
3585b6e81138SKuniyuki Iwashima
3586b6e81138SKuniyuki Iwashima free_locks:
3587b6e81138SKuniyuki Iwashima kvfree(net->unx.table.locks);
3588b6e81138SKuniyuki Iwashima err_proc:
3589b6e81138SKuniyuki Iwashima #ifdef CONFIG_PROC_FS
3590b6e81138SKuniyuki Iwashima remove_proc_entry("unix", net->proc_net);
3591b6e81138SKuniyuki Iwashima err_sysctl:
3592b6e81138SKuniyuki Iwashima #endif
3593b6e81138SKuniyuki Iwashima unix_sysctl_unregister(net);
3594097e66c5SDenis V. Lunev out:
3595b6e81138SKuniyuki Iwashima return -ENOMEM;
3596097e66c5SDenis V. Lunev }
3597097e66c5SDenis V. Lunev
unix_net_exit(struct net * net)35982c8c1e72SAlexey Dobriyan static void __net_exit unix_net_exit(struct net *net)
3599097e66c5SDenis V. Lunev {
3600b6e81138SKuniyuki Iwashima kvfree(net->unx.table.buckets);
3601b6e81138SKuniyuki Iwashima kvfree(net->unx.table.locks);
36021597fbc0SPavel Emelyanov unix_sysctl_unregister(net);
3603ece31ffdSGao feng remove_proc_entry("unix", net->proc_net);
3604097e66c5SDenis V. Lunev }
3605097e66c5SDenis V. Lunev
3606097e66c5SDenis V. Lunev static struct pernet_operations unix_net_ops = {
3607097e66c5SDenis V. Lunev .init = unix_net_init,
3608097e66c5SDenis V. Lunev .exit = unix_net_exit,
3609097e66c5SDenis V. Lunev };
3610097e66c5SDenis V. Lunev
36112c860a43SKuniyuki Iwashima #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
/* Declares the "unix" BPF iterator target and its context arguments. */
DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
		     struct unix_sock *unix_sk, uid_t uid)

/* Batch array capacity requested by bpf_iter_init_unix(). */
#define INIT_BATCH_SZ		16
3616855d8e77SKuniyuki Iwashima
3617855d8e77SKuniyuki Iwashima static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
3618855d8e77SKuniyuki Iwashima {
3619855d8e77SKuniyuki Iwashima struct bpf_unix_iter_state *iter = priv_data;
3620855d8e77SKuniyuki Iwashima int err;
3621855d8e77SKuniyuki Iwashima
3622855d8e77SKuniyuki Iwashima err = bpf_iter_init_seq_net(priv_data, aux);
3623855d8e77SKuniyuki Iwashima if (err)
3624855d8e77SKuniyuki Iwashima return err;
3625855d8e77SKuniyuki Iwashima
3626855d8e77SKuniyuki Iwashima err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
3627855d8e77SKuniyuki Iwashima if (err) {
3628855d8e77SKuniyuki Iwashima bpf_iter_fini_seq_net(priv_data);
3629855d8e77SKuniyuki Iwashima return err;
3630855d8e77SKuniyuki Iwashima }
3631855d8e77SKuniyuki Iwashima
3632855d8e77SKuniyuki Iwashima return 0;
3633855d8e77SKuniyuki Iwashima }
3634855d8e77SKuniyuki Iwashima
bpf_iter_fini_unix(void * priv_data)3635855d8e77SKuniyuki Iwashima static void bpf_iter_fini_unix(void *priv_data)
3636855d8e77SKuniyuki Iwashima {
3637855d8e77SKuniyuki Iwashima struct bpf_unix_iter_state *iter = priv_data;
3638855d8e77SKuniyuki Iwashima
3639855d8e77SKuniyuki Iwashima bpf_iter_fini_seq_net(priv_data);
3640855d8e77SKuniyuki Iwashima kvfree(iter->batch);
3641855d8e77SKuniyuki Iwashima }
3642855d8e77SKuniyuki Iwashima
36432c860a43SKuniyuki Iwashima static const struct bpf_iter_seq_info unix_seq_info = {
36442c860a43SKuniyuki Iwashima .seq_ops = &bpf_iter_unix_seq_ops,
3645855d8e77SKuniyuki Iwashima .init_seq_private = bpf_iter_init_unix,
3646855d8e77SKuniyuki Iwashima .fini_seq_private = bpf_iter_fini_unix,
3647855d8e77SKuniyuki Iwashima .seq_priv_size = sizeof(struct bpf_unix_iter_state),
36482c860a43SKuniyuki Iwashima };
36492c860a43SKuniyuki Iwashima
3650eb7d8f1dSKuniyuki Iwashima static const struct bpf_func_proto *
bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,const struct bpf_prog * prog)3651eb7d8f1dSKuniyuki Iwashima bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
3652eb7d8f1dSKuniyuki Iwashima const struct bpf_prog *prog)
3653eb7d8f1dSKuniyuki Iwashima {
3654eb7d8f1dSKuniyuki Iwashima switch (func_id) {
3655eb7d8f1dSKuniyuki Iwashima case BPF_FUNC_setsockopt:
3656eb7d8f1dSKuniyuki Iwashima return &bpf_sk_setsockopt_proto;
3657eb7d8f1dSKuniyuki Iwashima case BPF_FUNC_getsockopt:
3658eb7d8f1dSKuniyuki Iwashima return &bpf_sk_getsockopt_proto;
3659eb7d8f1dSKuniyuki Iwashima default:
3660eb7d8f1dSKuniyuki Iwashima return NULL;
3661eb7d8f1dSKuniyuki Iwashima }
3662eb7d8f1dSKuniyuki Iwashima }
3663eb7d8f1dSKuniyuki Iwashima
36642c860a43SKuniyuki Iwashima static struct bpf_iter_reg unix_reg_info = {
36652c860a43SKuniyuki Iwashima .target = "unix",
36662c860a43SKuniyuki Iwashima .ctx_arg_info_size = 1,
36672c860a43SKuniyuki Iwashima .ctx_arg_info = {
36682c860a43SKuniyuki Iwashima { offsetof(struct bpf_iter__unix, unix_sk),
36692c860a43SKuniyuki Iwashima PTR_TO_BTF_ID_OR_NULL },
36702c860a43SKuniyuki Iwashima },
3671eb7d8f1dSKuniyuki Iwashima .get_func_proto = bpf_iter_unix_get_func_proto,
36722c860a43SKuniyuki Iwashima .seq_info = &unix_seq_info,
36732c860a43SKuniyuki Iwashima };
36742c860a43SKuniyuki Iwashima
bpf_iter_register(void)36752c860a43SKuniyuki Iwashima static void __init bpf_iter_register(void)
36762c860a43SKuniyuki Iwashima {
36772c860a43SKuniyuki Iwashima unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
36782c860a43SKuniyuki Iwashima if (bpf_iter_reg_target(&unix_reg_info))
36792c860a43SKuniyuki Iwashima pr_warn("Warning: could not register bpf iterator unix\n");
36802c860a43SKuniyuki Iwashima }
36812c860a43SKuniyuki Iwashima #endif
36822c860a43SKuniyuki Iwashima
af_unix_init(void)36831da177e4SLinus Torvalds static int __init af_unix_init(void)
36841da177e4SLinus Torvalds {
368551bae889SKuniyuki Iwashima int i, rc = -1;
36861da177e4SLinus Torvalds
3687c593642cSPankaj Bharadiya BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
36881da177e4SLinus Torvalds
368951bae889SKuniyuki Iwashima for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
369051bae889SKuniyuki Iwashima spin_lock_init(&bsd_socket_locks[i]);
369151bae889SKuniyuki Iwashima INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
369251bae889SKuniyuki Iwashima }
369351bae889SKuniyuki Iwashima
369494531cfcSJiang Wang rc = proto_register(&unix_dgram_proto, 1);
369594531cfcSJiang Wang if (rc != 0) {
369694531cfcSJiang Wang pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
369794531cfcSJiang Wang goto out;
369894531cfcSJiang Wang }
369994531cfcSJiang Wang
370094531cfcSJiang Wang rc = proto_register(&unix_stream_proto, 1);
37011da177e4SLinus Torvalds if (rc != 0) {
37025cc208beSwangweidong pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
370373e341e0SYang Yingliang proto_unregister(&unix_dgram_proto);
37041da177e4SLinus Torvalds goto out;
37051da177e4SLinus Torvalds }
37061da177e4SLinus Torvalds
37071da177e4SLinus Torvalds sock_register(&unix_family_ops);
3708097e66c5SDenis V. Lunev register_pernet_subsys(&unix_net_ops);
3709c6382918SCong Wang unix_bpf_build_proto();
37102c860a43SKuniyuki Iwashima
37112c860a43SKuniyuki Iwashima #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
37122c860a43SKuniyuki Iwashima bpf_iter_register();
37132c860a43SKuniyuki Iwashima #endif
37142c860a43SKuniyuki Iwashima
37151da177e4SLinus Torvalds out:
37161da177e4SLinus Torvalds return rc;
37171da177e4SLinus Torvalds }
37181da177e4SLinus Torvalds
af_unix_exit(void)37191da177e4SLinus Torvalds static void __exit af_unix_exit(void)
37201da177e4SLinus Torvalds {
37211da177e4SLinus Torvalds sock_unregister(PF_UNIX);
372294531cfcSJiang Wang proto_unregister(&unix_dgram_proto);
372394531cfcSJiang Wang proto_unregister(&unix_stream_proto);
3724097e66c5SDenis V. Lunev unregister_pernet_subsys(&unix_net_ops);
37251da177e4SLinus Torvalds }
37261da177e4SLinus Torvalds
37273d366960SDavid Woodhouse /* Earlier than device_initcall() so that other drivers invoking
37283d366960SDavid Woodhouse request_module() don't end up in a loop when modprobe tries
37293d366960SDavid Woodhouse to use a UNIX socket. But later than subsys_initcall() because
37303d366960SDavid Woodhouse we depend on stuff initialised there */
37313d366960SDavid Woodhouse fs_initcall(af_unix_init);
37321da177e4SLinus Torvalds module_exit(af_unix_exit);
37331da177e4SLinus Torvalds
37341da177e4SLinus Torvalds MODULE_LICENSE("GPL");
37351da177e4SLinus Torvalds MODULE_ALIAS_NETPROTO(PF_UNIX);
3736