xref: /openbmc/linux/net/unix/af_unix.c (revision 16f6ccde74a6f8538c62f127f17207c75f4dba7a)
1  // SPDX-License-Identifier: GPL-2.0-or-later
2  /*
3   * NET4:	Implementation of BSD Unix domain sockets.
4   *
5   * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
6   *
7   * Fixes:
8   *		Linus Torvalds	:	Assorted bug cures.
9   *		Niibe Yutaka	:	async I/O support.
10   *		Carsten Paeth	:	PF_UNIX check, address fixes.
11   *		Alan Cox	:	Limit size of allocated blocks.
12   *		Alan Cox	:	Fixed the stupid socketpair bug.
13   *		Alan Cox	:	BSD compatibility fine tuning.
14   *		Alan Cox	:	Fixed a bug in connect when interrupted.
15   *		Alan Cox	:	Sorted out a proper draft version of
16   *					file descriptor passing hacked up from
17   *					Mike Shaver's work.
18   *		Marty Leisner	:	Fixes to fd passing
19   *		Nick Nevin	:	recvmsg bugfix.
20   *		Alan Cox	:	Started proper garbage collector
21   *		Heiko EiBfeldt	:	Missing verify_area check
22   *		Alan Cox	:	Started POSIXisms
23   *		Andreas Schwab	:	Replace inode by dentry for proper
24   *					reference counting
25   *		Kirk Petersen	:	Made this a module
26   *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
27   *					Lots of bug fixes.
28   *	     Alexey Kuznetosv	:	Repaired (I hope) bugs introduces
29   *					by above two patches.
30   *	     Andrea Arcangeli	:	If possible we block in connect(2)
31   *					if the max backlog of the listen socket
32   *					is been reached. This won't break
33   *					old apps and it will avoid huge amount
34   *					of socks hashed (this for unix_gc()
35   *					performances reasons).
36   *					Security fix that limits the max
37   *					number of socks to 2*max_files and
38   *					the number of skb queueable in the
39   *					dgram receiver.
40   *		Artur Skawina   :	Hash function optimizations
41   *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
42   *	      Malcolm Beattie   :	Set peercred for socketpair
43   *	     Michal Ostrowski   :       Module initialization cleanup.
44   *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
45   *	     				the core infrastructure is doing that
46   *	     				for all net proto families now (2.5.69+)
47   *
48   * Known differences from reference BSD that was tested:
49   *
50   *	[TO FIX]
51   *	ECONNREFUSED is not returned from one end of a connected() socket to the
52   *		other the moment one end closes.
53   *	fstat() doesn't return st_dev=0, and give the blksize as high water mark
54   *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
55   *	[NOT TO FIX]
56   *	accept() returns a path name even if the connecting socket has closed
57   *		in the meantime (BSD loses the path and gives up).
58   *	accept() returns 0 length path for an unbound connector. BSD returns 16
59   *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60   *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
61   *	BSD af_unix apparently has connect forgetting to block properly.
62   *		(need to check this with the POSIX spec in detail)
63   *
64   * Differences from 2.0.0-11-... (ANK)
65   *	Bug fixes and improvements.
66   *		- client shutdown killed server socket.
67   *		- removed all useless cli/sti pairs.
68   *
69   *	Semantic changes/extensions.
70   *		- generic control message passing.
71   *		- SCM_CREDENTIALS control message.
72   *		- "Abstract" (not FS based) socket bindings.
73   *		  Abstract names are sequences of bytes (not zero terminated)
74   *		  started by 0, so that this name space does not intersect
75   *		  with BSD names.
76   */
77  
78  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79  
80  #include <linux/module.h>
81  #include <linux/kernel.h>
82  #include <linux/signal.h>
83  #include <linux/sched/signal.h>
84  #include <linux/errno.h>
85  #include <linux/string.h>
86  #include <linux/stat.h>
87  #include <linux/dcache.h>
88  #include <linux/namei.h>
89  #include <linux/socket.h>
90  #include <linux/un.h>
91  #include <linux/fcntl.h>
92  #include <linux/filter.h>
93  #include <linux/termios.h>
94  #include <linux/sockios.h>
95  #include <linux/net.h>
96  #include <linux/in.h>
97  #include <linux/fs.h>
98  #include <linux/slab.h>
99  #include <linux/uaccess.h>
100  #include <linux/skbuff.h>
101  #include <linux/netdevice.h>
102  #include <net/net_namespace.h>
103  #include <net/sock.h>
104  #include <net/tcp_states.h>
105  #include <net/af_unix.h>
106  #include <linux/proc_fs.h>
107  #include <linux/seq_file.h>
108  #include <net/scm.h>
109  #include <linux/init.h>
110  #include <linux/poll.h>
111  #include <linux/rtnetlink.h>
112  #include <linux/mount.h>
113  #include <net/checksum.h>
114  #include <linux/security.h>
115  #include <linux/splice.h>
116  #include <linux/freezer.h>
117  #include <linux/file.h>
118  #include <linux/btf_ids.h>
119  
120  #include "scm.h"
121  
122  static atomic_long_t unix_nr_socks;
123  static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
124  static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
125  
126  /* SMP locking strategy:
127   *    hash table is protected with spinlock.
128   *    each socket state is protected by separate spinlock.
129   */
130  
unix_unbound_hash(struct sock * sk)131  static unsigned int unix_unbound_hash(struct sock *sk)
132  {
133  	unsigned long hash = (unsigned long)sk;
134  
135  	hash ^= hash >> 16;
136  	hash ^= hash >> 8;
137  	hash ^= sk->sk_type;
138  
139  	return hash & UNIX_HASH_MOD;
140  }
141  
unix_bsd_hash(struct inode * i)142  static unsigned int unix_bsd_hash(struct inode *i)
143  {
144  	return i->i_ino & UNIX_HASH_MOD;
145  }
146  
unix_abstract_hash(struct sockaddr_un * sunaddr,int addr_len,int type)147  static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
148  				       int addr_len, int type)
149  {
150  	__wsum csum = csum_partial(sunaddr, addr_len, 0);
151  	unsigned int hash;
152  
153  	hash = (__force unsigned int)csum_fold(csum);
154  	hash ^= hash >> 8;
155  	hash ^= type;
156  
157  	return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
158  }
159  
unix_table_double_lock(struct net * net,unsigned int hash1,unsigned int hash2)160  static void unix_table_double_lock(struct net *net,
161  				   unsigned int hash1, unsigned int hash2)
162  {
163  	if (hash1 == hash2) {
164  		spin_lock(&net->unx.table.locks[hash1]);
165  		return;
166  	}
167  
168  	if (hash1 > hash2)
169  		swap(hash1, hash2);
170  
171  	spin_lock(&net->unx.table.locks[hash1]);
172  	spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
173  }
174  
unix_table_double_unlock(struct net * net,unsigned int hash1,unsigned int hash2)175  static void unix_table_double_unlock(struct net *net,
176  				     unsigned int hash1, unsigned int hash2)
177  {
178  	if (hash1 == hash2) {
179  		spin_unlock(&net->unx.table.locks[hash1]);
180  		return;
181  	}
182  
183  	spin_unlock(&net->unx.table.locks[hash1]);
184  	spin_unlock(&net->unx.table.locks[hash2]);
185  }
186  
187  #ifdef CONFIG_SECURITY_NETWORK
unix_get_secdata(struct scm_cookie * scm,struct sk_buff * skb)188  static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
189  {
190  	UNIXCB(skb).secid = scm->secid;
191  }
192  
unix_set_secdata(struct scm_cookie * scm,struct sk_buff * skb)193  static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
194  {
195  	scm->secid = UNIXCB(skb).secid;
196  }
197  
unix_secdata_eq(struct scm_cookie * scm,struct sk_buff * skb)198  static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
199  {
200  	return (scm->secid == UNIXCB(skb).secid);
201  }
202  #else
unix_get_secdata(struct scm_cookie * scm,struct sk_buff * skb)203  static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
204  { }
205  
unix_set_secdata(struct scm_cookie * scm,struct sk_buff * skb)206  static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
207  { }
208  
unix_secdata_eq(struct scm_cookie * scm,struct sk_buff * skb)209  static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
210  {
211  	return true;
212  }
213  #endif /* CONFIG_SECURITY_NETWORK */
214  
unix_our_peer(struct sock * sk,struct sock * osk)215  static inline int unix_our_peer(struct sock *sk, struct sock *osk)
216  {
217  	return unix_peer(osk) == sk;
218  }
219  
unix_may_send(struct sock * sk,struct sock * osk)220  static inline int unix_may_send(struct sock *sk, struct sock *osk)
221  {
222  	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
223  }
224  
unix_recvq_full_lockless(const struct sock * sk)225  static inline int unix_recvq_full_lockless(const struct sock *sk)
226  {
227  	return skb_queue_len_lockless(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
228  }
229  
unix_peer_get(struct sock * s)230  struct sock *unix_peer_get(struct sock *s)
231  {
232  	struct sock *peer;
233  
234  	unix_state_lock(s);
235  	peer = unix_peer(s);
236  	if (peer)
237  		sock_hold(peer);
238  	unix_state_unlock(s);
239  	return peer;
240  }
241  EXPORT_SYMBOL_GPL(unix_peer_get);
242  
unix_create_addr(struct sockaddr_un * sunaddr,int addr_len)243  static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
244  					     int addr_len)
245  {
246  	struct unix_address *addr;
247  
248  	addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
249  	if (!addr)
250  		return NULL;
251  
252  	refcount_set(&addr->refcnt, 1);
253  	addr->len = addr_len;
254  	memcpy(addr->name, sunaddr, addr_len);
255  
256  	return addr;
257  }
258  
unix_release_addr(struct unix_address * addr)259  static inline void unix_release_addr(struct unix_address *addr)
260  {
261  	if (refcount_dec_and_test(&addr->refcnt))
262  		kfree(addr);
263  }
264  
265  /*
266   *	Check unix socket name:
267   *		- should be not zero length.
268   *	        - if started by not zero, should be NULL terminated (FS object)
269   *		- if started by zero, it is abstract name.
270   */
271  
unix_validate_addr(struct sockaddr_un * sunaddr,int addr_len)272  static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
273  {
274  	if (addr_len <= offsetof(struct sockaddr_un, sun_path) ||
275  	    addr_len > sizeof(*sunaddr))
276  		return -EINVAL;
277  
278  	if (sunaddr->sun_family != AF_UNIX)
279  		return -EINVAL;
280  
281  	return 0;
282  }
283  
unix_mkname_bsd(struct sockaddr_un * sunaddr,int addr_len)284  static int unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
285  {
286  	struct sockaddr_storage *addr = (struct sockaddr_storage *)sunaddr;
287  	short offset = offsetof(struct sockaddr_storage, __data);
288  
289  	BUILD_BUG_ON(offset != offsetof(struct sockaddr_un, sun_path));
290  
291  	/* This may look like an off by one error but it is a bit more
292  	 * subtle.  108 is the longest valid AF_UNIX path for a binding.
293  	 * sun_path[108] doesn't as such exist.  However in kernel space
294  	 * we are guaranteed that it is a valid memory location in our
295  	 * kernel address buffer because syscall functions always pass
296  	 * a pointer of struct sockaddr_storage which has a bigger buffer
297  	 * than 108.  Also, we must terminate sun_path for strlen() in
298  	 * getname_kernel().
299  	 */
300  	addr->__data[addr_len - offset] = 0;
301  
302  	/* Don't pass sunaddr->sun_path to strlen().  Otherwise, 108 will
303  	 * cause panic if CONFIG_FORTIFY_SOURCE=y.  Let __fortify_strlen()
304  	 * know the actual buffer.
305  	 */
306  	return strlen(addr->__data) + offset + 1;
307  }
308  
__unix_remove_socket(struct sock * sk)309  static void __unix_remove_socket(struct sock *sk)
310  {
311  	sk_del_node_init(sk);
312  }
313  
__unix_insert_socket(struct net * net,struct sock * sk)314  static void __unix_insert_socket(struct net *net, struct sock *sk)
315  {
316  	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
317  	sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
318  }
319  
__unix_set_addr_hash(struct net * net,struct sock * sk,struct unix_address * addr,unsigned int hash)320  static void __unix_set_addr_hash(struct net *net, struct sock *sk,
321  				 struct unix_address *addr, unsigned int hash)
322  {
323  	__unix_remove_socket(sk);
324  	smp_store_release(&unix_sk(sk)->addr, addr);
325  
326  	sk->sk_hash = hash;
327  	__unix_insert_socket(net, sk);
328  }
329  
unix_remove_socket(struct net * net,struct sock * sk)330  static void unix_remove_socket(struct net *net, struct sock *sk)
331  {
332  	spin_lock(&net->unx.table.locks[sk->sk_hash]);
333  	__unix_remove_socket(sk);
334  	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
335  }
336  
unix_insert_unbound_socket(struct net * net,struct sock * sk)337  static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
338  {
339  	spin_lock(&net->unx.table.locks[sk->sk_hash]);
340  	__unix_insert_socket(net, sk);
341  	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
342  }
343  
unix_insert_bsd_socket(struct sock * sk)344  static void unix_insert_bsd_socket(struct sock *sk)
345  {
346  	spin_lock(&bsd_socket_locks[sk->sk_hash]);
347  	sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
348  	spin_unlock(&bsd_socket_locks[sk->sk_hash]);
349  }
350  
unix_remove_bsd_socket(struct sock * sk)351  static void unix_remove_bsd_socket(struct sock *sk)
352  {
353  	if (!hlist_unhashed(&sk->sk_bind_node)) {
354  		spin_lock(&bsd_socket_locks[sk->sk_hash]);
355  		__sk_del_bind_node(sk);
356  		spin_unlock(&bsd_socket_locks[sk->sk_hash]);
357  
358  		sk_node_init(&sk->sk_bind_node);
359  	}
360  }
361  
__unix_find_socket_byname(struct net * net,struct sockaddr_un * sunname,int len,unsigned int hash)362  static struct sock *__unix_find_socket_byname(struct net *net,
363  					      struct sockaddr_un *sunname,
364  					      int len, unsigned int hash)
365  {
366  	struct sock *s;
367  
368  	sk_for_each(s, &net->unx.table.buckets[hash]) {
369  		struct unix_sock *u = unix_sk(s);
370  
371  		if (u->addr->len == len &&
372  		    !memcmp(u->addr->name, sunname, len))
373  			return s;
374  	}
375  	return NULL;
376  }
377  
unix_find_socket_byname(struct net * net,struct sockaddr_un * sunname,int len,unsigned int hash)378  static inline struct sock *unix_find_socket_byname(struct net *net,
379  						   struct sockaddr_un *sunname,
380  						   int len, unsigned int hash)
381  {
382  	struct sock *s;
383  
384  	spin_lock(&net->unx.table.locks[hash]);
385  	s = __unix_find_socket_byname(net, sunname, len, hash);
386  	if (s)
387  		sock_hold(s);
388  	spin_unlock(&net->unx.table.locks[hash]);
389  	return s;
390  }
391  
unix_find_socket_byinode(struct inode * i)392  static struct sock *unix_find_socket_byinode(struct inode *i)
393  {
394  	unsigned int hash = unix_bsd_hash(i);
395  	struct sock *s;
396  
397  	spin_lock(&bsd_socket_locks[hash]);
398  	sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
399  		struct dentry *dentry = unix_sk(s)->path.dentry;
400  
401  		if (dentry && d_backing_inode(dentry) == i) {
402  			sock_hold(s);
403  			spin_unlock(&bsd_socket_locks[hash]);
404  			return s;
405  		}
406  	}
407  	spin_unlock(&bsd_socket_locks[hash]);
408  	return NULL;
409  }
410  
411  /* Support code for asymmetrically connected dgram sockets
412   *
413   * If a datagram socket is connected to a socket not itself connected
414   * to the first socket (eg, /dev/log), clients may only enqueue more
415   * messages if the present receive queue of the server socket is not
416   * "too large". This means there's a second writeability condition
417   * poll and sendmsg need to test. The dgram recv code will do a wake
418   * up on the peer_wait wait queue of a socket upon reception of a
419   * datagram which needs to be propagated to sleeping would-be writers
420   * since these might not have sent anything so far. This can't be
421   * accomplished via poll_wait because the lifetime of the server
422   * socket might be less than that of its clients if these break their
423   * association with it or if the server socket is closed while clients
424   * are still connected to it and there's no way to inform "a polling
425   * implementation" that it should let go of a certain wait queue
426   *
427   * In order to propagate a wake up, a wait_queue_entry_t of the client
428   * socket is enqueued on the peer_wait queue of the server socket
429   * whose wake function does a wake_up on the ordinary client socket
430   * wait queue. This connection is established whenever a write (or
431   * poll for write) hit the flow control condition and broken when the
432   * association to the server socket is dissolved or after a wake up
433   * was relayed.
434   */
435  
unix_dgram_peer_wake_relay(wait_queue_entry_t * q,unsigned mode,int flags,void * key)436  static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
437  				      void *key)
438  {
439  	struct unix_sock *u;
440  	wait_queue_head_t *u_sleep;
441  
442  	u = container_of(q, struct unix_sock, peer_wake);
443  
444  	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
445  			    q);
446  	u->peer_wake.private = NULL;
447  
448  	/* relaying can only happen while the wq still exists */
449  	u_sleep = sk_sleep(&u->sk);
450  	if (u_sleep)
451  		wake_up_interruptible_poll(u_sleep, key_to_poll(key));
452  
453  	return 0;
454  }
455  
unix_dgram_peer_wake_connect(struct sock * sk,struct sock * other)456  static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
457  {
458  	struct unix_sock *u, *u_other;
459  	int rc;
460  
461  	u = unix_sk(sk);
462  	u_other = unix_sk(other);
463  	rc = 0;
464  	spin_lock(&u_other->peer_wait.lock);
465  
466  	if (!u->peer_wake.private) {
467  		u->peer_wake.private = other;
468  		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
469  
470  		rc = 1;
471  	}
472  
473  	spin_unlock(&u_other->peer_wait.lock);
474  	return rc;
475  }
476  
unix_dgram_peer_wake_disconnect(struct sock * sk,struct sock * other)477  static void unix_dgram_peer_wake_disconnect(struct sock *sk,
478  					    struct sock *other)
479  {
480  	struct unix_sock *u, *u_other;
481  
482  	u = unix_sk(sk);
483  	u_other = unix_sk(other);
484  	spin_lock(&u_other->peer_wait.lock);
485  
486  	if (u->peer_wake.private == other) {
487  		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
488  		u->peer_wake.private = NULL;
489  	}
490  
491  	spin_unlock(&u_other->peer_wait.lock);
492  }
493  
unix_dgram_peer_wake_disconnect_wakeup(struct sock * sk,struct sock * other)494  static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
495  						   struct sock *other)
496  {
497  	unix_dgram_peer_wake_disconnect(sk, other);
498  	wake_up_interruptible_poll(sk_sleep(sk),
499  				   EPOLLOUT |
500  				   EPOLLWRNORM |
501  				   EPOLLWRBAND);
502  }
503  
504  /* preconditions:
505   *	- unix_peer(sk) == other
506   *	- association is stable
507   */
unix_dgram_peer_wake_me(struct sock * sk,struct sock * other)508  static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
509  {
510  	int connected;
511  
512  	connected = unix_dgram_peer_wake_connect(sk, other);
513  
514  	/* If other is SOCK_DEAD, we want to make sure we signal
515  	 * POLLOUT, such that a subsequent write() can get a
516  	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
517  	 * to other and its full, we will hang waiting for POLLOUT.
518  	 */
519  	if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
520  		return 1;
521  
522  	if (connected)
523  		unix_dgram_peer_wake_disconnect(sk, other);
524  
525  	return 0;
526  }
527  
unix_writable(const struct sock * sk,unsigned char state)528  static int unix_writable(const struct sock *sk, unsigned char state)
529  {
530  	return state != TCP_LISTEN &&
531  		(refcount_read(&sk->sk_wmem_alloc) << 2) <= READ_ONCE(sk->sk_sndbuf);
532  }
533  
unix_write_space(struct sock * sk)534  static void unix_write_space(struct sock *sk)
535  {
536  	struct socket_wq *wq;
537  
538  	rcu_read_lock();
539  	if (unix_writable(sk, READ_ONCE(sk->sk_state))) {
540  		wq = rcu_dereference(sk->sk_wq);
541  		if (skwq_has_sleeper(wq))
542  			wake_up_interruptible_sync_poll(&wq->wait,
543  				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
544  		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
545  	}
546  	rcu_read_unlock();
547  }
548  
549  /* When dgram socket disconnects (or changes its peer), we clear its receive
550   * queue of packets arrived from previous peer. First, it allows to do
551   * flow control based only on wmem_alloc; second, sk connected to peer
552   * may receive messages only from that peer. */
unix_dgram_disconnected(struct sock * sk,struct sock * other)553  static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
554  {
555  	if (!skb_queue_empty(&sk->sk_receive_queue)) {
556  		skb_queue_purge(&sk->sk_receive_queue);
557  		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
558  
559  		/* If one link of bidirectional dgram pipe is disconnected,
560  		 * we signal error. Messages are lost. Do not make this,
561  		 * when peer was not connected to us.
562  		 */
563  		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
564  			WRITE_ONCE(other->sk_err, ECONNRESET);
565  			sk_error_report(other);
566  		}
567  	}
568  }
569  
unix_sock_destructor(struct sock * sk)570  static void unix_sock_destructor(struct sock *sk)
571  {
572  	struct unix_sock *u = unix_sk(sk);
573  
574  	skb_queue_purge(&sk->sk_receive_queue);
575  
576  	DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
577  	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
578  	DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
579  	if (!sock_flag(sk, SOCK_DEAD)) {
580  		pr_info("Attempt to release alive unix socket: %p\n", sk);
581  		return;
582  	}
583  
584  	if (u->addr)
585  		unix_release_addr(u->addr);
586  
587  	atomic_long_dec(&unix_nr_socks);
588  	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
589  #ifdef UNIX_REFCNT_DEBUG
590  	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
591  		atomic_long_read(&unix_nr_socks));
592  #endif
593  }
594  
unix_release_sock(struct sock * sk,int embrion)595  static void unix_release_sock(struct sock *sk, int embrion)
596  {
597  	struct unix_sock *u = unix_sk(sk);
598  	struct sock *skpair;
599  	struct sk_buff *skb;
600  	struct path path;
601  	int state;
602  
603  	unix_remove_socket(sock_net(sk), sk);
604  	unix_remove_bsd_socket(sk);
605  
606  	/* Clear state */
607  	unix_state_lock(sk);
608  	sock_orphan(sk);
609  	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
610  	path	     = u->path;
611  	u->path.dentry = NULL;
612  	u->path.mnt = NULL;
613  	state = sk->sk_state;
614  	WRITE_ONCE(sk->sk_state, TCP_CLOSE);
615  
616  	skpair = unix_peer(sk);
617  	unix_peer(sk) = NULL;
618  
619  	unix_state_unlock(sk);
620  
621  #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
622  	if (u->oob_skb) {
623  		kfree_skb(u->oob_skb);
624  		u->oob_skb = NULL;
625  	}
626  #endif
627  
628  	wake_up_interruptible_all(&u->peer_wait);
629  
630  	if (skpair != NULL) {
631  		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
632  			unix_state_lock(skpair);
633  			/* No more writes */
634  			WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
635  			if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || embrion)
636  				WRITE_ONCE(skpair->sk_err, ECONNRESET);
637  			unix_state_unlock(skpair);
638  			skpair->sk_state_change(skpair);
639  			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
640  		}
641  
642  		unix_dgram_peer_wake_disconnect(sk, skpair);
643  		sock_put(skpair); /* It may now die */
644  	}
645  
646  	/* Try to flush out this socket. Throw out buffers at least */
647  
648  	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
649  		if (state == TCP_LISTEN)
650  			unix_release_sock(skb->sk, 1);
651  		/* passed fds are erased in the kfree_skb hook	      */
652  		UNIXCB(skb).consumed = skb->len;
653  		kfree_skb(skb);
654  	}
655  
656  	if (path.dentry)
657  		path_put(&path);
658  
659  	sock_put(sk);
660  
661  	/* ---- Socket is dead now and most probably destroyed ---- */
662  
663  	/*
664  	 * Fixme: BSD difference: In BSD all sockets connected to us get
665  	 *	  ECONNRESET and we die on the spot. In Linux we behave
666  	 *	  like files and pipes do and wait for the last
667  	 *	  dereference.
668  	 *
669  	 * Can't we simply set sock->err?
670  	 *
671  	 *	  What the above comment does talk about? --ANK(980817)
672  	 */
673  
674  	if (READ_ONCE(unix_tot_inflight))
675  		unix_gc();		/* Garbage collect fds */
676  }
677  
init_peercred(struct sock * sk)678  static void init_peercred(struct sock *sk)
679  {
680  	const struct cred *old_cred;
681  	struct pid *old_pid;
682  
683  	spin_lock(&sk->sk_peer_lock);
684  	old_pid = sk->sk_peer_pid;
685  	old_cred = sk->sk_peer_cred;
686  	sk->sk_peer_pid  = get_pid(task_tgid(current));
687  	sk->sk_peer_cred = get_current_cred();
688  	spin_unlock(&sk->sk_peer_lock);
689  
690  	put_pid(old_pid);
691  	put_cred(old_cred);
692  }
693  
copy_peercred(struct sock * sk,struct sock * peersk)694  static void copy_peercred(struct sock *sk, struct sock *peersk)
695  {
696  	if (sk < peersk) {
697  		spin_lock(&sk->sk_peer_lock);
698  		spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
699  	} else {
700  		spin_lock(&peersk->sk_peer_lock);
701  		spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
702  	}
703  
704  	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
705  	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
706  
707  	spin_unlock(&sk->sk_peer_lock);
708  	spin_unlock(&peersk->sk_peer_lock);
709  }
710  
unix_listen(struct socket * sock,int backlog)711  static int unix_listen(struct socket *sock, int backlog)
712  {
713  	int err;
714  	struct sock *sk = sock->sk;
715  	struct unix_sock *u = unix_sk(sk);
716  
717  	err = -EOPNOTSUPP;
718  	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
719  		goto out;	/* Only stream/seqpacket sockets accept */
720  	err = -EINVAL;
721  	if (!READ_ONCE(u->addr))
722  		goto out;	/* No listens on an unbound socket */
723  	unix_state_lock(sk);
724  	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
725  		goto out_unlock;
726  	if (backlog > sk->sk_max_ack_backlog)
727  		wake_up_interruptible_all(&u->peer_wait);
728  	sk->sk_max_ack_backlog	= backlog;
729  	WRITE_ONCE(sk->sk_state, TCP_LISTEN);
730  
731  	/* set credentials so connect can copy them */
732  	init_peercred(sk);
733  	err = 0;
734  
735  out_unlock:
736  	unix_state_unlock(sk);
737  out:
738  	return err;
739  }
740  
741  static int unix_release(struct socket *);
742  static int unix_bind(struct socket *, struct sockaddr *, int);
743  static int unix_stream_connect(struct socket *, struct sockaddr *,
744  			       int addr_len, int flags);
745  static int unix_socketpair(struct socket *, struct socket *);
746  static int unix_accept(struct socket *, struct socket *, int, bool);
747  static int unix_getname(struct socket *, struct sockaddr *, int);
748  static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
749  static __poll_t unix_dgram_poll(struct file *, struct socket *,
750  				    poll_table *);
751  static int unix_ioctl(struct socket *, unsigned int, unsigned long);
752  #ifdef CONFIG_COMPAT
753  static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
754  #endif
755  static int unix_shutdown(struct socket *, int);
756  static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
757  static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
758  static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
759  				       struct pipe_inode_info *, size_t size,
760  				       unsigned int flags);
761  static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
762  static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
763  static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
764  static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
765  static int unix_dgram_connect(struct socket *, struct sockaddr *,
766  			      int, int);
767  static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
768  static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
769  				  int);
770  
unix_set_peek_off(struct sock * sk,int val)771  static int unix_set_peek_off(struct sock *sk, int val)
772  {
773  	struct unix_sock *u = unix_sk(sk);
774  
775  	if (mutex_lock_interruptible(&u->iolock))
776  		return -EINTR;
777  
778  	WRITE_ONCE(sk->sk_peek_off, val);
779  	mutex_unlock(&u->iolock);
780  
781  	return 0;
782  }
783  
784  #ifdef CONFIG_PROC_FS
unix_count_nr_fds(struct sock * sk)785  static int unix_count_nr_fds(struct sock *sk)
786  {
787  	struct sk_buff *skb;
788  	struct unix_sock *u;
789  	int nr_fds = 0;
790  
791  	spin_lock(&sk->sk_receive_queue.lock);
792  	skb = skb_peek(&sk->sk_receive_queue);
793  	while (skb) {
794  		u = unix_sk(skb->sk);
795  		nr_fds += atomic_read(&u->scm_stat.nr_fds);
796  		skb = skb_peek_next(skb, &sk->sk_receive_queue);
797  	}
798  	spin_unlock(&sk->sk_receive_queue.lock);
799  
800  	return nr_fds;
801  }
802  
unix_show_fdinfo(struct seq_file * m,struct socket * sock)803  static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
804  {
805  	struct sock *sk = sock->sk;
806  	unsigned char s_state;
807  	struct unix_sock *u;
808  	int nr_fds = 0;
809  
810  	if (sk) {
811  		s_state = READ_ONCE(sk->sk_state);
812  		u = unix_sk(sk);
813  
814  		/* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
815  		 * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
816  		 * SOCK_DGRAM is ordinary. So, no lock is needed.
817  		 */
818  		if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED)
819  			nr_fds = atomic_read(&u->scm_stat.nr_fds);
820  		else if (s_state == TCP_LISTEN)
821  			nr_fds = unix_count_nr_fds(sk);
822  
823  		seq_printf(m, "scm_fds: %u\n", nr_fds);
824  	}
825  }
826  #else
827  #define unix_show_fdinfo NULL
828  #endif
829  
830  static const struct proto_ops unix_stream_ops = {
831  	.family =	PF_UNIX,
832  	.owner =	THIS_MODULE,
833  	.release =	unix_release,
834  	.bind =		unix_bind,
835  	.connect =	unix_stream_connect,
836  	.socketpair =	unix_socketpair,
837  	.accept =	unix_accept,
838  	.getname =	unix_getname,
839  	.poll =		unix_poll,
840  	.ioctl =	unix_ioctl,
841  #ifdef CONFIG_COMPAT
842  	.compat_ioctl =	unix_compat_ioctl,
843  #endif
844  	.listen =	unix_listen,
845  	.shutdown =	unix_shutdown,
846  	.sendmsg =	unix_stream_sendmsg,
847  	.recvmsg =	unix_stream_recvmsg,
848  	.read_skb =	unix_stream_read_skb,
849  	.mmap =		sock_no_mmap,
850  	.splice_read =	unix_stream_splice_read,
851  	.set_peek_off =	unix_set_peek_off,
852  	.show_fdinfo =	unix_show_fdinfo,
853  };
854  
855  static const struct proto_ops unix_dgram_ops = {
856  	.family =	PF_UNIX,
857  	.owner =	THIS_MODULE,
858  	.release =	unix_release,
859  	.bind =		unix_bind,
860  	.connect =	unix_dgram_connect,
861  	.socketpair =	unix_socketpair,
862  	.accept =	sock_no_accept,
863  	.getname =	unix_getname,
864  	.poll =		unix_dgram_poll,
865  	.ioctl =	unix_ioctl,
866  #ifdef CONFIG_COMPAT
867  	.compat_ioctl =	unix_compat_ioctl,
868  #endif
869  	.listen =	sock_no_listen,
870  	.shutdown =	unix_shutdown,
871  	.sendmsg =	unix_dgram_sendmsg,
872  	.read_skb =	unix_read_skb,
873  	.recvmsg =	unix_dgram_recvmsg,
874  	.mmap =		sock_no_mmap,
875  	.set_peek_off =	unix_set_peek_off,
876  	.show_fdinfo =	unix_show_fdinfo,
877  };
878  
879  static const struct proto_ops unix_seqpacket_ops = {
880  	.family =	PF_UNIX,
881  	.owner =	THIS_MODULE,
882  	.release =	unix_release,
883  	.bind =		unix_bind,
884  	.connect =	unix_stream_connect,
885  	.socketpair =	unix_socketpair,
886  	.accept =	unix_accept,
887  	.getname =	unix_getname,
888  	.poll =		unix_dgram_poll,
889  	.ioctl =	unix_ioctl,
890  #ifdef CONFIG_COMPAT
891  	.compat_ioctl =	unix_compat_ioctl,
892  #endif
893  	.listen =	unix_listen,
894  	.shutdown =	unix_shutdown,
895  	.sendmsg =	unix_seqpacket_sendmsg,
896  	.recvmsg =	unix_seqpacket_recvmsg,
897  	.mmap =		sock_no_mmap,
898  	.set_peek_off =	unix_set_peek_off,
899  	.show_fdinfo =	unix_show_fdinfo,
900  };
901  
unix_close(struct sock * sk,long timeout)902  static void unix_close(struct sock *sk, long timeout)
903  {
904  	/* Nothing to do here, unix socket does not need a ->close().
905  	 * This is merely for sockmap.
906  	 */
907  }
908  
unix_unhash(struct sock * sk)909  static void unix_unhash(struct sock *sk)
910  {
911  	/* Nothing to do here, unix socket does not need a ->unhash().
912  	 * This is merely for sockmap.
913  	 */
914  }
915  
unix_bpf_bypass_getsockopt(int level,int optname)916  static bool unix_bpf_bypass_getsockopt(int level, int optname)
917  {
918  	if (level == SOL_SOCKET) {
919  		switch (optname) {
920  		case SO_PEERPIDFD:
921  			return true;
922  		default:
923  			return false;
924  		}
925  	}
926  
927  	return false;
928  }
929  
930  struct proto unix_dgram_proto = {
931  	.name			= "UNIX",
932  	.owner			= THIS_MODULE,
933  	.obj_size		= sizeof(struct unix_sock),
934  	.close			= unix_close,
935  	.bpf_bypass_getsockopt	= unix_bpf_bypass_getsockopt,
936  #ifdef CONFIG_BPF_SYSCALL
937  	.psock_update_sk_prot	= unix_dgram_bpf_update_proto,
938  #endif
939  };
940  
941  struct proto unix_stream_proto = {
942  	.name			= "UNIX-STREAM",
943  	.owner			= THIS_MODULE,
944  	.obj_size		= sizeof(struct unix_sock),
945  	.close			= unix_close,
946  	.unhash			= unix_unhash,
947  	.bpf_bypass_getsockopt	= unix_bpf_bypass_getsockopt,
948  #ifdef CONFIG_BPF_SYSCALL
949  	.psock_update_sk_prot	= unix_stream_bpf_update_proto,
950  #endif
951  };
952  
unix_create1(struct net * net,struct socket * sock,int kern,int type)953  static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
954  {
955  	struct unix_sock *u;
956  	struct sock *sk;
957  	int err;
958  
959  	atomic_long_inc(&unix_nr_socks);
960  	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
961  		err = -ENFILE;
962  		goto err;
963  	}
964  
965  	if (type == SOCK_STREAM)
966  		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
967  	else /*dgram and  seqpacket */
968  		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
969  
970  	if (!sk) {
971  		err = -ENOMEM;
972  		goto err;
973  	}
974  
975  	sock_init_data(sock, sk);
976  
977  	sk->sk_hash		= unix_unbound_hash(sk);
978  	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
979  	sk->sk_write_space	= unix_write_space;
980  	sk->sk_max_ack_backlog	= READ_ONCE(net->unx.sysctl_max_dgram_qlen);
981  	sk->sk_destruct		= unix_sock_destructor;
982  	u = unix_sk(sk);
983  	u->inflight = 0;
984  	u->path.dentry = NULL;
985  	u->path.mnt = NULL;
986  	spin_lock_init(&u->lock);
987  	INIT_LIST_HEAD(&u->link);
988  	mutex_init(&u->iolock); /* single task reading lock */
989  	mutex_init(&u->bindlock); /* single task binding lock */
990  	init_waitqueue_head(&u->peer_wait);
991  	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
992  	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
993  	unix_insert_unbound_socket(net, sk);
994  
995  	sock_prot_inuse_add(net, sk->sk_prot, 1);
996  
997  	return sk;
998  
999  err:
1000  	atomic_long_dec(&unix_nr_socks);
1001  	return ERR_PTR(err);
1002  }
1003  
unix_create(struct net * net,struct socket * sock,int protocol,int kern)1004  static int unix_create(struct net *net, struct socket *sock, int protocol,
1005  		       int kern)
1006  {
1007  	struct sock *sk;
1008  
1009  	if (protocol && protocol != PF_UNIX)
1010  		return -EPROTONOSUPPORT;
1011  
1012  	sock->state = SS_UNCONNECTED;
1013  
1014  	switch (sock->type) {
1015  	case SOCK_STREAM:
1016  		sock->ops = &unix_stream_ops;
1017  		break;
1018  		/*
1019  		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
1020  		 *	nothing uses it.
1021  		 */
1022  	case SOCK_RAW:
1023  		sock->type = SOCK_DGRAM;
1024  		fallthrough;
1025  	case SOCK_DGRAM:
1026  		sock->ops = &unix_dgram_ops;
1027  		break;
1028  	case SOCK_SEQPACKET:
1029  		sock->ops = &unix_seqpacket_ops;
1030  		break;
1031  	default:
1032  		return -ESOCKTNOSUPPORT;
1033  	}
1034  
1035  	sk = unix_create1(net, sock, kern, sock->type);
1036  	if (IS_ERR(sk))
1037  		return PTR_ERR(sk);
1038  
1039  	return 0;
1040  }
1041  
unix_release(struct socket * sock)1042  static int unix_release(struct socket *sock)
1043  {
1044  	struct sock *sk = sock->sk;
1045  
1046  	if (!sk)
1047  		return 0;
1048  
1049  	sk->sk_prot->close(sk, 0);
1050  	unix_release_sock(sk, 0);
1051  	sock->sk = NULL;
1052  
1053  	return 0;
1054  }
1055  
unix_find_bsd(struct sockaddr_un * sunaddr,int addr_len,int type)1056  static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
1057  				  int type)
1058  {
1059  	struct inode *inode;
1060  	struct path path;
1061  	struct sock *sk;
1062  	int err;
1063  
1064  	unix_mkname_bsd(sunaddr, addr_len);
1065  	err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
1066  	if (err)
1067  		goto fail;
1068  
1069  	err = path_permission(&path, MAY_WRITE);
1070  	if (err)
1071  		goto path_put;
1072  
1073  	err = -ECONNREFUSED;
1074  	inode = d_backing_inode(path.dentry);
1075  	if (!S_ISSOCK(inode->i_mode))
1076  		goto path_put;
1077  
1078  	sk = unix_find_socket_byinode(inode);
1079  	if (!sk)
1080  		goto path_put;
1081  
1082  	err = -EPROTOTYPE;
1083  	if (sk->sk_type == type)
1084  		touch_atime(&path);
1085  	else
1086  		goto sock_put;
1087  
1088  	path_put(&path);
1089  
1090  	return sk;
1091  
1092  sock_put:
1093  	sock_put(sk);
1094  path_put:
1095  	path_put(&path);
1096  fail:
1097  	return ERR_PTR(err);
1098  }
1099  
unix_find_abstract(struct net * net,struct sockaddr_un * sunaddr,int addr_len,int type)1100  static struct sock *unix_find_abstract(struct net *net,
1101  				       struct sockaddr_un *sunaddr,
1102  				       int addr_len, int type)
1103  {
1104  	unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type);
1105  	struct dentry *dentry;
1106  	struct sock *sk;
1107  
1108  	sk = unix_find_socket_byname(net, sunaddr, addr_len, hash);
1109  	if (!sk)
1110  		return ERR_PTR(-ECONNREFUSED);
1111  
1112  	dentry = unix_sk(sk)->path.dentry;
1113  	if (dentry)
1114  		touch_atime(&unix_sk(sk)->path);
1115  
1116  	return sk;
1117  }
1118  
unix_find_other(struct net * net,struct sockaddr_un * sunaddr,int addr_len,int type)1119  static struct sock *unix_find_other(struct net *net,
1120  				    struct sockaddr_un *sunaddr,
1121  				    int addr_len, int type)
1122  {
1123  	struct sock *sk;
1124  
1125  	if (sunaddr->sun_path[0])
1126  		sk = unix_find_bsd(sunaddr, addr_len, type);
1127  	else
1128  		sk = unix_find_abstract(net, sunaddr, addr_len, type);
1129  
1130  	return sk;
1131  }
1132  
unix_autobind(struct sock * sk)1133  static int unix_autobind(struct sock *sk)
1134  {
1135  	struct unix_sock *u = unix_sk(sk);
1136  	unsigned int new_hash, old_hash;
1137  	struct net *net = sock_net(sk);
1138  	struct unix_address *addr;
1139  	u32 lastnum, ordernum;
1140  	int err;
1141  
1142  	err = mutex_lock_interruptible(&u->bindlock);
1143  	if (err)
1144  		return err;
1145  
1146  	if (u->addr)
1147  		goto out;
1148  
1149  	err = -ENOMEM;
1150  	addr = kzalloc(sizeof(*addr) +
1151  		       offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
1152  	if (!addr)
1153  		goto out;
1154  
1155  	addr->len = offsetof(struct sockaddr_un, sun_path) + 6;
1156  	addr->name->sun_family = AF_UNIX;
1157  	refcount_set(&addr->refcnt, 1);
1158  
1159  	old_hash = sk->sk_hash;
1160  	ordernum = get_random_u32();
1161  	lastnum = ordernum & 0xFFFFF;
1162  retry:
1163  	ordernum = (ordernum + 1) & 0xFFFFF;
1164  	sprintf(addr->name->sun_path + 1, "%05x", ordernum);
1165  
1166  	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1167  	unix_table_double_lock(net, old_hash, new_hash);
1168  
1169  	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
1170  		unix_table_double_unlock(net, old_hash, new_hash);
1171  
1172  		/* __unix_find_socket_byname() may take long time if many names
1173  		 * are already in use.
1174  		 */
1175  		cond_resched();
1176  
1177  		if (ordernum == lastnum) {
1178  			/* Give up if all names seems to be in use. */
1179  			err = -ENOSPC;
1180  			unix_release_addr(addr);
1181  			goto out;
1182  		}
1183  
1184  		goto retry;
1185  	}
1186  
1187  	__unix_set_addr_hash(net, sk, addr, new_hash);
1188  	unix_table_double_unlock(net, old_hash, new_hash);
1189  	err = 0;
1190  
1191  out:	mutex_unlock(&u->bindlock);
1192  	return err;
1193  }
1194  
unix_bind_bsd(struct sock * sk,struct sockaddr_un * sunaddr,int addr_len)1195  static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
1196  			 int addr_len)
1197  {
1198  	umode_t mode = S_IFSOCK |
1199  	       (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
1200  	struct unix_sock *u = unix_sk(sk);
1201  	unsigned int new_hash, old_hash;
1202  	struct net *net = sock_net(sk);
1203  	struct mnt_idmap *idmap;
1204  	struct unix_address *addr;
1205  	struct dentry *dentry;
1206  	struct path parent;
1207  	int err;
1208  
1209  	addr_len = unix_mkname_bsd(sunaddr, addr_len);
1210  	addr = unix_create_addr(sunaddr, addr_len);
1211  	if (!addr)
1212  		return -ENOMEM;
1213  
1214  	/*
1215  	 * Get the parent directory, calculate the hash for last
1216  	 * component.
1217  	 */
1218  	dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
1219  	if (IS_ERR(dentry)) {
1220  		err = PTR_ERR(dentry);
1221  		goto out;
1222  	}
1223  
1224  	/*
1225  	 * All right, let's create it.
1226  	 */
1227  	idmap = mnt_idmap(parent.mnt);
1228  	err = security_path_mknod(&parent, dentry, mode, 0);
1229  	if (!err)
1230  		err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0);
1231  	if (err)
1232  		goto out_path;
1233  	err = mutex_lock_interruptible(&u->bindlock);
1234  	if (err)
1235  		goto out_unlink;
1236  	if (u->addr)
1237  		goto out_unlock;
1238  
1239  	old_hash = sk->sk_hash;
1240  	new_hash = unix_bsd_hash(d_backing_inode(dentry));
1241  	unix_table_double_lock(net, old_hash, new_hash);
1242  	u->path.mnt = mntget(parent.mnt);
1243  	u->path.dentry = dget(dentry);
1244  	__unix_set_addr_hash(net, sk, addr, new_hash);
1245  	unix_table_double_unlock(net, old_hash, new_hash);
1246  	unix_insert_bsd_socket(sk);
1247  	mutex_unlock(&u->bindlock);
1248  	done_path_create(&parent, dentry);
1249  	return 0;
1250  
1251  out_unlock:
1252  	mutex_unlock(&u->bindlock);
1253  	err = -EINVAL;
1254  out_unlink:
1255  	/* failed after successful mknod?  unlink what we'd created... */
1256  	vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL);
1257  out_path:
1258  	done_path_create(&parent, dentry);
1259  out:
1260  	unix_release_addr(addr);
1261  	return err == -EEXIST ? -EADDRINUSE : err;
1262  }
1263  
unix_bind_abstract(struct sock * sk,struct sockaddr_un * sunaddr,int addr_len)1264  static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
1265  			      int addr_len)
1266  {
1267  	struct unix_sock *u = unix_sk(sk);
1268  	unsigned int new_hash, old_hash;
1269  	struct net *net = sock_net(sk);
1270  	struct unix_address *addr;
1271  	int err;
1272  
1273  	addr = unix_create_addr(sunaddr, addr_len);
1274  	if (!addr)
1275  		return -ENOMEM;
1276  
1277  	err = mutex_lock_interruptible(&u->bindlock);
1278  	if (err)
1279  		goto out;
1280  
1281  	if (u->addr) {
1282  		err = -EINVAL;
1283  		goto out_mutex;
1284  	}
1285  
1286  	old_hash = sk->sk_hash;
1287  	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1288  	unix_table_double_lock(net, old_hash, new_hash);
1289  
1290  	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
1291  		goto out_spin;
1292  
1293  	__unix_set_addr_hash(net, sk, addr, new_hash);
1294  	unix_table_double_unlock(net, old_hash, new_hash);
1295  	mutex_unlock(&u->bindlock);
1296  	return 0;
1297  
1298  out_spin:
1299  	unix_table_double_unlock(net, old_hash, new_hash);
1300  	err = -EADDRINUSE;
1301  out_mutex:
1302  	mutex_unlock(&u->bindlock);
1303  out:
1304  	unix_release_addr(addr);
1305  	return err;
1306  }
1307  
unix_bind(struct socket * sock,struct sockaddr * uaddr,int addr_len)1308  static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1309  {
1310  	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1311  	struct sock *sk = sock->sk;
1312  	int err;
1313  
1314  	if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
1315  	    sunaddr->sun_family == AF_UNIX)
1316  		return unix_autobind(sk);
1317  
1318  	err = unix_validate_addr(sunaddr, addr_len);
1319  	if (err)
1320  		return err;
1321  
1322  	if (sunaddr->sun_path[0])
1323  		err = unix_bind_bsd(sk, sunaddr, addr_len);
1324  	else
1325  		err = unix_bind_abstract(sk, sunaddr, addr_len);
1326  
1327  	return err;
1328  }
1329  
unix_state_double_lock(struct sock * sk1,struct sock * sk2)1330  static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1331  {
1332  	if (unlikely(sk1 == sk2) || !sk2) {
1333  		unix_state_lock(sk1);
1334  		return;
1335  	}
1336  	if (sk1 > sk2)
1337  		swap(sk1, sk2);
1338  
1339  	unix_state_lock(sk1);
1340  	unix_state_lock_nested(sk2, U_LOCK_SECOND);
1341  }
1342  
unix_state_double_unlock(struct sock * sk1,struct sock * sk2)1343  static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1344  {
1345  	if (unlikely(sk1 == sk2) || !sk2) {
1346  		unix_state_unlock(sk1);
1347  		return;
1348  	}
1349  	unix_state_unlock(sk1);
1350  	unix_state_unlock(sk2);
1351  }
1352  
unix_dgram_connect(struct socket * sock,struct sockaddr * addr,int alen,int flags)1353  static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1354  			      int alen, int flags)
1355  {
1356  	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1357  	struct sock *sk = sock->sk;
1358  	struct sock *other;
1359  	int err;
1360  
1361  	err = -EINVAL;
1362  	if (alen < offsetofend(struct sockaddr, sa_family))
1363  		goto out;
1364  
1365  	if (addr->sa_family != AF_UNSPEC) {
1366  		err = unix_validate_addr(sunaddr, alen);
1367  		if (err)
1368  			goto out;
1369  
1370  		if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1371  		     test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
1372  		    !READ_ONCE(unix_sk(sk)->addr)) {
1373  			err = unix_autobind(sk);
1374  			if (err)
1375  				goto out;
1376  		}
1377  
1378  restart:
1379  		other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type);
1380  		if (IS_ERR(other)) {
1381  			err = PTR_ERR(other);
1382  			goto out;
1383  		}
1384  
1385  		unix_state_double_lock(sk, other);
1386  
1387  		/* Apparently VFS overslept socket death. Retry. */
1388  		if (sock_flag(other, SOCK_DEAD)) {
1389  			unix_state_double_unlock(sk, other);
1390  			sock_put(other);
1391  			goto restart;
1392  		}
1393  
1394  		err = -EPERM;
1395  		if (!unix_may_send(sk, other))
1396  			goto out_unlock;
1397  
1398  		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1399  		if (err)
1400  			goto out_unlock;
1401  
1402  		WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
1403  		WRITE_ONCE(other->sk_state, TCP_ESTABLISHED);
1404  	} else {
1405  		/*
1406  		 *	1003.1g breaking connected state with AF_UNSPEC
1407  		 */
1408  		other = NULL;
1409  		unix_state_double_lock(sk, other);
1410  	}
1411  
1412  	/*
1413  	 * If it was connected, reconnect.
1414  	 */
1415  	if (unix_peer(sk)) {
1416  		struct sock *old_peer = unix_peer(sk);
1417  
1418  		unix_peer(sk) = other;
1419  		if (!other)
1420  			WRITE_ONCE(sk->sk_state, TCP_CLOSE);
1421  		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1422  
1423  		unix_state_double_unlock(sk, other);
1424  
1425  		if (other != old_peer) {
1426  			unix_dgram_disconnected(sk, old_peer);
1427  
1428  			unix_state_lock(old_peer);
1429  			if (!unix_peer(old_peer))
1430  				WRITE_ONCE(old_peer->sk_state, TCP_CLOSE);
1431  			unix_state_unlock(old_peer);
1432  		}
1433  
1434  		sock_put(old_peer);
1435  	} else {
1436  		unix_peer(sk) = other;
1437  		unix_state_double_unlock(sk, other);
1438  	}
1439  
1440  	return 0;
1441  
1442  out_unlock:
1443  	unix_state_double_unlock(sk, other);
1444  	sock_put(other);
1445  out:
1446  	return err;
1447  }
1448  
unix_wait_for_peer(struct sock * other,long timeo)1449  static long unix_wait_for_peer(struct sock *other, long timeo)
1450  	__releases(&unix_sk(other)->lock)
1451  {
1452  	struct unix_sock *u = unix_sk(other);
1453  	int sched;
1454  	DEFINE_WAIT(wait);
1455  
1456  	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1457  
1458  	sched = !sock_flag(other, SOCK_DEAD) &&
1459  		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1460  		unix_recvq_full_lockless(other);
1461  
1462  	unix_state_unlock(other);
1463  
1464  	if (sched)
1465  		timeo = schedule_timeout(timeo);
1466  
1467  	finish_wait(&u->peer_wait, &wait);
1468  	return timeo;
1469  }
1470  
unix_stream_connect(struct socket * sock,struct sockaddr * uaddr,int addr_len,int flags)1471  static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1472  			       int addr_len, int flags)
1473  {
1474  	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1475  	struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
1476  	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1477  	struct net *net = sock_net(sk);
1478  	struct sk_buff *skb = NULL;
1479  	unsigned char state;
1480  	long timeo;
1481  	int err;
1482  
1483  	err = unix_validate_addr(sunaddr, addr_len);
1484  	if (err)
1485  		goto out;
1486  
1487  	if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1488  	     test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
1489  	    !READ_ONCE(u->addr)) {
1490  		err = unix_autobind(sk);
1491  		if (err)
1492  			goto out;
1493  	}
1494  
1495  	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1496  
1497  	/* First of all allocate resources.
1498  	   If we will make it after state is locked,
1499  	   we will have to recheck all again in any case.
1500  	 */
1501  
1502  	/* create new sock for complete connection */
1503  	newsk = unix_create1(net, NULL, 0, sock->type);
1504  	if (IS_ERR(newsk)) {
1505  		err = PTR_ERR(newsk);
1506  		newsk = NULL;
1507  		goto out;
1508  	}
1509  
1510  	err = -ENOMEM;
1511  
1512  	/* Allocate skb for sending to listening sock */
1513  	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1514  	if (skb == NULL)
1515  		goto out;
1516  
1517  restart:
1518  	/*  Find listening sock. */
1519  	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type);
1520  	if (IS_ERR(other)) {
1521  		err = PTR_ERR(other);
1522  		other = NULL;
1523  		goto out;
1524  	}
1525  
1526  	unix_state_lock(other);
1527  
1528  	/* Apparently VFS overslept socket death. Retry. */
1529  	if (sock_flag(other, SOCK_DEAD)) {
1530  		unix_state_unlock(other);
1531  		sock_put(other);
1532  		goto restart;
1533  	}
1534  
1535  	err = -ECONNREFUSED;
1536  	if (other->sk_state != TCP_LISTEN)
1537  		goto out_unlock;
1538  	if (other->sk_shutdown & RCV_SHUTDOWN)
1539  		goto out_unlock;
1540  
1541  	if (unix_recvq_full_lockless(other)) {
1542  		err = -EAGAIN;
1543  		if (!timeo)
1544  			goto out_unlock;
1545  
1546  		timeo = unix_wait_for_peer(other, timeo);
1547  
1548  		err = sock_intr_errno(timeo);
1549  		if (signal_pending(current))
1550  			goto out;
1551  		sock_put(other);
1552  		goto restart;
1553  	}
1554  
1555  	/* self connect and simultaneous connect are eliminated
1556  	 * by rejecting TCP_LISTEN socket to avoid deadlock.
1557  	 */
1558  	state = READ_ONCE(sk->sk_state);
1559  	if (unlikely(state != TCP_CLOSE)) {
1560  		err = state == TCP_ESTABLISHED ? -EISCONN : -EINVAL;
1561  		goto out_unlock;
1562  	}
1563  
1564  	unix_state_lock_nested(sk, U_LOCK_SECOND);
1565  
1566  	if (unlikely(sk->sk_state != TCP_CLOSE)) {
1567  		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EINVAL;
1568  		unix_state_unlock(sk);
1569  		goto out_unlock;
1570  	}
1571  
1572  	err = security_unix_stream_connect(sk, other, newsk);
1573  	if (err) {
1574  		unix_state_unlock(sk);
1575  		goto out_unlock;
1576  	}
1577  
1578  	/* The way is open! Fastly set all the necessary fields... */
1579  
1580  	sock_hold(sk);
1581  	unix_peer(newsk)	= sk;
1582  	newsk->sk_state		= TCP_ESTABLISHED;
1583  	newsk->sk_type		= sk->sk_type;
1584  	init_peercred(newsk);
1585  	newu = unix_sk(newsk);
1586  	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1587  	otheru = unix_sk(other);
1588  
1589  	/* copy address information from listening to new sock
1590  	 *
1591  	 * The contents of *(otheru->addr) and otheru->path
1592  	 * are seen fully set up here, since we have found
1593  	 * otheru in hash under its lock.  Insertion into the
1594  	 * hash chain we'd found it in had been done in an
1595  	 * earlier critical area protected by the chain's lock,
1596  	 * the same one where we'd set *(otheru->addr) contents,
1597  	 * as well as otheru->path and otheru->addr itself.
1598  	 *
1599  	 * Using smp_store_release() here to set newu->addr
1600  	 * is enough to make those stores, as well as stores
1601  	 * to newu->path visible to anyone who gets newu->addr
1602  	 * by smp_load_acquire().  IOW, the same warranties
1603  	 * as for unix_sock instances bound in unix_bind() or
1604  	 * in unix_autobind().
1605  	 */
1606  	if (otheru->path.dentry) {
1607  		path_get(&otheru->path);
1608  		newu->path = otheru->path;
1609  	}
1610  	refcount_inc(&otheru->addr->refcnt);
1611  	smp_store_release(&newu->addr, otheru->addr);
1612  
1613  	/* Set credentials */
1614  	copy_peercred(sk, other);
1615  
1616  	sock->state	= SS_CONNECTED;
1617  	WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
1618  	sock_hold(newsk);
1619  
1620  	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
1621  	unix_peer(sk)	= newsk;
1622  
1623  	unix_state_unlock(sk);
1624  
1625  	/* take ten and send info to listening sock */
1626  	spin_lock(&other->sk_receive_queue.lock);
1627  	__skb_queue_tail(&other->sk_receive_queue, skb);
1628  	spin_unlock(&other->sk_receive_queue.lock);
1629  	unix_state_unlock(other);
1630  	other->sk_data_ready(other);
1631  	sock_put(other);
1632  	return 0;
1633  
1634  out_unlock:
1635  	if (other)
1636  		unix_state_unlock(other);
1637  
1638  out:
1639  	kfree_skb(skb);
1640  	if (newsk)
1641  		unix_release_sock(newsk, 0);
1642  	if (other)
1643  		sock_put(other);
1644  	return err;
1645  }
1646  
unix_socketpair(struct socket * socka,struct socket * sockb)1647  static int unix_socketpair(struct socket *socka, struct socket *sockb)
1648  {
1649  	struct sock *ska = socka->sk, *skb = sockb->sk;
1650  
1651  	/* Join our sockets back to back */
1652  	sock_hold(ska);
1653  	sock_hold(skb);
1654  	unix_peer(ska) = skb;
1655  	unix_peer(skb) = ska;
1656  	init_peercred(ska);
1657  	init_peercred(skb);
1658  
1659  	ska->sk_state = TCP_ESTABLISHED;
1660  	skb->sk_state = TCP_ESTABLISHED;
1661  	socka->state  = SS_CONNECTED;
1662  	sockb->state  = SS_CONNECTED;
1663  	return 0;
1664  }
1665  
unix_sock_inherit_flags(const struct socket * old,struct socket * new)1666  static void unix_sock_inherit_flags(const struct socket *old,
1667  				    struct socket *new)
1668  {
1669  	if (test_bit(SOCK_PASSCRED, &old->flags))
1670  		set_bit(SOCK_PASSCRED, &new->flags);
1671  	if (test_bit(SOCK_PASSPIDFD, &old->flags))
1672  		set_bit(SOCK_PASSPIDFD, &new->flags);
1673  	if (test_bit(SOCK_PASSSEC, &old->flags))
1674  		set_bit(SOCK_PASSSEC, &new->flags);
1675  }
1676  
unix_accept(struct socket * sock,struct socket * newsock,int flags,bool kern)1677  static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1678  		       bool kern)
1679  {
1680  	struct sock *sk = sock->sk;
1681  	struct sock *tsk;
1682  	struct sk_buff *skb;
1683  	int err;
1684  
1685  	err = -EOPNOTSUPP;
1686  	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1687  		goto out;
1688  
1689  	err = -EINVAL;
1690  	if (READ_ONCE(sk->sk_state) != TCP_LISTEN)
1691  		goto out;
1692  
1693  	/* If socket state is TCP_LISTEN it cannot change (for now...),
1694  	 * so that no locks are necessary.
1695  	 */
1696  
1697  	skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
1698  				&err);
1699  	if (!skb) {
1700  		/* This means receive shutdown. */
1701  		if (err == 0)
1702  			err = -EINVAL;
1703  		goto out;
1704  	}
1705  
1706  	tsk = skb->sk;
1707  	skb_free_datagram(sk, skb);
1708  	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1709  
1710  	/* attach accepted sock to socket */
1711  	unix_state_lock(tsk);
1712  	newsock->state = SS_CONNECTED;
1713  	unix_sock_inherit_flags(sock, newsock);
1714  	sock_graft(tsk, newsock);
1715  	unix_state_unlock(tsk);
1716  	return 0;
1717  
1718  out:
1719  	return err;
1720  }
1721  
1722  
unix_getname(struct socket * sock,struct sockaddr * uaddr,int peer)1723  static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1724  {
1725  	struct sock *sk = sock->sk;
1726  	struct unix_address *addr;
1727  	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1728  	int err = 0;
1729  
1730  	if (peer) {
1731  		sk = unix_peer_get(sk);
1732  
1733  		err = -ENOTCONN;
1734  		if (!sk)
1735  			goto out;
1736  		err = 0;
1737  	} else {
1738  		sock_hold(sk);
1739  	}
1740  
1741  	addr = smp_load_acquire(&unix_sk(sk)->addr);
1742  	if (!addr) {
1743  		sunaddr->sun_family = AF_UNIX;
1744  		sunaddr->sun_path[0] = 0;
1745  		err = offsetof(struct sockaddr_un, sun_path);
1746  	} else {
1747  		err = addr->len;
1748  		memcpy(sunaddr, addr->name, addr->len);
1749  	}
1750  	sock_put(sk);
1751  out:
1752  	return err;
1753  }
1754  
unix_peek_fds(struct scm_cookie * scm,struct sk_buff * skb)1755  static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1756  {
1757  	scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1758  
1759  	/*
1760  	 * Garbage collection of unix sockets starts by selecting a set of
1761  	 * candidate sockets which have reference only from being in flight
1762  	 * (total_refs == inflight_refs).  This condition is checked once during
1763  	 * the candidate collection phase, and candidates are marked as such, so
1764  	 * that non-candidates can later be ignored.  While inflight_refs is
1765  	 * protected by unix_gc_lock, total_refs (file count) is not, hence this
1766  	 * is an instantaneous decision.
1767  	 *
1768  	 * Once a candidate, however, the socket must not be reinstalled into a
1769  	 * file descriptor while the garbage collection is in progress.
1770  	 *
1771  	 * If the above conditions are met, then the directed graph of
1772  	 * candidates (*) does not change while unix_gc_lock is held.
1773  	 *
1774  	 * Any operations that changes the file count through file descriptors
1775  	 * (dup, close, sendmsg) does not change the graph since candidates are
1776  	 * not installed in fds.
1777  	 *
1778  	 * Dequeing a candidate via recvmsg would install it into an fd, but
1779  	 * that takes unix_gc_lock to decrement the inflight count, so it's
1780  	 * serialized with garbage collection.
1781  	 *
1782  	 * MSG_PEEK is special in that it does not change the inflight count,
1783  	 * yet does install the socket into an fd.  The following lock/unlock
1784  	 * pair is to ensure serialization with garbage collection.  It must be
1785  	 * done between incrementing the file count and installing the file into
1786  	 * an fd.
1787  	 *
1788  	 * If garbage collection starts after the barrier provided by the
1789  	 * lock/unlock, then it will see the elevated refcount and not mark this
1790  	 * as a candidate.  If a garbage collection is already in progress
1791  	 * before the file count was incremented, then the lock/unlock pair will
1792  	 * ensure that garbage collection is finished before progressing to
1793  	 * installing the fd.
1794  	 *
1795  	 * (*) A -> B where B is on the queue of A or B is on the queue of C
1796  	 * which is on the queue of listening socket A.
1797  	 */
1798  	spin_lock(&unix_gc_lock);
1799  	spin_unlock(&unix_gc_lock);
1800  }
1801  
unix_scm_to_skb(struct scm_cookie * scm,struct sk_buff * skb,bool send_fds)1802  static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1803  {
1804  	int err = 0;
1805  
1806  	UNIXCB(skb).pid  = get_pid(scm->pid);
1807  	UNIXCB(skb).uid = scm->creds.uid;
1808  	UNIXCB(skb).gid = scm->creds.gid;
1809  	UNIXCB(skb).fp = NULL;
1810  	unix_get_secdata(scm, skb);
1811  	if (scm->fp && send_fds)
1812  		err = unix_attach_fds(scm, skb);
1813  
1814  	skb->destructor = unix_destruct_scm;
1815  	return err;
1816  }
1817  
unix_passcred_enabled(const struct socket * sock,const struct sock * other)1818  static bool unix_passcred_enabled(const struct socket *sock,
1819  				  const struct sock *other)
1820  {
1821  	return test_bit(SOCK_PASSCRED, &sock->flags) ||
1822  	       test_bit(SOCK_PASSPIDFD, &sock->flags) ||
1823  	       !other->sk_socket ||
1824  	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags) ||
1825  	       test_bit(SOCK_PASSPIDFD, &other->sk_socket->flags);
1826  }
1827  
1828  /*
1829   * Some apps rely on write() giving SCM_CREDENTIALS
1830   * We include credentials if source or destination socket
1831   * asserted SOCK_PASSCRED.
1832   */
maybe_add_creds(struct sk_buff * skb,const struct socket * sock,const struct sock * other)1833  static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1834  			    const struct sock *other)
1835  {
1836  	if (UNIXCB(skb).pid)
1837  		return;
1838  	if (unix_passcred_enabled(sock, other)) {
1839  		UNIXCB(skb).pid  = get_pid(task_tgid(current));
1840  		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1841  	}
1842  }
1843  
unix_skb_scm_eq(struct sk_buff * skb,struct scm_cookie * scm)1844  static bool unix_skb_scm_eq(struct sk_buff *skb,
1845  			    struct scm_cookie *scm)
1846  {
1847  	return UNIXCB(skb).pid == scm->pid &&
1848  	       uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
1849  	       gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
1850  	       unix_secdata_eq(scm, skb);
1851  }
1852  
scm_stat_add(struct sock * sk,struct sk_buff * skb)1853  static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1854  {
1855  	struct scm_fp_list *fp = UNIXCB(skb).fp;
1856  	struct unix_sock *u = unix_sk(sk);
1857  
1858  	if (unlikely(fp && fp->count))
1859  		atomic_add(fp->count, &u->scm_stat.nr_fds);
1860  }
1861  
scm_stat_del(struct sock * sk,struct sk_buff * skb)1862  static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1863  {
1864  	struct scm_fp_list *fp = UNIXCB(skb).fp;
1865  	struct unix_sock *u = unix_sk(sk);
1866  
1867  	if (unlikely(fp && fp->count))
1868  		atomic_sub(fp->count, &u->scm_stat.nr_fds);
1869  }
1870  
1871  /*
1872   *	Send AF_UNIX data.
1873   */
1874  
unix_dgram_sendmsg(struct socket * sock,struct msghdr * msg,size_t len)1875  static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1876  			      size_t len)
1877  {
1878  	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1879  	struct sock *sk = sock->sk, *other = NULL;
1880  	struct unix_sock *u = unix_sk(sk);
1881  	struct scm_cookie scm;
1882  	struct sk_buff *skb;
1883  	int data_len = 0;
1884  	int sk_locked;
1885  	long timeo;
1886  	int err;
1887  
1888  	wait_for_unix_gc();
1889  	err = scm_send(sock, msg, &scm, false);
1890  	if (err < 0)
1891  		return err;
1892  
1893  	err = -EOPNOTSUPP;
1894  	if (msg->msg_flags&MSG_OOB)
1895  		goto out;
1896  
1897  	if (msg->msg_namelen) {
1898  		err = unix_validate_addr(sunaddr, msg->msg_namelen);
1899  		if (err)
1900  			goto out;
1901  	} else {
1902  		sunaddr = NULL;
1903  		err = -ENOTCONN;
1904  		other = unix_peer_get(sk);
1905  		if (!other)
1906  			goto out;
1907  	}
1908  
1909  	if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1910  	     test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
1911  	    !READ_ONCE(u->addr)) {
1912  		err = unix_autobind(sk);
1913  		if (err)
1914  			goto out;
1915  	}
1916  
1917  	err = -EMSGSIZE;
1918  	if (len > READ_ONCE(sk->sk_sndbuf) - 32)
1919  		goto out;
1920  
1921  	if (len > SKB_MAX_ALLOC) {
1922  		data_len = min_t(size_t,
1923  				 len - SKB_MAX_ALLOC,
1924  				 MAX_SKB_FRAGS * PAGE_SIZE);
1925  		data_len = PAGE_ALIGN(data_len);
1926  
1927  		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1928  	}
1929  
1930  	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1931  				   msg->msg_flags & MSG_DONTWAIT, &err,
1932  				   PAGE_ALLOC_COSTLY_ORDER);
1933  	if (skb == NULL)
1934  		goto out;
1935  
1936  	err = unix_scm_to_skb(&scm, skb, true);
1937  	if (err < 0)
1938  		goto out_free;
1939  
1940  	skb_put(skb, len - data_len);
1941  	skb->data_len = data_len;
1942  	skb->len = len;
1943  	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1944  	if (err)
1945  		goto out_free;
1946  
1947  	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1948  
1949  restart:
1950  	if (!other) {
1951  		err = -ECONNRESET;
1952  		if (sunaddr == NULL)
1953  			goto out_free;
1954  
1955  		other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen,
1956  					sk->sk_type);
1957  		if (IS_ERR(other)) {
1958  			err = PTR_ERR(other);
1959  			other = NULL;
1960  			goto out_free;
1961  		}
1962  	}
1963  
1964  	if (sk_filter(other, skb) < 0) {
1965  		/* Toss the packet but do not return any error to the sender */
1966  		err = len;
1967  		goto out_free;
1968  	}
1969  
1970  	sk_locked = 0;
1971  	unix_state_lock(other);
1972  restart_locked:
1973  	err = -EPERM;
1974  	if (!unix_may_send(sk, other))
1975  		goto out_unlock;
1976  
1977  	if (unlikely(sock_flag(other, SOCK_DEAD))) {
1978  		/*
1979  		 *	Check with 1003.1g - what should
1980  		 *	datagram error
1981  		 */
1982  		unix_state_unlock(other);
1983  		sock_put(other);
1984  
1985  		if (!sk_locked)
1986  			unix_state_lock(sk);
1987  
1988  		err = 0;
1989  		if (sk->sk_type == SOCK_SEQPACKET) {
1990  			/* We are here only when racing with unix_release_sock()
1991  			 * is clearing @other. Never change state to TCP_CLOSE
1992  			 * unlike SOCK_DGRAM wants.
1993  			 */
1994  			unix_state_unlock(sk);
1995  			err = -EPIPE;
1996  		} else if (unix_peer(sk) == other) {
1997  			unix_peer(sk) = NULL;
1998  			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1999  
2000  			WRITE_ONCE(sk->sk_state, TCP_CLOSE);
2001  			unix_state_unlock(sk);
2002  
2003  			unix_dgram_disconnected(sk, other);
2004  			sock_put(other);
2005  			err = -ECONNREFUSED;
2006  		} else {
2007  			unix_state_unlock(sk);
2008  		}
2009  
2010  		other = NULL;
2011  		if (err)
2012  			goto out_free;
2013  		goto restart;
2014  	}
2015  
2016  	err = -EPIPE;
2017  	if (other->sk_shutdown & RCV_SHUTDOWN)
2018  		goto out_unlock;
2019  
2020  	if (sk->sk_type != SOCK_SEQPACKET) {
2021  		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
2022  		if (err)
2023  			goto out_unlock;
2024  	}
2025  
2026  	/* other == sk && unix_peer(other) != sk if
2027  	 * - unix_peer(sk) == NULL, destination address bound to sk
2028  	 * - unix_peer(sk) == sk by time of get but disconnected before lock
2029  	 */
2030  	if (other != sk &&
2031  	    unlikely(unix_peer(other) != sk &&
2032  	    unix_recvq_full_lockless(other))) {
2033  		if (timeo) {
2034  			timeo = unix_wait_for_peer(other, timeo);
2035  
2036  			err = sock_intr_errno(timeo);
2037  			if (signal_pending(current))
2038  				goto out_free;
2039  
2040  			goto restart;
2041  		}
2042  
2043  		if (!sk_locked) {
2044  			unix_state_unlock(other);
2045  			unix_state_double_lock(sk, other);
2046  		}
2047  
2048  		if (unix_peer(sk) != other ||
2049  		    unix_dgram_peer_wake_me(sk, other)) {
2050  			err = -EAGAIN;
2051  			sk_locked = 1;
2052  			goto out_unlock;
2053  		}
2054  
2055  		if (!sk_locked) {
2056  			sk_locked = 1;
2057  			goto restart_locked;
2058  		}
2059  	}
2060  
2061  	if (unlikely(sk_locked))
2062  		unix_state_unlock(sk);
2063  
2064  	if (sock_flag(other, SOCK_RCVTSTAMP))
2065  		__net_timestamp(skb);
2066  	maybe_add_creds(skb, sock, other);
2067  	scm_stat_add(other, skb);
2068  	skb_queue_tail(&other->sk_receive_queue, skb);
2069  	unix_state_unlock(other);
2070  	other->sk_data_ready(other);
2071  	sock_put(other);
2072  	scm_destroy(&scm);
2073  	return len;
2074  
2075  out_unlock:
2076  	if (sk_locked)
2077  		unix_state_unlock(sk);
2078  	unix_state_unlock(other);
2079  out_free:
2080  	kfree_skb(skb);
2081  out:
2082  	if (other)
2083  		sock_put(other);
2084  	scm_destroy(&scm);
2085  	return err;
2086  }
2087  
2088  /* We use paged skbs for stream sockets, and limit occupancy to 32768
2089   * bytes, and a minimum of a full page.
2090   */
2091  #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
2092  
2093  #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
queue_oob(struct socket * sock,struct msghdr * msg,struct sock * other,struct scm_cookie * scm,bool fds_sent)2094  static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other,
2095  		     struct scm_cookie *scm, bool fds_sent)
2096  {
2097  	struct unix_sock *ousk = unix_sk(other);
2098  	struct sk_buff *skb;
2099  	int err = 0;
2100  
2101  	skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
2102  
2103  	if (!skb)
2104  		return err;
2105  
2106  	err = unix_scm_to_skb(scm, skb, !fds_sent);
2107  	if (err < 0) {
2108  		kfree_skb(skb);
2109  		return err;
2110  	}
2111  	skb_put(skb, 1);
2112  	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
2113  
2114  	if (err) {
2115  		kfree_skb(skb);
2116  		return err;
2117  	}
2118  
2119  	unix_state_lock(other);
2120  
2121  	if (sock_flag(other, SOCK_DEAD) ||
2122  	    (other->sk_shutdown & RCV_SHUTDOWN)) {
2123  		unix_state_unlock(other);
2124  		kfree_skb(skb);
2125  		return -EPIPE;
2126  	}
2127  
2128  	maybe_add_creds(skb, sock, other);
2129  	skb_get(skb);
2130  
2131  	scm_stat_add(other, skb);
2132  
2133  	spin_lock(&other->sk_receive_queue.lock);
2134  	if (ousk->oob_skb)
2135  		consume_skb(ousk->oob_skb);
2136  	WRITE_ONCE(ousk->oob_skb, skb);
2137  	__skb_queue_tail(&other->sk_receive_queue, skb);
2138  	spin_unlock(&other->sk_receive_queue.lock);
2139  
2140  	sk_send_sigurg(other);
2141  	unix_state_unlock(other);
2142  	other->sk_data_ready(other);
2143  
2144  	return err;
2145  }
2146  #endif
2147  
unix_stream_sendmsg(struct socket * sock,struct msghdr * msg,size_t len)2148  static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
2149  			       size_t len)
2150  {
2151  	struct sock *sk = sock->sk;
2152  	struct sock *other = NULL;
2153  	int err, size;
2154  	struct sk_buff *skb;
2155  	int sent = 0;
2156  	struct scm_cookie scm;
2157  	bool fds_sent = false;
2158  	int data_len;
2159  
2160  	wait_for_unix_gc();
2161  	err = scm_send(sock, msg, &scm, false);
2162  	if (err < 0)
2163  		return err;
2164  
2165  	err = -EOPNOTSUPP;
2166  	if (msg->msg_flags & MSG_OOB) {
2167  #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2168  		if (len)
2169  			len--;
2170  		else
2171  #endif
2172  			goto out_err;
2173  	}
2174  
2175  	if (msg->msg_namelen) {
2176  		err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
2177  		goto out_err;
2178  	} else {
2179  		err = -ENOTCONN;
2180  		other = unix_peer(sk);
2181  		if (!other)
2182  			goto out_err;
2183  	}
2184  
2185  	if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
2186  		goto pipe_err;
2187  
2188  	while (sent < len) {
2189  		size = len - sent;
2190  
2191  		if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2192  			skb = sock_alloc_send_pskb(sk, 0, 0,
2193  						   msg->msg_flags & MSG_DONTWAIT,
2194  						   &err, 0);
2195  		} else {
2196  			/* Keep two messages in the pipe so it schedules better */
2197  			size = min_t(int, size, (READ_ONCE(sk->sk_sndbuf) >> 1) - 64);
2198  
2199  			/* allow fallback to order-0 allocations */
2200  			size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
2201  
2202  			data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
2203  
2204  			data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
2205  
2206  			skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
2207  						   msg->msg_flags & MSG_DONTWAIT, &err,
2208  						   get_order(UNIX_SKB_FRAGS_SZ));
2209  		}
2210  		if (!skb)
2211  			goto out_err;
2212  
2213  		/* Only send the fds in the first buffer */
2214  		err = unix_scm_to_skb(&scm, skb, !fds_sent);
2215  		if (err < 0) {
2216  			kfree_skb(skb);
2217  			goto out_err;
2218  		}
2219  		fds_sent = true;
2220  
2221  		if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2222  			skb->ip_summed = CHECKSUM_UNNECESSARY;
2223  			err = skb_splice_from_iter(skb, &msg->msg_iter, size,
2224  						   sk->sk_allocation);
2225  			if (err < 0) {
2226  				kfree_skb(skb);
2227  				goto out_err;
2228  			}
2229  			size = err;
2230  			refcount_add(size, &sk->sk_wmem_alloc);
2231  		} else {
2232  			skb_put(skb, size - data_len);
2233  			skb->data_len = data_len;
2234  			skb->len = size;
2235  			err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
2236  			if (err) {
2237  				kfree_skb(skb);
2238  				goto out_err;
2239  			}
2240  		}
2241  
2242  		unix_state_lock(other);
2243  
2244  		if (sock_flag(other, SOCK_DEAD) ||
2245  		    (other->sk_shutdown & RCV_SHUTDOWN))
2246  			goto pipe_err_free;
2247  
2248  		maybe_add_creds(skb, sock, other);
2249  		scm_stat_add(other, skb);
2250  		skb_queue_tail(&other->sk_receive_queue, skb);
2251  		unix_state_unlock(other);
2252  		other->sk_data_ready(other);
2253  		sent += size;
2254  	}
2255  
2256  #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2257  	if (msg->msg_flags & MSG_OOB) {
2258  		err = queue_oob(sock, msg, other, &scm, fds_sent);
2259  		if (err)
2260  			goto out_err;
2261  		sent++;
2262  	}
2263  #endif
2264  
2265  	scm_destroy(&scm);
2266  
2267  	return sent;
2268  
2269  pipe_err_free:
2270  	unix_state_unlock(other);
2271  	kfree_skb(skb);
2272  pipe_err:
2273  	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
2274  		send_sig(SIGPIPE, current, 0);
2275  	err = -EPIPE;
2276  out_err:
2277  	scm_destroy(&scm);
2278  	return sent ? : err;
2279  }
2280  
unix_seqpacket_sendmsg(struct socket * sock,struct msghdr * msg,size_t len)2281  static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2282  				  size_t len)
2283  {
2284  	int err;
2285  	struct sock *sk = sock->sk;
2286  
2287  	err = sock_error(sk);
2288  	if (err)
2289  		return err;
2290  
2291  	if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
2292  		return -ENOTCONN;
2293  
2294  	if (msg->msg_namelen)
2295  		msg->msg_namelen = 0;
2296  
2297  	return unix_dgram_sendmsg(sock, msg, len);
2298  }
2299  
unix_seqpacket_recvmsg(struct socket * sock,struct msghdr * msg,size_t size,int flags)2300  static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2301  				  size_t size, int flags)
2302  {
2303  	struct sock *sk = sock->sk;
2304  
2305  	if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
2306  		return -ENOTCONN;
2307  
2308  	return unix_dgram_recvmsg(sock, msg, size, flags);
2309  }
2310  
unix_copy_addr(struct msghdr * msg,struct sock * sk)2311  static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2312  {
2313  	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2314  
2315  	if (addr) {
2316  		msg->msg_namelen = addr->len;
2317  		memcpy(msg->msg_name, addr->name, addr->len);
2318  	}
2319  }
2320  
__unix_dgram_recvmsg(struct sock * sk,struct msghdr * msg,size_t size,int flags)2321  int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
2322  			 int flags)
2323  {
2324  	struct scm_cookie scm;
2325  	struct socket *sock = sk->sk_socket;
2326  	struct unix_sock *u = unix_sk(sk);
2327  	struct sk_buff *skb, *last;
2328  	long timeo;
2329  	int skip;
2330  	int err;
2331  
2332  	err = -EOPNOTSUPP;
2333  	if (flags&MSG_OOB)
2334  		goto out;
2335  
2336  	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2337  
2338  	do {
2339  		mutex_lock(&u->iolock);
2340  
2341  		skip = sk_peek_offset(sk, flags);
2342  		skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2343  					      &skip, &err, &last);
2344  		if (skb) {
2345  			if (!(flags & MSG_PEEK))
2346  				scm_stat_del(sk, skb);
2347  			break;
2348  		}
2349  
2350  		mutex_unlock(&u->iolock);
2351  
2352  		if (err != -EAGAIN)
2353  			break;
2354  	} while (timeo &&
2355  		 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2356  					      &err, &timeo, last));
2357  
2358  	if (!skb) { /* implies iolock unlocked */
2359  		unix_state_lock(sk);
2360  		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2361  		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2362  		    (sk->sk_shutdown & RCV_SHUTDOWN))
2363  			err = 0;
2364  		unix_state_unlock(sk);
2365  		goto out;
2366  	}
2367  
2368  	if (wq_has_sleeper(&u->peer_wait))
2369  		wake_up_interruptible_sync_poll(&u->peer_wait,
2370  						EPOLLOUT | EPOLLWRNORM |
2371  						EPOLLWRBAND);
2372  
2373  	if (msg->msg_name)
2374  		unix_copy_addr(msg, skb->sk);
2375  
2376  	if (size > skb->len - skip)
2377  		size = skb->len - skip;
2378  	else if (size < skb->len - skip)
2379  		msg->msg_flags |= MSG_TRUNC;
2380  
2381  	err = skb_copy_datagram_msg(skb, skip, msg, size);
2382  	if (err)
2383  		goto out_free;
2384  
2385  	if (sock_flag(sk, SOCK_RCVTSTAMP))
2386  		__sock_recv_timestamp(msg, sk, skb);
2387  
2388  	memset(&scm, 0, sizeof(scm));
2389  
2390  	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2391  	unix_set_secdata(&scm, skb);
2392  
2393  	if (!(flags & MSG_PEEK)) {
2394  		if (UNIXCB(skb).fp)
2395  			unix_detach_fds(&scm, skb);
2396  
2397  		sk_peek_offset_bwd(sk, skb->len);
2398  	} else {
2399  		/* It is questionable: on PEEK we could:
2400  		   - do not return fds - good, but too simple 8)
2401  		   - return fds, and do not return them on read (old strategy,
2402  		     apparently wrong)
2403  		   - clone fds (I chose it for now, it is the most universal
2404  		     solution)
2405  
2406  		   POSIX 1003.1g does not actually define this clearly
2407  		   at all. POSIX 1003.1g doesn't define a lot of things
2408  		   clearly however!
2409  
2410  		*/
2411  
2412  		sk_peek_offset_fwd(sk, size);
2413  
2414  		if (UNIXCB(skb).fp)
2415  			unix_peek_fds(&scm, skb);
2416  	}
2417  	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2418  
2419  	scm_recv_unix(sock, msg, &scm, flags);
2420  
2421  out_free:
2422  	skb_free_datagram(sk, skb);
2423  	mutex_unlock(&u->iolock);
2424  out:
2425  	return err;
2426  }
2427  
unix_dgram_recvmsg(struct socket * sock,struct msghdr * msg,size_t size,int flags)2428  static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2429  			      int flags)
2430  {
2431  	struct sock *sk = sock->sk;
2432  
2433  #ifdef CONFIG_BPF_SYSCALL
2434  	const struct proto *prot = READ_ONCE(sk->sk_prot);
2435  
2436  	if (prot != &unix_dgram_proto)
2437  		return prot->recvmsg(sk, msg, size, flags, NULL);
2438  #endif
2439  	return __unix_dgram_recvmsg(sk, msg, size, flags);
2440  }
2441  
unix_read_skb(struct sock * sk,skb_read_actor_t recv_actor)2442  static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
2443  {
2444  	struct unix_sock *u = unix_sk(sk);
2445  	struct sk_buff *skb;
2446  	int err;
2447  
2448  	mutex_lock(&u->iolock);
2449  	skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
2450  	mutex_unlock(&u->iolock);
2451  	if (!skb)
2452  		return err;
2453  
2454  	return recv_actor(sk, skb);
2455  }
2456  
2457  /*
2458   *	Sleep until more data has arrived. But check for races..
2459   */
unix_stream_data_wait(struct sock * sk,long timeo,struct sk_buff * last,unsigned int last_len,bool freezable)2460  static long unix_stream_data_wait(struct sock *sk, long timeo,
2461  				  struct sk_buff *last, unsigned int last_len,
2462  				  bool freezable)
2463  {
2464  	unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
2465  	struct sk_buff *tail;
2466  	DEFINE_WAIT(wait);
2467  
2468  	unix_state_lock(sk);
2469  
2470  	for (;;) {
2471  		prepare_to_wait(sk_sleep(sk), &wait, state);
2472  
2473  		tail = skb_peek_tail(&sk->sk_receive_queue);
2474  		if (tail != last ||
2475  		    (tail && tail->len != last_len) ||
2476  		    sk->sk_err ||
2477  		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2478  		    signal_pending(current) ||
2479  		    !timeo)
2480  			break;
2481  
2482  		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2483  		unix_state_unlock(sk);
2484  		timeo = schedule_timeout(timeo);
2485  		unix_state_lock(sk);
2486  
2487  		if (sock_flag(sk, SOCK_DEAD))
2488  			break;
2489  
2490  		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2491  	}
2492  
2493  	finish_wait(sk_sleep(sk), &wait);
2494  	unix_state_unlock(sk);
2495  	return timeo;
2496  }
2497  
unix_skb_len(const struct sk_buff * skb)2498  static unsigned int unix_skb_len(const struct sk_buff *skb)
2499  {
2500  	return skb->len - UNIXCB(skb).consumed;
2501  }
2502  
2503  struct unix_stream_read_state {
2504  	int (*recv_actor)(struct sk_buff *, int, int,
2505  			  struct unix_stream_read_state *);
2506  	struct socket *socket;
2507  	struct msghdr *msg;
2508  	struct pipe_inode_info *pipe;
2509  	size_t size;
2510  	int flags;
2511  	unsigned int splice_flags;
2512  };
2513  
2514  #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
unix_stream_recv_urg(struct unix_stream_read_state * state)2515  static int unix_stream_recv_urg(struct unix_stream_read_state *state)
2516  {
2517  	struct socket *sock = state->socket;
2518  	struct sock *sk = sock->sk;
2519  	struct unix_sock *u = unix_sk(sk);
2520  	int chunk = 1;
2521  	struct sk_buff *oob_skb;
2522  
2523  	mutex_lock(&u->iolock);
2524  	unix_state_lock(sk);
2525  	spin_lock(&sk->sk_receive_queue.lock);
2526  
2527  	if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
2528  		spin_unlock(&sk->sk_receive_queue.lock);
2529  		unix_state_unlock(sk);
2530  		mutex_unlock(&u->iolock);
2531  		return -EINVAL;
2532  	}
2533  
2534  	oob_skb = u->oob_skb;
2535  
2536  	if (!(state->flags & MSG_PEEK))
2537  		WRITE_ONCE(u->oob_skb, NULL);
2538  	else
2539  		skb_get(oob_skb);
2540  
2541  	spin_unlock(&sk->sk_receive_queue.lock);
2542  	unix_state_unlock(sk);
2543  
2544  	chunk = state->recv_actor(oob_skb, 0, chunk, state);
2545  
2546  	if (!(state->flags & MSG_PEEK))
2547  		UNIXCB(oob_skb).consumed += 1;
2548  
2549  	consume_skb(oob_skb);
2550  
2551  	mutex_unlock(&u->iolock);
2552  
2553  	if (chunk < 0)
2554  		return -EFAULT;
2555  
2556  	state->msg->msg_flags |= MSG_OOB;
2557  	return 1;
2558  }
2559  
manage_oob(struct sk_buff * skb,struct sock * sk,int flags,int copied)2560  static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
2561  				  int flags, int copied)
2562  {
2563  	struct unix_sock *u = unix_sk(sk);
2564  
2565  	if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
2566  		skb_unlink(skb, &sk->sk_receive_queue);
2567  		consume_skb(skb);
2568  		skb = NULL;
2569  	} else {
2570  		struct sk_buff *unlinked_skb = NULL;
2571  
2572  		spin_lock(&sk->sk_receive_queue.lock);
2573  
2574  		if (skb == u->oob_skb) {
2575  			if (copied) {
2576  				skb = NULL;
2577  			} else if (!(flags & MSG_PEEK)) {
2578  				if (sock_flag(sk, SOCK_URGINLINE)) {
2579  					WRITE_ONCE(u->oob_skb, NULL);
2580  					consume_skb(skb);
2581  				} else {
2582  					__skb_unlink(skb, &sk->sk_receive_queue);
2583  					WRITE_ONCE(u->oob_skb, NULL);
2584  					unlinked_skb = skb;
2585  					skb = skb_peek(&sk->sk_receive_queue);
2586  				}
2587  			} else if (!sock_flag(sk, SOCK_URGINLINE)) {
2588  				skb = skb_peek_next(skb, &sk->sk_receive_queue);
2589  			}
2590  		}
2591  
2592  		spin_unlock(&sk->sk_receive_queue.lock);
2593  
2594  		if (unlinked_skb) {
2595  			WARN_ON_ONCE(skb_unref(unlinked_skb));
2596  			kfree_skb(unlinked_skb);
2597  		}
2598  	}
2599  	return skb;
2600  }
2601  #endif
2602  
unix_stream_read_skb(struct sock * sk,skb_read_actor_t recv_actor)2603  static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
2604  {
2605  	struct unix_sock *u = unix_sk(sk);
2606  	struct sk_buff *skb;
2607  	int err;
2608  
2609  	if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED))
2610  		return -ENOTCONN;
2611  
2612  	mutex_lock(&u->iolock);
2613  	skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
2614  	mutex_unlock(&u->iolock);
2615  	if (!skb)
2616  		return err;
2617  
2618  #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2619  	if (unlikely(skb == READ_ONCE(u->oob_skb))) {
2620  		bool drop = false;
2621  
2622  		unix_state_lock(sk);
2623  
2624  		if (sock_flag(sk, SOCK_DEAD)) {
2625  			unix_state_unlock(sk);
2626  			kfree_skb(skb);
2627  			return -ECONNRESET;
2628  		}
2629  
2630  		spin_lock(&sk->sk_receive_queue.lock);
2631  		if (likely(skb == u->oob_skb)) {
2632  			WRITE_ONCE(u->oob_skb, NULL);
2633  			drop = true;
2634  		}
2635  		spin_unlock(&sk->sk_receive_queue.lock);
2636  
2637  		unix_state_unlock(sk);
2638  
2639  		if (drop) {
2640  			WARN_ON_ONCE(skb_unref(skb));
2641  			kfree_skb(skb);
2642  			return -EAGAIN;
2643  		}
2644  	}
2645  #endif
2646  
2647  	return recv_actor(sk, skb);
2648  }
2649  
unix_stream_read_generic(struct unix_stream_read_state * state,bool freezable)2650  static int unix_stream_read_generic(struct unix_stream_read_state *state,
2651  				    bool freezable)
2652  {
2653  	struct scm_cookie scm;
2654  	struct socket *sock = state->socket;
2655  	struct sock *sk = sock->sk;
2656  	struct unix_sock *u = unix_sk(sk);
2657  	int copied = 0;
2658  	int flags = state->flags;
2659  	int noblock = flags & MSG_DONTWAIT;
2660  	bool check_creds = false;
2661  	int target;
2662  	int err = 0;
2663  	long timeo;
2664  	int skip;
2665  	size_t size = state->size;
2666  	unsigned int last_len;
2667  
2668  	if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) {
2669  		err = -EINVAL;
2670  		goto out;
2671  	}
2672  
2673  	if (unlikely(flags & MSG_OOB)) {
2674  		err = -EOPNOTSUPP;
2675  #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2676  		err = unix_stream_recv_urg(state);
2677  #endif
2678  		goto out;
2679  	}
2680  
2681  	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2682  	timeo = sock_rcvtimeo(sk, noblock);
2683  
2684  	memset(&scm, 0, sizeof(scm));
2685  
2686  	/* Lock the socket to prevent queue disordering
2687  	 * while sleeps in memcpy_tomsg
2688  	 */
2689  	mutex_lock(&u->iolock);
2690  
2691  	skip = max(sk_peek_offset(sk, flags), 0);
2692  
2693  	do {
2694  		int chunk;
2695  		bool drop_skb;
2696  		struct sk_buff *skb, *last;
2697  
2698  redo:
2699  		unix_state_lock(sk);
2700  		if (sock_flag(sk, SOCK_DEAD)) {
2701  			err = -ECONNRESET;
2702  			goto unlock;
2703  		}
2704  		last = skb = skb_peek(&sk->sk_receive_queue);
2705  		last_len = last ? last->len : 0;
2706  
2707  again:
2708  #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2709  		if (skb) {
2710  			skb = manage_oob(skb, sk, flags, copied);
2711  			if (!skb && copied) {
2712  				unix_state_unlock(sk);
2713  				break;
2714  			}
2715  		}
2716  #endif
2717  		if (skb == NULL) {
2718  			if (copied >= target)
2719  				goto unlock;
2720  
2721  			/*
2722  			 *	POSIX 1003.1g mandates this order.
2723  			 */
2724  
2725  			err = sock_error(sk);
2726  			if (err)
2727  				goto unlock;
2728  			if (sk->sk_shutdown & RCV_SHUTDOWN)
2729  				goto unlock;
2730  
2731  			unix_state_unlock(sk);
2732  			if (!timeo) {
2733  				err = -EAGAIN;
2734  				break;
2735  			}
2736  
2737  			mutex_unlock(&u->iolock);
2738  
2739  			timeo = unix_stream_data_wait(sk, timeo, last,
2740  						      last_len, freezable);
2741  
2742  			if (signal_pending(current)) {
2743  				err = sock_intr_errno(timeo);
2744  				scm_destroy(&scm);
2745  				goto out;
2746  			}
2747  
2748  			mutex_lock(&u->iolock);
2749  			goto redo;
2750  unlock:
2751  			unix_state_unlock(sk);
2752  			break;
2753  		}
2754  
2755  		while (skip >= unix_skb_len(skb)) {
2756  			skip -= unix_skb_len(skb);
2757  			last = skb;
2758  			last_len = skb->len;
2759  			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2760  			if (!skb)
2761  				goto again;
2762  		}
2763  
2764  		unix_state_unlock(sk);
2765  
2766  		if (check_creds) {
2767  			/* Never glue messages from different writers */
2768  			if (!unix_skb_scm_eq(skb, &scm))
2769  				break;
2770  		} else if (test_bit(SOCK_PASSCRED, &sock->flags) ||
2771  			   test_bit(SOCK_PASSPIDFD, &sock->flags)) {
2772  			/* Copy credentials */
2773  			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2774  			unix_set_secdata(&scm, skb);
2775  			check_creds = true;
2776  		}
2777  
2778  		/* Copy address just once */
2779  		if (state->msg && state->msg->msg_name) {
2780  			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2781  					 state->msg->msg_name);
2782  			unix_copy_addr(state->msg, skb->sk);
2783  			sunaddr = NULL;
2784  		}
2785  
2786  		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2787  		skb_get(skb);
2788  		chunk = state->recv_actor(skb, skip, chunk, state);
2789  		drop_skb = !unix_skb_len(skb);
2790  		/* skb is only safe to use if !drop_skb */
2791  		consume_skb(skb);
2792  		if (chunk < 0) {
2793  			if (copied == 0)
2794  				copied = -EFAULT;
2795  			break;
2796  		}
2797  		copied += chunk;
2798  		size -= chunk;
2799  
2800  		if (drop_skb) {
2801  			/* the skb was touched by a concurrent reader;
2802  			 * we should not expect anything from this skb
2803  			 * anymore and assume it invalid - we can be
2804  			 * sure it was dropped from the socket queue
2805  			 *
2806  			 * let's report a short read
2807  			 */
2808  			err = 0;
2809  			break;
2810  		}
2811  
2812  		/* Mark read part of skb as used */
2813  		if (!(flags & MSG_PEEK)) {
2814  			UNIXCB(skb).consumed += chunk;
2815  
2816  			sk_peek_offset_bwd(sk, chunk);
2817  
2818  			if (UNIXCB(skb).fp) {
2819  				scm_stat_del(sk, skb);
2820  				unix_detach_fds(&scm, skb);
2821  			}
2822  
2823  			if (unix_skb_len(skb))
2824  				break;
2825  
2826  			skb_unlink(skb, &sk->sk_receive_queue);
2827  			consume_skb(skb);
2828  
2829  			if (scm.fp)
2830  				break;
2831  		} else {
2832  			/* It is questionable, see note in unix_dgram_recvmsg.
2833  			 */
2834  			if (UNIXCB(skb).fp)
2835  				unix_peek_fds(&scm, skb);
2836  
2837  			sk_peek_offset_fwd(sk, chunk);
2838  
2839  			if (UNIXCB(skb).fp)
2840  				break;
2841  
2842  			skip = 0;
2843  			last = skb;
2844  			last_len = skb->len;
2845  			unix_state_lock(sk);
2846  			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2847  			if (skb)
2848  				goto again;
2849  			unix_state_unlock(sk);
2850  			break;
2851  		}
2852  	} while (size);
2853  
2854  	mutex_unlock(&u->iolock);
2855  	if (state->msg)
2856  		scm_recv_unix(sock, state->msg, &scm, flags);
2857  	else
2858  		scm_destroy(&scm);
2859  out:
2860  	return copied ? : err;
2861  }
2862  
unix_stream_read_actor(struct sk_buff * skb,int skip,int chunk,struct unix_stream_read_state * state)2863  static int unix_stream_read_actor(struct sk_buff *skb,
2864  				  int skip, int chunk,
2865  				  struct unix_stream_read_state *state)
2866  {
2867  	int ret;
2868  
2869  	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2870  				    state->msg, chunk);
2871  	return ret ?: chunk;
2872  }
2873  
__unix_stream_recvmsg(struct sock * sk,struct msghdr * msg,size_t size,int flags)2874  int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
2875  			  size_t size, int flags)
2876  {
2877  	struct unix_stream_read_state state = {
2878  		.recv_actor = unix_stream_read_actor,
2879  		.socket = sk->sk_socket,
2880  		.msg = msg,
2881  		.size = size,
2882  		.flags = flags
2883  	};
2884  
2885  	return unix_stream_read_generic(&state, true);
2886  }
2887  
unix_stream_recvmsg(struct socket * sock,struct msghdr * msg,size_t size,int flags)2888  static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2889  			       size_t size, int flags)
2890  {
2891  	struct unix_stream_read_state state = {
2892  		.recv_actor = unix_stream_read_actor,
2893  		.socket = sock,
2894  		.msg = msg,
2895  		.size = size,
2896  		.flags = flags
2897  	};
2898  
2899  #ifdef CONFIG_BPF_SYSCALL
2900  	struct sock *sk = sock->sk;
2901  	const struct proto *prot = READ_ONCE(sk->sk_prot);
2902  
2903  	if (prot != &unix_stream_proto)
2904  		return prot->recvmsg(sk, msg, size, flags, NULL);
2905  #endif
2906  	return unix_stream_read_generic(&state, true);
2907  }
2908  
unix_stream_splice_actor(struct sk_buff * skb,int skip,int chunk,struct unix_stream_read_state * state)2909  static int unix_stream_splice_actor(struct sk_buff *skb,
2910  				    int skip, int chunk,
2911  				    struct unix_stream_read_state *state)
2912  {
2913  	return skb_splice_bits(skb, state->socket->sk,
2914  			       UNIXCB(skb).consumed + skip,
2915  			       state->pipe, chunk, state->splice_flags);
2916  }
2917  
unix_stream_splice_read(struct socket * sock,loff_t * ppos,struct pipe_inode_info * pipe,size_t size,unsigned int flags)2918  static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2919  				       struct pipe_inode_info *pipe,
2920  				       size_t size, unsigned int flags)
2921  {
2922  	struct unix_stream_read_state state = {
2923  		.recv_actor = unix_stream_splice_actor,
2924  		.socket = sock,
2925  		.pipe = pipe,
2926  		.size = size,
2927  		.splice_flags = flags,
2928  	};
2929  
2930  	if (unlikely(*ppos))
2931  		return -ESPIPE;
2932  
2933  	if (sock->file->f_flags & O_NONBLOCK ||
2934  	    flags & SPLICE_F_NONBLOCK)
2935  		state.flags = MSG_DONTWAIT;
2936  
2937  	return unix_stream_read_generic(&state, false);
2938  }
2939  
unix_shutdown(struct socket * sock,int mode)2940  static int unix_shutdown(struct socket *sock, int mode)
2941  {
2942  	struct sock *sk = sock->sk;
2943  	struct sock *other;
2944  
2945  	if (mode < SHUT_RD || mode > SHUT_RDWR)
2946  		return -EINVAL;
2947  	/* This maps:
2948  	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2949  	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2950  	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2951  	 */
2952  	++mode;
2953  
2954  	unix_state_lock(sk);
2955  	WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
2956  	other = unix_peer(sk);
2957  	if (other)
2958  		sock_hold(other);
2959  	unix_state_unlock(sk);
2960  	sk->sk_state_change(sk);
2961  
2962  	if (other &&
2963  		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2964  
2965  		int peer_mode = 0;
2966  		const struct proto *prot = READ_ONCE(other->sk_prot);
2967  
2968  		if (prot->unhash)
2969  			prot->unhash(other);
2970  		if (mode&RCV_SHUTDOWN)
2971  			peer_mode |= SEND_SHUTDOWN;
2972  		if (mode&SEND_SHUTDOWN)
2973  			peer_mode |= RCV_SHUTDOWN;
2974  		unix_state_lock(other);
2975  		WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
2976  		unix_state_unlock(other);
2977  		other->sk_state_change(other);
2978  		if (peer_mode == SHUTDOWN_MASK)
2979  			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2980  		else if (peer_mode & RCV_SHUTDOWN)
2981  			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2982  	}
2983  	if (other)
2984  		sock_put(other);
2985  
2986  	return 0;
2987  }
2988  
unix_inq_len(struct sock * sk)2989  long unix_inq_len(struct sock *sk)
2990  {
2991  	struct sk_buff *skb;
2992  	long amount = 0;
2993  
2994  	if (READ_ONCE(sk->sk_state) == TCP_LISTEN)
2995  		return -EINVAL;
2996  
2997  	spin_lock(&sk->sk_receive_queue.lock);
2998  	if (sk->sk_type == SOCK_STREAM ||
2999  	    sk->sk_type == SOCK_SEQPACKET) {
3000  		skb_queue_walk(&sk->sk_receive_queue, skb)
3001  			amount += unix_skb_len(skb);
3002  	} else {
3003  		skb = skb_peek(&sk->sk_receive_queue);
3004  		if (skb)
3005  			amount = skb->len;
3006  	}
3007  	spin_unlock(&sk->sk_receive_queue.lock);
3008  
3009  	return amount;
3010  }
3011  EXPORT_SYMBOL_GPL(unix_inq_len);
3012  
unix_outq_len(struct sock * sk)3013  long unix_outq_len(struct sock *sk)
3014  {
3015  	return sk_wmem_alloc_get(sk);
3016  }
3017  EXPORT_SYMBOL_GPL(unix_outq_len);
3018  
unix_open_file(struct sock * sk)3019  static int unix_open_file(struct sock *sk)
3020  {
3021  	struct path path;
3022  	struct file *f;
3023  	int fd;
3024  
3025  	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
3026  		return -EPERM;
3027  
3028  	if (!smp_load_acquire(&unix_sk(sk)->addr))
3029  		return -ENOENT;
3030  
3031  	path = unix_sk(sk)->path;
3032  	if (!path.dentry)
3033  		return -ENOENT;
3034  
3035  	path_get(&path);
3036  
3037  	fd = get_unused_fd_flags(O_CLOEXEC);
3038  	if (fd < 0)
3039  		goto out;
3040  
3041  	f = dentry_open(&path, O_PATH, current_cred());
3042  	if (IS_ERR(f)) {
3043  		put_unused_fd(fd);
3044  		fd = PTR_ERR(f);
3045  		goto out;
3046  	}
3047  
3048  	fd_install(fd, f);
3049  out:
3050  	path_put(&path);
3051  
3052  	return fd;
3053  }
3054  
unix_ioctl(struct socket * sock,unsigned int cmd,unsigned long arg)3055  static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3056  {
3057  	struct sock *sk = sock->sk;
3058  	long amount = 0;
3059  	int err;
3060  
3061  	switch (cmd) {
3062  	case SIOCOUTQ:
3063  		amount = unix_outq_len(sk);
3064  		err = put_user(amount, (int __user *)arg);
3065  		break;
3066  	case SIOCINQ:
3067  		amount = unix_inq_len(sk);
3068  		if (amount < 0)
3069  			err = amount;
3070  		else
3071  			err = put_user(amount, (int __user *)arg);
3072  		break;
3073  	case SIOCUNIXFILE:
3074  		err = unix_open_file(sk);
3075  		break;
3076  #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3077  	case SIOCATMARK:
3078  		{
3079  			struct sk_buff *skb;
3080  			int answ = 0;
3081  
3082  			skb = skb_peek(&sk->sk_receive_queue);
3083  			if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
3084  				answ = 1;
3085  			err = put_user(answ, (int __user *)arg);
3086  		}
3087  		break;
3088  #endif
3089  	default:
3090  		err = -ENOIOCTLCMD;
3091  		break;
3092  	}
3093  	return err;
3094  }
3095  
3096  #ifdef CONFIG_COMPAT
unix_compat_ioctl(struct socket * sock,unsigned int cmd,unsigned long arg)3097  static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3098  {
3099  	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
3100  }
3101  #endif
3102  
unix_poll(struct file * file,struct socket * sock,poll_table * wait)3103  static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
3104  {
3105  	struct sock *sk = sock->sk;
3106  	unsigned char state;
3107  	__poll_t mask;
3108  	u8 shutdown;
3109  
3110  	sock_poll_wait(file, sock, wait);
3111  	mask = 0;
3112  	shutdown = READ_ONCE(sk->sk_shutdown);
3113  	state = READ_ONCE(sk->sk_state);
3114  
3115  	/* exceptional events? */
3116  	if (READ_ONCE(sk->sk_err))
3117  		mask |= EPOLLERR;
3118  	if (shutdown == SHUTDOWN_MASK)
3119  		mask |= EPOLLHUP;
3120  	if (shutdown & RCV_SHUTDOWN)
3121  		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3122  
3123  	/* readable? */
3124  	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3125  		mask |= EPOLLIN | EPOLLRDNORM;
3126  	if (sk_is_readable(sk))
3127  		mask |= EPOLLIN | EPOLLRDNORM;
3128  #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3129  	if (READ_ONCE(unix_sk(sk)->oob_skb))
3130  		mask |= EPOLLPRI;
3131  #endif
3132  
3133  	/* Connection-based need to check for termination and startup */
3134  	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
3135  	    state == TCP_CLOSE)
3136  		mask |= EPOLLHUP;
3137  
3138  	/*
3139  	 * we set writable also when the other side has shut down the
3140  	 * connection. This prevents stuck sockets.
3141  	 */
3142  	if (unix_writable(sk, state))
3143  		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3144  
3145  	return mask;
3146  }
3147  
unix_dgram_poll(struct file * file,struct socket * sock,poll_table * wait)3148  static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
3149  				    poll_table *wait)
3150  {
3151  	struct sock *sk = sock->sk, *other;
3152  	unsigned int writable;
3153  	unsigned char state;
3154  	__poll_t mask;
3155  	u8 shutdown;
3156  
3157  	sock_poll_wait(file, sock, wait);
3158  	mask = 0;
3159  	shutdown = READ_ONCE(sk->sk_shutdown);
3160  	state = READ_ONCE(sk->sk_state);
3161  
3162  	/* exceptional events? */
3163  	if (READ_ONCE(sk->sk_err) ||
3164  	    !skb_queue_empty_lockless(&sk->sk_error_queue))
3165  		mask |= EPOLLERR |
3166  			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
3167  
3168  	if (shutdown & RCV_SHUTDOWN)
3169  		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3170  	if (shutdown == SHUTDOWN_MASK)
3171  		mask |= EPOLLHUP;
3172  
3173  	/* readable? */
3174  	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3175  		mask |= EPOLLIN | EPOLLRDNORM;
3176  	if (sk_is_readable(sk))
3177  		mask |= EPOLLIN | EPOLLRDNORM;
3178  
3179  	/* Connection-based need to check for termination and startup */
3180  	if (sk->sk_type == SOCK_SEQPACKET && state == TCP_CLOSE)
3181  		mask |= EPOLLHUP;
3182  
3183  	/* No write status requested, avoid expensive OUT tests. */
3184  	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
3185  		return mask;
3186  
3187  	writable = unix_writable(sk, state);
3188  	if (writable) {
3189  		unix_state_lock(sk);
3190  
3191  		other = unix_peer(sk);
3192  		if (other && unix_peer(other) != sk &&
3193  		    unix_recvq_full_lockless(other) &&
3194  		    unix_dgram_peer_wake_me(sk, other))
3195  			writable = 0;
3196  
3197  		unix_state_unlock(sk);
3198  	}
3199  
3200  	if (writable)
3201  		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3202  	else
3203  		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
3204  
3205  	return mask;
3206  }
3207  
3208  #ifdef CONFIG_PROC_FS
3209  
3210  #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
3211  
3212  #define get_bucket(x) ((x) >> BUCKET_SPACE)
3213  #define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1))
3214  #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
3215  
unix_from_bucket(struct seq_file * seq,loff_t * pos)3216  static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
3217  {
3218  	unsigned long offset = get_offset(*pos);
3219  	unsigned long bucket = get_bucket(*pos);
3220  	unsigned long count = 0;
3221  	struct sock *sk;
3222  
3223  	for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]);
3224  	     sk; sk = sk_next(sk)) {
3225  		if (++count == offset)
3226  			break;
3227  	}
3228  
3229  	return sk;
3230  }
3231  
unix_get_first(struct seq_file * seq,loff_t * pos)3232  static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
3233  {
3234  	unsigned long bucket = get_bucket(*pos);
3235  	struct net *net = seq_file_net(seq);
3236  	struct sock *sk;
3237  
3238  	while (bucket < UNIX_HASH_SIZE) {
3239  		spin_lock(&net->unx.table.locks[bucket]);
3240  
3241  		sk = unix_from_bucket(seq, pos);
3242  		if (sk)
3243  			return sk;
3244  
3245  		spin_unlock(&net->unx.table.locks[bucket]);
3246  
3247  		*pos = set_bucket_offset(++bucket, 1);
3248  	}
3249  
3250  	return NULL;
3251  }
3252  
unix_get_next(struct seq_file * seq,struct sock * sk,loff_t * pos)3253  static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
3254  				  loff_t *pos)
3255  {
3256  	unsigned long bucket = get_bucket(*pos);
3257  
3258  	sk = sk_next(sk);
3259  	if (sk)
3260  		return sk;
3261  
3262  
3263  	spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]);
3264  
3265  	*pos = set_bucket_offset(++bucket, 1);
3266  
3267  	return unix_get_first(seq, pos);
3268  }
3269  
unix_seq_start(struct seq_file * seq,loff_t * pos)3270  static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
3271  {
3272  	if (!*pos)
3273  		return SEQ_START_TOKEN;
3274  
3275  	return unix_get_first(seq, pos);
3276  }
3277  
unix_seq_next(struct seq_file * seq,void * v,loff_t * pos)3278  static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3279  {
3280  	++*pos;
3281  
3282  	if (v == SEQ_START_TOKEN)
3283  		return unix_get_first(seq, pos);
3284  
3285  	return unix_get_next(seq, v, pos);
3286  }
3287  
unix_seq_stop(struct seq_file * seq,void * v)3288  static void unix_seq_stop(struct seq_file *seq, void *v)
3289  {
3290  	struct sock *sk = v;
3291  
3292  	if (sk)
3293  		spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]);
3294  }
3295  
unix_seq_show(struct seq_file * seq,void * v)3296  static int unix_seq_show(struct seq_file *seq, void *v)
3297  {
3298  
3299  	if (v == SEQ_START_TOKEN)
3300  		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
3301  			 "Inode Path\n");
3302  	else {
3303  		struct sock *s = v;
3304  		struct unix_sock *u = unix_sk(s);
3305  		unix_state_lock(s);
3306  
3307  		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
3308  			s,
3309  			refcount_read(&s->sk_refcnt),
3310  			0,
3311  			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
3312  			s->sk_type,
3313  			s->sk_socket ?
3314  			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
3315  			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
3316  			sock_i_ino(s));
3317  
3318  		if (u->addr) {	// under a hash table lock here
3319  			int i, len;
3320  			seq_putc(seq, ' ');
3321  
3322  			i = 0;
3323  			len = u->addr->len -
3324  				offsetof(struct sockaddr_un, sun_path);
3325  			if (u->addr->name->sun_path[0]) {
3326  				len--;
3327  			} else {
3328  				seq_putc(seq, '@');
3329  				i++;
3330  			}
3331  			for ( ; i < len; i++)
3332  				seq_putc(seq, u->addr->name->sun_path[i] ?:
3333  					 '@');
3334  		}
3335  		unix_state_unlock(s);
3336  		seq_putc(seq, '\n');
3337  	}
3338  
3339  	return 0;
3340  }
3341  
3342  static const struct seq_operations unix_seq_ops = {
3343  	.start  = unix_seq_start,
3344  	.next   = unix_seq_next,
3345  	.stop   = unix_seq_stop,
3346  	.show   = unix_seq_show,
3347  };
3348  
3349  #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
3350  struct bpf_unix_iter_state {
3351  	struct seq_net_private p;
3352  	unsigned int cur_sk;
3353  	unsigned int end_sk;
3354  	unsigned int max_sk;
3355  	struct sock **batch;
3356  	bool st_bucket_done;
3357  };
3358  
3359  struct bpf_iter__unix {
3360  	__bpf_md_ptr(struct bpf_iter_meta *, meta);
3361  	__bpf_md_ptr(struct unix_sock *, unix_sk);
3362  	uid_t uid __aligned(8);
3363  };
3364  
unix_prog_seq_show(struct bpf_prog * prog,struct bpf_iter_meta * meta,struct unix_sock * unix_sk,uid_t uid)3365  static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
3366  			      struct unix_sock *unix_sk, uid_t uid)
3367  {
3368  	struct bpf_iter__unix ctx;
3369  
3370  	meta->seq_num--;  /* skip SEQ_START_TOKEN */
3371  	ctx.meta = meta;
3372  	ctx.unix_sk = unix_sk;
3373  	ctx.uid = uid;
3374  	return bpf_iter_run_prog(prog, &ctx);
3375  }
3376  
bpf_iter_unix_hold_batch(struct seq_file * seq,struct sock * start_sk)3377  static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
3378  
3379  {
3380  	struct bpf_unix_iter_state *iter = seq->private;
3381  	unsigned int expected = 1;
3382  	struct sock *sk;
3383  
3384  	sock_hold(start_sk);
3385  	iter->batch[iter->end_sk++] = start_sk;
3386  
3387  	for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
3388  		if (iter->end_sk < iter->max_sk) {
3389  			sock_hold(sk);
3390  			iter->batch[iter->end_sk++] = sk;
3391  		}
3392  
3393  		expected++;
3394  	}
3395  
3396  	spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]);
3397  
3398  	return expected;
3399  }
3400  
bpf_iter_unix_put_batch(struct bpf_unix_iter_state * iter)3401  static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
3402  {
3403  	while (iter->cur_sk < iter->end_sk)
3404  		sock_put(iter->batch[iter->cur_sk++]);
3405  }
3406  
bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state * iter,unsigned int new_batch_sz)3407  static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
3408  				       unsigned int new_batch_sz)
3409  {
3410  	struct sock **new_batch;
3411  
3412  	new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
3413  			     GFP_USER | __GFP_NOWARN);
3414  	if (!new_batch)
3415  		return -ENOMEM;
3416  
3417  	bpf_iter_unix_put_batch(iter);
3418  	kvfree(iter->batch);
3419  	iter->batch = new_batch;
3420  	iter->max_sk = new_batch_sz;
3421  
3422  	return 0;
3423  }
3424  
bpf_iter_unix_batch(struct seq_file * seq,loff_t * pos)3425  static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
3426  					loff_t *pos)
3427  {
3428  	struct bpf_unix_iter_state *iter = seq->private;
3429  	unsigned int expected;
3430  	bool resized = false;
3431  	struct sock *sk;
3432  
3433  	if (iter->st_bucket_done)
3434  		*pos = set_bucket_offset(get_bucket(*pos) + 1, 1);
3435  
3436  again:
3437  	/* Get a new batch */
3438  	iter->cur_sk = 0;
3439  	iter->end_sk = 0;
3440  
3441  	sk = unix_get_first(seq, pos);
3442  	if (!sk)
3443  		return NULL; /* Done */
3444  
3445  	expected = bpf_iter_unix_hold_batch(seq, sk);
3446  
3447  	if (iter->end_sk == expected) {
3448  		iter->st_bucket_done = true;
3449  		return sk;
3450  	}
3451  
3452  	if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) {
3453  		resized = true;
3454  		goto again;
3455  	}
3456  
3457  	return sk;
3458  }
3459  
bpf_iter_unix_seq_start(struct seq_file * seq,loff_t * pos)3460  static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
3461  {
3462  	if (!*pos)
3463  		return SEQ_START_TOKEN;
3464  
3465  	/* bpf iter does not support lseek, so it always
3466  	 * continue from where it was stop()-ped.
3467  	 */
3468  	return bpf_iter_unix_batch(seq, pos);
3469  }
3470  
bpf_iter_unix_seq_next(struct seq_file * seq,void * v,loff_t * pos)3471  static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3472  {
3473  	struct bpf_unix_iter_state *iter = seq->private;
3474  	struct sock *sk;
3475  
3476  	/* Whenever seq_next() is called, the iter->cur_sk is
3477  	 * done with seq_show(), so advance to the next sk in
3478  	 * the batch.
3479  	 */
3480  	if (iter->cur_sk < iter->end_sk)
3481  		sock_put(iter->batch[iter->cur_sk++]);
3482  
3483  	++*pos;
3484  
3485  	if (iter->cur_sk < iter->end_sk)
3486  		sk = iter->batch[iter->cur_sk];
3487  	else
3488  		sk = bpf_iter_unix_batch(seq, pos);
3489  
3490  	return sk;
3491  }
3492  
bpf_iter_unix_seq_show(struct seq_file * seq,void * v)3493  static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
3494  {
3495  	struct bpf_iter_meta meta;
3496  	struct bpf_prog *prog;
3497  	struct sock *sk = v;
3498  	uid_t uid;
3499  	bool slow;
3500  	int ret;
3501  
3502  	if (v == SEQ_START_TOKEN)
3503  		return 0;
3504  
3505  	slow = lock_sock_fast(sk);
3506  
3507  	if (unlikely(sk_unhashed(sk))) {
3508  		ret = SEQ_SKIP;
3509  		goto unlock;
3510  	}
3511  
3512  	uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
3513  	meta.seq = seq;
3514  	prog = bpf_iter_get_info(&meta, false);
3515  	ret = unix_prog_seq_show(prog, &meta, v, uid);
3516  unlock:
3517  	unlock_sock_fast(sk, slow);
3518  	return ret;
3519  }
3520  
bpf_iter_unix_seq_stop(struct seq_file * seq,void * v)3521  static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
3522  {
3523  	struct bpf_unix_iter_state *iter = seq->private;
3524  	struct bpf_iter_meta meta;
3525  	struct bpf_prog *prog;
3526  
3527  	if (!v) {
3528  		meta.seq = seq;
3529  		prog = bpf_iter_get_info(&meta, true);
3530  		if (prog)
3531  			(void)unix_prog_seq_show(prog, &meta, v, 0);
3532  	}
3533  
3534  	if (iter->cur_sk < iter->end_sk)
3535  		bpf_iter_unix_put_batch(iter);
3536  }
3537  
3538  static const struct seq_operations bpf_iter_unix_seq_ops = {
3539  	.start	= bpf_iter_unix_seq_start,
3540  	.next	= bpf_iter_unix_seq_next,
3541  	.stop	= bpf_iter_unix_seq_stop,
3542  	.show	= bpf_iter_unix_seq_show,
3543  };
3544  #endif
3545  #endif
3546  
3547  static const struct net_proto_family unix_family_ops = {
3548  	.family = PF_UNIX,
3549  	.create = unix_create,
3550  	.owner	= THIS_MODULE,
3551  };
3552  
3553  
unix_net_init(struct net * net)3554  static int __net_init unix_net_init(struct net *net)
3555  {
3556  	int i;
3557  
3558  	net->unx.sysctl_max_dgram_qlen = 10;
3559  	if (unix_sysctl_register(net))
3560  		goto out;
3561  
3562  #ifdef CONFIG_PROC_FS
3563  	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
3564  			     sizeof(struct seq_net_private)))
3565  		goto err_sysctl;
3566  #endif
3567  
3568  	net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE,
3569  					      sizeof(spinlock_t), GFP_KERNEL);
3570  	if (!net->unx.table.locks)
3571  		goto err_proc;
3572  
3573  	net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE,
3574  						sizeof(struct hlist_head),
3575  						GFP_KERNEL);
3576  	if (!net->unx.table.buckets)
3577  		goto free_locks;
3578  
3579  	for (i = 0; i < UNIX_HASH_SIZE; i++) {
3580  		spin_lock_init(&net->unx.table.locks[i]);
3581  		INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
3582  	}
3583  
3584  	return 0;
3585  
3586  free_locks:
3587  	kvfree(net->unx.table.locks);
3588  err_proc:
3589  #ifdef CONFIG_PROC_FS
3590  	remove_proc_entry("unix", net->proc_net);
3591  err_sysctl:
3592  #endif
3593  	unix_sysctl_unregister(net);
3594  out:
3595  	return -ENOMEM;
3596  }
3597  
unix_net_exit(struct net * net)3598  static void __net_exit unix_net_exit(struct net *net)
3599  {
3600  	kvfree(net->unx.table.buckets);
3601  	kvfree(net->unx.table.locks);
3602  	unix_sysctl_unregister(net);
3603  	remove_proc_entry("unix", net->proc_net);
3604  }
3605  
3606  static struct pernet_operations unix_net_ops = {
3607  	.init = unix_net_init,
3608  	.exit = unix_net_exit,
3609  };
3610  
3611  #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
DEFINE_BPF_ITER_FUNC(unix,struct bpf_iter_meta * meta,struct unix_sock * unix_sk,uid_t uid)3612  DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
3613  		     struct unix_sock *unix_sk, uid_t uid)
3614  
3615  #define INIT_BATCH_SZ 16
3616  
3617  static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
3618  {
3619  	struct bpf_unix_iter_state *iter = priv_data;
3620  	int err;
3621  
3622  	err = bpf_iter_init_seq_net(priv_data, aux);
3623  	if (err)
3624  		return err;
3625  
3626  	err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
3627  	if (err) {
3628  		bpf_iter_fini_seq_net(priv_data);
3629  		return err;
3630  	}
3631  
3632  	return 0;
3633  }
3634  
bpf_iter_fini_unix(void * priv_data)3635  static void bpf_iter_fini_unix(void *priv_data)
3636  {
3637  	struct bpf_unix_iter_state *iter = priv_data;
3638  
3639  	bpf_iter_fini_seq_net(priv_data);
3640  	kvfree(iter->batch);
3641  }
3642  
3643  static const struct bpf_iter_seq_info unix_seq_info = {
3644  	.seq_ops		= &bpf_iter_unix_seq_ops,
3645  	.init_seq_private	= bpf_iter_init_unix,
3646  	.fini_seq_private	= bpf_iter_fini_unix,
3647  	.seq_priv_size		= sizeof(struct bpf_unix_iter_state),
3648  };
3649  
3650  static const struct bpf_func_proto *
bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,const struct bpf_prog * prog)3651  bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
3652  			     const struct bpf_prog *prog)
3653  {
3654  	switch (func_id) {
3655  	case BPF_FUNC_setsockopt:
3656  		return &bpf_sk_setsockopt_proto;
3657  	case BPF_FUNC_getsockopt:
3658  		return &bpf_sk_getsockopt_proto;
3659  	default:
3660  		return NULL;
3661  	}
3662  }
3663  
3664  static struct bpf_iter_reg unix_reg_info = {
3665  	.target			= "unix",
3666  	.ctx_arg_info_size	= 1,
3667  	.ctx_arg_info		= {
3668  		{ offsetof(struct bpf_iter__unix, unix_sk),
3669  		  PTR_TO_BTF_ID_OR_NULL },
3670  	},
3671  	.get_func_proto         = bpf_iter_unix_get_func_proto,
3672  	.seq_info		= &unix_seq_info,
3673  };
3674  
bpf_iter_register(void)3675  static void __init bpf_iter_register(void)
3676  {
3677  	unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
3678  	if (bpf_iter_reg_target(&unix_reg_info))
3679  		pr_warn("Warning: could not register bpf iterator unix\n");
3680  }
3681  #endif
3682  
af_unix_init(void)3683  static int __init af_unix_init(void)
3684  {
3685  	int i, rc = -1;
3686  
3687  	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
3688  
3689  	for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
3690  		spin_lock_init(&bsd_socket_locks[i]);
3691  		INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
3692  	}
3693  
3694  	rc = proto_register(&unix_dgram_proto, 1);
3695  	if (rc != 0) {
3696  		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3697  		goto out;
3698  	}
3699  
3700  	rc = proto_register(&unix_stream_proto, 1);
3701  	if (rc != 0) {
3702  		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3703  		proto_unregister(&unix_dgram_proto);
3704  		goto out;
3705  	}
3706  
3707  	sock_register(&unix_family_ops);
3708  	register_pernet_subsys(&unix_net_ops);
3709  	unix_bpf_build_proto();
3710  
3711  #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3712  	bpf_iter_register();
3713  #endif
3714  
3715  out:
3716  	return rc;
3717  }
3718  
af_unix_exit(void)3719  static void __exit af_unix_exit(void)
3720  {
3721  	sock_unregister(PF_UNIX);
3722  	proto_unregister(&unix_dgram_proto);
3723  	proto_unregister(&unix_stream_proto);
3724  	unregister_pernet_subsys(&unix_net_ops);
3725  }
3726  
3727  /* Earlier than device_initcall() so that other drivers invoking
3728     request_module() don't end up in a loop when modprobe tries
3729     to use a UNIX socket. But later than subsys_initcall() because
3730     we depend on stuff initialised there */
3731  fs_initcall(af_unix_init);
3732  module_exit(af_unix_exit);
3733  
3734  MODULE_LICENSE("GPL");
3735  MODULE_ALIAS_NETPROTO(PF_UNIX);
3736