xref: /openbmc/linux/net/unix/af_unix.c (revision 4d75f5c664195b970e1cd2fd25b65b5eff257a0a)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * NET4:	Implementation of BSD Unix domain sockets.
4  *
5  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
6  *
7  * Fixes:
8  *		Linus Torvalds	:	Assorted bug cures.
9  *		Niibe Yutaka	:	async I/O support.
10  *		Carsten Paeth	:	PF_UNIX check, address fixes.
11  *		Alan Cox	:	Limit size of allocated blocks.
12  *		Alan Cox	:	Fixed the stupid socketpair bug.
13  *		Alan Cox	:	BSD compatibility fine tuning.
14  *		Alan Cox	:	Fixed a bug in connect when interrupted.
15  *		Alan Cox	:	Sorted out a proper draft version of
16  *					file descriptor passing hacked up from
17  *					Mike Shaver's work.
18  *		Marty Leisner	:	Fixes to fd passing
19  *		Nick Nevin	:	recvmsg bugfix.
20  *		Alan Cox	:	Started proper garbage collector
21  *		Heiko Eißfeldt	:	Missing verify_area check
22  *		Alan Cox	:	Started POSIXisms
23  *		Andreas Schwab	:	Replace inode by dentry for proper
24  *					reference counting
25  *		Kirk Petersen	:	Made this a module
26  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
27  *					Lots of bug fixes.
28  *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
29  *					by above two patches.
30  *	     Andrea Arcangeli	:	If possible we block in connect(2)
31  *					if the max backlog of the listen socket
32  *					has been reached. This won't break
33  *					old apps and it will avoid a huge amount
34  *					of socks hashed (this is for unix_gc()
35  *					performance reasons).
36  *					Security fix that limits the max
37  *					number of socks to 2*max_files and
38  *					the number of skb queueable in the
39  *					dgram receiver.
40  *		Artur Skawina   :	Hash function optimizations
41  *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
42  *	      Malcolm Beattie   :	Set peercred for socketpair
43  *	     Michal Ostrowski   :       Module initialization cleanup.
44  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
45  *	     				the core infrastructure is doing that
46  *	     				for all net proto families now (2.5.69+)
47  *
48  * Known differences from reference BSD that was tested:
49  *
50  *	[TO FIX]
51  *	ECONNREFUSED is not returned from one end of a connected() socket to the
52  *		other the moment one end closes.
53  *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
54  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
55  *	[NOT TO FIX]
56  *	accept() returns a path name even if the connecting socket has closed
57  *		in the meantime (BSD loses the path and gives up).
58  *	accept() returns 0 length path for an unbound connector. BSD returns 16
59  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
61  *	BSD af_unix apparently has connect forgetting to block properly.
62  *		(need to check this with the POSIX spec in detail)
63  *
64  * Differences from 2.0.0-11-... (ANK)
65  *	Bug fixes and improvements.
66  *		- client shutdown killed server socket.
67  *		- removed all useless cli/sti pairs.
68  *
69  *	Semantic changes/extensions.
70  *		- generic control message passing.
71  *		- SCM_CREDENTIALS control message.
72  *		- "Abstract" (not FS based) socket bindings.
73  *		  Abstract names are sequences of bytes (not zero terminated)
74  *		  started by 0, so that this name space does not intersect
75  *		  with BSD names.
76  */
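/*
 * Illustrative userspace sketch (hypothetical helpers, not kernel code) of
 * the two address flavours described above: a filesystem pathname and an
 * "abstract" name whose first byte is zero.
 *
 *	#include <string.h>
 *	#include <stddef.h>
 *	#include <sys/socket.h>
 *	#include <sys/un.h>
 *
 *	// Pathname address: NUL-terminated string in sun_path.
 *	static socklen_t fill_pathname(struct sockaddr_un *a, const char *path)
 *	{
 *		memset(a, 0, sizeof(*a));
 *		a->sun_family = AF_UNIX;
 *		strncpy(a->sun_path, path, sizeof(a->sun_path) - 1);
 *		return sizeof(*a);
 *	}
 *
 *	// Abstract address: sun_path[0] == '\0', the name is the bytes that
 *	// follow, and the addrlen passed to bind()/connect() delimits it.
 *	static socklen_t fill_abstract(struct sockaddr_un *a, const char *name)
 *	{
 *		size_t len = strlen(name);
 *
 *		memset(a, 0, sizeof(*a));
 *		a->sun_family = AF_UNIX;
 *		memcpy(a->sun_path + 1, name, len);
 *		return offsetof(struct sockaddr_un, sun_path) + 1 + len;
 *	}
 */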
77 
78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79 
80 #include <linux/module.h>
81 #include <linux/kernel.h>
82 #include <linux/signal.h>
83 #include <linux/sched/signal.h>
84 #include <linux/errno.h>
85 #include <linux/string.h>
86 #include <linux/stat.h>
87 #include <linux/dcache.h>
88 #include <linux/namei.h>
89 #include <linux/socket.h>
90 #include <linux/un.h>
91 #include <linux/fcntl.h>
92 #include <linux/filter.h>
93 #include <linux/termios.h>
94 #include <linux/sockios.h>
95 #include <linux/net.h>
96 #include <linux/in.h>
97 #include <linux/fs.h>
98 #include <linux/slab.h>
99 #include <linux/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <net/net_namespace.h>
103 #include <net/sock.h>
104 #include <net/tcp_states.h>
105 #include <net/af_unix.h>
106 #include <linux/proc_fs.h>
107 #include <linux/seq_file.h>
108 #include <net/scm.h>
109 #include <linux/init.h>
110 #include <linux/poll.h>
111 #include <linux/rtnetlink.h>
112 #include <linux/mount.h>
113 #include <net/checksum.h>
114 #include <linux/security.h>
115 #include <linux/splice.h>
116 #include <linux/freezer.h>
117 #include <linux/file.h>
118 #include <linux/btf_ids.h>
119 
120 static atomic_long_t unix_nr_socks;
121 static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2];
122 static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
123 
124 /* SMP locking strategy:
125  *    hash table is protected with spinlock.
126  *    each socket state is protected by separate spinlock.
127  */
128 
129 static unsigned int unix_unbound_hash(struct sock *sk)
130 {
131 	unsigned long hash = (unsigned long)sk;
132 
133 	hash ^= hash >> 16;
134 	hash ^= hash >> 8;
135 	hash ^= sk->sk_type;
136 
137 	return hash & UNIX_HASH_MOD;
138 }
139 
140 static unsigned int unix_bsd_hash(struct inode *i)
141 {
142 	return i->i_ino & UNIX_HASH_MOD;
143 }
144 
145 static unsigned int unix_abstract_hash(struct sockaddr_un *sunaddr,
146 				       int addr_len, int type)
147 {
148 	__wsum csum = csum_partial(sunaddr, addr_len, 0);
149 	unsigned int hash;
150 
151 	hash = (__force unsigned int)csum_fold(csum);
152 	hash ^= hash >> 8;
153 	hash ^= type;
154 
155 	return UNIX_HASH_MOD + 1 + (hash & UNIX_HASH_MOD);
156 }
157 
158 static void unix_table_double_lock(struct net *net,
159 				   unsigned int hash1, unsigned int hash2)
160 {
161 	if (hash1 == hash2) {
162 		spin_lock(&net->unx.table.locks[hash1]);
163 		return;
164 	}
165 
166 	if (hash1 > hash2)
167 		swap(hash1, hash2);
168 
169 	spin_lock(&net->unx.table.locks[hash1]);
170 	spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
171 }
172 
173 static void unix_table_double_unlock(struct net *net,
174 				     unsigned int hash1, unsigned int hash2)
175 {
176 	if (hash1 == hash2) {
177 		spin_unlock(&net->unx.table.locks[hash1]);
178 		return;
179 	}
180 
181 	spin_unlock(&net->unx.table.locks[hash1]);
182 	spin_unlock(&net->unx.table.locks[hash2]);
183 }
184 
185 #ifdef CONFIG_SECURITY_NETWORK
186 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
187 {
188 	UNIXCB(skb).secid = scm->secid;
189 }
190 
191 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
192 {
193 	scm->secid = UNIXCB(skb).secid;
194 }
195 
196 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
197 {
198 	return (scm->secid == UNIXCB(skb).secid);
199 }
200 #else
201 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
202 { }
203 
204 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
205 { }
206 
207 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
208 {
209 	return true;
210 }
211 #endif /* CONFIG_SECURITY_NETWORK */
212 
213 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
214 {
215 	return unix_peer(osk) == sk;
216 }
217 
218 static inline int unix_may_send(struct sock *sk, struct sock *osk)
219 {
220 	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
221 }
222 
223 static inline int unix_recvq_full_lockless(const struct sock *sk)
224 {
225 	return skb_queue_len_lockless(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
226 }
227 
228 struct sock *unix_peer_get(struct sock *s)
229 {
230 	struct sock *peer;
231 
232 	unix_state_lock(s);
233 	peer = unix_peer(s);
234 	if (peer)
235 		sock_hold(peer);
236 	unix_state_unlock(s);
237 	return peer;
238 }
239 EXPORT_SYMBOL_GPL(unix_peer_get);
240 
241 static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
242 					     int addr_len)
243 {
244 	struct unix_address *addr;
245 
246 	addr = kmalloc(sizeof(*addr) + addr_len, GFP_KERNEL);
247 	if (!addr)
248 		return NULL;
249 
250 	refcount_set(&addr->refcnt, 1);
251 	addr->len = addr_len;
252 	memcpy(addr->name, sunaddr, addr_len);
253 
254 	return addr;
255 }
256 
257 static inline void unix_release_addr(struct unix_address *addr)
258 {
259 	if (refcount_dec_and_test(&addr->refcnt))
260 		kfree(addr);
261 }
262 
263 /*
264  *	Check unix socket name:
265  *		- it should not be zero length.
266  *		- if it does not start with a zero byte, it should be NUL-terminated (FS object).
267  *		- if it starts with a zero byte, it is an abstract name.
268  */
269 
270 static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
271 {
272 	if (addr_len <= offsetof(struct sockaddr_un, sun_path) ||
273 	    addr_len > sizeof(*sunaddr))
274 		return -EINVAL;
275 
276 	if (sunaddr->sun_family != AF_UNIX)
277 		return -EINVAL;
278 
279 	return 0;
280 }
281 
282 static int unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
283 {
284 	struct sockaddr_storage *addr = (struct sockaddr_storage *)sunaddr;
285 	short offset = offsetof(struct sockaddr_storage, __data);
286 
287 	BUILD_BUG_ON(offset != offsetof(struct sockaddr_un, sun_path));
288 
289 	/* This may look like an off by one error but it is a bit more
290 	 * subtle.  108 is the longest valid AF_UNIX path for a binding.
291 	 * sun_path[108] doesn't as such exist.  However in kernel space
292 	 * we are guaranteed that it is a valid memory location in our
293 	 * kernel address buffer because syscall functions always pass
294 	 * a pointer of struct sockaddr_storage which has a bigger buffer
295 	 * than 108.  Also, we must terminate sun_path for strlen() in
296 	 * getname_kernel().
297 	 */
298 	addr->__data[addr_len - offset] = 0;
299 
300 	/* Don't pass sunaddr->sun_path to strlen().  Otherwise, 108 will
301 	 * cause panic if CONFIG_FORTIFY_SOURCE=y.  Let __fortify_strlen()
302 	 * know the actual buffer.
303 	 */
304 	return strlen(addr->__data) + offset + 1;
305 }
306 
307 static void __unix_remove_socket(struct sock *sk)
308 {
309 	sk_del_node_init(sk);
310 }
311 
312 static void __unix_insert_socket(struct net *net, struct sock *sk)
313 {
314 	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
315 	sk_add_node(sk, &net->unx.table.buckets[sk->sk_hash]);
316 }
317 
318 static void __unix_set_addr_hash(struct net *net, struct sock *sk,
319 				 struct unix_address *addr, unsigned int hash)
320 {
321 	__unix_remove_socket(sk);
322 	smp_store_release(&unix_sk(sk)->addr, addr);
323 
324 	sk->sk_hash = hash;
325 	__unix_insert_socket(net, sk);
326 }
327 
328 static void unix_remove_socket(struct net *net, struct sock *sk)
329 {
330 	spin_lock(&net->unx.table.locks[sk->sk_hash]);
331 	__unix_remove_socket(sk);
332 	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
333 }
334 
335 static void unix_insert_unbound_socket(struct net *net, struct sock *sk)
336 {
337 	spin_lock(&net->unx.table.locks[sk->sk_hash]);
338 	__unix_insert_socket(net, sk);
339 	spin_unlock(&net->unx.table.locks[sk->sk_hash]);
340 }
341 
342 static void unix_insert_bsd_socket(struct sock *sk)
343 {
344 	spin_lock(&bsd_socket_locks[sk->sk_hash]);
345 	sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]);
346 	spin_unlock(&bsd_socket_locks[sk->sk_hash]);
347 }
348 
349 static void unix_remove_bsd_socket(struct sock *sk)
350 {
351 	if (!hlist_unhashed(&sk->sk_bind_node)) {
352 		spin_lock(&bsd_socket_locks[sk->sk_hash]);
353 		__sk_del_bind_node(sk);
354 		spin_unlock(&bsd_socket_locks[sk->sk_hash]);
355 
356 		sk_node_init(&sk->sk_bind_node);
357 	}
358 }
359 
360 static struct sock *__unix_find_socket_byname(struct net *net,
361 					      struct sockaddr_un *sunname,
362 					      int len, unsigned int hash)
363 {
364 	struct sock *s;
365 
366 	sk_for_each(s, &net->unx.table.buckets[hash]) {
367 		struct unix_sock *u = unix_sk(s);
368 
369 		if (u->addr->len == len &&
370 		    !memcmp(u->addr->name, sunname, len))
371 			return s;
372 	}
373 	return NULL;
374 }
375 
376 static inline struct sock *unix_find_socket_byname(struct net *net,
377 						   struct sockaddr_un *sunname,
378 						   int len, unsigned int hash)
379 {
380 	struct sock *s;
381 
382 	spin_lock(&net->unx.table.locks[hash]);
383 	s = __unix_find_socket_byname(net, sunname, len, hash);
384 	if (s)
385 		sock_hold(s);
386 	spin_unlock(&net->unx.table.locks[hash]);
387 	return s;
388 }
389 
390 static struct sock *unix_find_socket_byinode(struct inode *i)
391 {
392 	unsigned int hash = unix_bsd_hash(i);
393 	struct sock *s;
394 
395 	spin_lock(&bsd_socket_locks[hash]);
396 	sk_for_each_bound(s, &bsd_socket_buckets[hash]) {
397 		struct dentry *dentry = unix_sk(s)->path.dentry;
398 
399 		if (dentry && d_backing_inode(dentry) == i) {
400 			sock_hold(s);
401 			spin_unlock(&bsd_socket_locks[hash]);
402 			return s;
403 		}
404 	}
405 	spin_unlock(&bsd_socket_locks[hash]);
406 	return NULL;
407 }
408 
409 /* Support code for asymmetrically connected dgram sockets
410  *
411  * If a datagram socket is connected to a socket not itself connected
412  * to the first socket (eg, /dev/log), clients may only enqueue more
413  * messages if the present receive queue of the server socket is not
414  * "too large". This means there's a second writeability condition
415  * poll and sendmsg need to test. The dgram recv code will do a wake
416  * up on the peer_wait wait queue of a socket upon reception of a
417  * datagram which needs to be propagated to sleeping would-be writers
418  * since these might not have sent anything so far. This can't be
419  * accomplished via poll_wait because the lifetime of the server
420  * socket might be less than that of its clients if these break their
421  * association with it or if the server socket is closed while clients
422  * are still connected to it and there's no way to inform "a polling
423  * implementation" that it should let go of a certain wait queue
424  *
425  * In order to propagate a wake up, a wait_queue_entry_t of the client
426  * socket is enqueued on the peer_wait queue of the server socket
427  * whose wake function does a wake_up on the ordinary client socket
428  * wait queue. This connection is established whenever a write (or
429  * poll for write) hits the flow control condition, and is broken when the
430  * association to the server socket is dissolved or after a wake up
431  * was relayed.
432  */
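/*
 * Illustrative userspace sketch (hypothetical helper, not kernel code) of the
 * situation this relay exists for: a connected SOCK_DGRAM sender backing off
 * until the receiver drains its queue.
 *
 *	#include <errno.h>
 *	#include <poll.h>
 *	#include <string.h>
 *	#include <sys/socket.h>
 *
 *	// fd: SOCK_DGRAM socket already connect()ed to a possibly slow
 *	// receiver (a syslog-style daemon, for example).
 *	int send_line(int fd, const char *line)
 *	{
 *		struct pollfd pfd = { .fd = fd, .events = POLLOUT };
 *
 *		while (send(fd, line, strlen(line), MSG_DONTWAIT) < 0) {
 *			if (errno != EAGAIN)
 *				return -1;
 *			// Sleeps until the relay above propagates the
 *			// receiver's wake-up to this socket's wait queue.
 *			if (poll(&pfd, 1, -1) < 0)
 *				return -1;
 *		}
 *		return 0;
 *	}
 */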
433 
434 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
435 				      void *key)
436 {
437 	struct unix_sock *u;
438 	wait_queue_head_t *u_sleep;
439 
440 	u = container_of(q, struct unix_sock, peer_wake);
441 
442 	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
443 			    q);
444 	u->peer_wake.private = NULL;
445 
446 	/* relaying can only happen while the wq still exists */
447 	u_sleep = sk_sleep(&u->sk);
448 	if (u_sleep)
449 		wake_up_interruptible_poll(u_sleep, key_to_poll(key));
450 
451 	return 0;
452 }
453 
454 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
455 {
456 	struct unix_sock *u, *u_other;
457 	int rc;
458 
459 	u = unix_sk(sk);
460 	u_other = unix_sk(other);
461 	rc = 0;
462 	spin_lock(&u_other->peer_wait.lock);
463 
464 	if (!u->peer_wake.private) {
465 		u->peer_wake.private = other;
466 		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
467 
468 		rc = 1;
469 	}
470 
471 	spin_unlock(&u_other->peer_wait.lock);
472 	return rc;
473 }
474 
475 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
476 					    struct sock *other)
477 {
478 	struct unix_sock *u, *u_other;
479 
480 	u = unix_sk(sk);
481 	u_other = unix_sk(other);
482 	spin_lock(&u_other->peer_wait.lock);
483 
484 	if (u->peer_wake.private == other) {
485 		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
486 		u->peer_wake.private = NULL;
487 	}
488 
489 	spin_unlock(&u_other->peer_wait.lock);
490 }
491 
492 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
493 						   struct sock *other)
494 {
495 	unix_dgram_peer_wake_disconnect(sk, other);
496 	wake_up_interruptible_poll(sk_sleep(sk),
497 				   EPOLLOUT |
498 				   EPOLLWRNORM |
499 				   EPOLLWRBAND);
500 }
501 
502 /* preconditions:
503  *	- unix_peer(sk) == other
504  *	- association is stable
505  */
506 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
507 {
508 	int connected;
509 
510 	connected = unix_dgram_peer_wake_connect(sk, other);
511 
512 	/* If other is SOCK_DEAD, we want to make sure we signal
513 	 * POLLOUT, such that a subsequent write() can get a
514 	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
515 	 * to other and it's full, we will hang waiting for POLLOUT.
516 	 */
517 	if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
518 		return 1;
519 
520 	if (connected)
521 		unix_dgram_peer_wake_disconnect(sk, other);
522 
523 	return 0;
524 }
525 
526 static int unix_writable(const struct sock *sk, unsigned char state)
527 {
528 	return state != TCP_LISTEN &&
529 		(refcount_read(&sk->sk_wmem_alloc) << 2) <= READ_ONCE(sk->sk_sndbuf);
530 }
531 
532 static void unix_write_space(struct sock *sk)
533 {
534 	struct socket_wq *wq;
535 
536 	rcu_read_lock();
537 	if (unix_writable(sk, READ_ONCE(sk->sk_state))) {
538 		wq = rcu_dereference(sk->sk_wq);
539 		if (skwq_has_sleeper(wq))
540 			wake_up_interruptible_sync_poll(&wq->wait,
541 				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
542 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
543 	}
544 	rcu_read_unlock();
545 }
546 
547 /* When dgram socket disconnects (or changes its peer), we clear its receive
548  * queue of packets that arrived from the previous peer. First, it allows
549  * flow control based only on wmem_alloc; second, a sk connected to a peer
550  * may receive messages only from that peer. */
551 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
552 {
553 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
554 		skb_queue_purge(&sk->sk_receive_queue);
555 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
556 
557 		/* If one link of bidirectional dgram pipe is disconnected,
558 		 * we signal error. Messages are lost. Do not make this,
559 		 * when peer was not connected to us.
560 		 */
561 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
562 			WRITE_ONCE(other->sk_err, ECONNRESET);
563 			sk_error_report(other);
564 		}
565 	}
566 }
567 
568 static void unix_sock_destructor(struct sock *sk)
569 {
570 	struct unix_sock *u = unix_sk(sk);
571 
572 	skb_queue_purge(&sk->sk_receive_queue);
573 
574 	DEBUG_NET_WARN_ON_ONCE(refcount_read(&sk->sk_wmem_alloc));
575 	DEBUG_NET_WARN_ON_ONCE(!sk_unhashed(sk));
576 	DEBUG_NET_WARN_ON_ONCE(sk->sk_socket);
577 	if (!sock_flag(sk, SOCK_DEAD)) {
578 		pr_info("Attempt to release alive unix socket: %p\n", sk);
579 		return;
580 	}
581 
582 	if (u->addr)
583 		unix_release_addr(u->addr);
584 
585 	atomic_long_dec(&unix_nr_socks);
586 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
587 #ifdef UNIX_REFCNT_DEBUG
588 	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
589 		atomic_long_read(&unix_nr_socks));
590 #endif
591 }
592 
593 static void unix_release_sock(struct sock *sk, int embrion)
594 {
595 	struct unix_sock *u = unix_sk(sk);
596 	struct sock *skpair;
597 	struct sk_buff *skb;
598 	struct path path;
599 	int state;
600 
601 	unix_remove_socket(sock_net(sk), sk);
602 	unix_remove_bsd_socket(sk);
603 
604 	/* Clear state */
605 	unix_state_lock(sk);
606 	sock_orphan(sk);
607 	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
608 	path	     = u->path;
609 	u->path.dentry = NULL;
610 	u->path.mnt = NULL;
611 	state = sk->sk_state;
612 	WRITE_ONCE(sk->sk_state, TCP_CLOSE);
613 
614 	skpair = unix_peer(sk);
615 	unix_peer(sk) = NULL;
616 
617 	unix_state_unlock(sk);
618 
619 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
620 	if (u->oob_skb) {
621 		kfree_skb(u->oob_skb);
622 		u->oob_skb = NULL;
623 	}
624 #endif
625 
626 	wake_up_interruptible_all(&u->peer_wait);
627 
628 	if (skpair != NULL) {
629 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
630 			unix_state_lock(skpair);
631 			/* No more writes */
632 			WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
633 			if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || embrion)
634 				WRITE_ONCE(skpair->sk_err, ECONNRESET);
635 			unix_state_unlock(skpair);
636 			skpair->sk_state_change(skpair);
637 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
638 		}
639 
640 		unix_dgram_peer_wake_disconnect(sk, skpair);
641 		sock_put(skpair); /* It may now die */
642 	}
643 
644 	/* Try to flush out this socket. Throw out buffers at least */
645 
646 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
647 		if (state == TCP_LISTEN)
648 			unix_release_sock(skb->sk, 1);
649 		/* passed fds are erased in the kfree_skb hook	      */
650 		UNIXCB(skb).consumed = skb->len;
651 		kfree_skb(skb);
652 	}
653 
654 	if (path.dentry)
655 		path_put(&path);
656 
657 	sock_put(sk);
658 
659 	/* ---- Socket is dead now and most probably destroyed ---- */
660 
661 	/*
662 	 * Fixme: BSD difference: In BSD all sockets connected to us get
663 	 *	  ECONNRESET and we die on the spot. In Linux we behave
664 	 *	  like files and pipes do and wait for the last
665 	 *	  dereference.
666 	 *
667 	 * Can't we simply set sock->err?
668 	 *
669 	 *	  What the above comment does talk about? --ANK(980817)
670 	 */
671 
672 	if (READ_ONCE(unix_tot_inflight))
673 		unix_gc();		/* Garbage collect fds */
674 }
675 
676 static void init_peercred(struct sock *sk)
677 {
678 	const struct cred *old_cred;
679 	struct pid *old_pid;
680 
681 	spin_lock(&sk->sk_peer_lock);
682 	old_pid = sk->sk_peer_pid;
683 	old_cred = sk->sk_peer_cred;
684 	sk->sk_peer_pid  = get_pid(task_tgid(current));
685 	sk->sk_peer_cred = get_current_cred();
686 	spin_unlock(&sk->sk_peer_lock);
687 
688 	put_pid(old_pid);
689 	put_cred(old_cred);
690 }
691 
692 static void copy_peercred(struct sock *sk, struct sock *peersk)
693 {
694 	if (sk < peersk) {
695 		spin_lock(&sk->sk_peer_lock);
696 		spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
697 	} else {
698 		spin_lock(&peersk->sk_peer_lock);
699 		spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
700 	}
701 
702 	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
703 	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
704 
705 	spin_unlock(&sk->sk_peer_lock);
706 	spin_unlock(&peersk->sk_peer_lock);
707 }
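/*
 * The credentials recorded here are what the peer later reads back with
 * getsockopt(SO_PEERCRED).  A userspace sketch of that side (hypothetical
 * helper; struct ucred needs _GNU_SOURCE with glibc):
 *
 *	#define _GNU_SOURCE
 *	#include <stdio.h>
 *	#include <sys/socket.h>
 *
 *	int print_peer_creds(int fd)
 *	{
 *		struct ucred cred;
 *		socklen_t len = sizeof(cred);
 *
 *		if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cred, &len) < 0)
 *			return -1;
 *		// For stream sockets these are the values captured at
 *		// connect()/listen() time, not the peer's current values.
 *		printf("pid=%d uid=%u gid=%u\n", cred.pid, cred.uid, cred.gid);
 *		return 0;
 *	}
 */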
708 
709 static int unix_listen(struct socket *sock, int backlog)
710 {
711 	int err;
712 	struct sock *sk = sock->sk;
713 	struct unix_sock *u = unix_sk(sk);
714 
715 	err = -EOPNOTSUPP;
716 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
717 		goto out;	/* Only stream/seqpacket sockets accept */
718 	err = -EINVAL;
719 	if (!READ_ONCE(u->addr))
720 		goto out;	/* No listens on an unbound socket */
721 	unix_state_lock(sk);
722 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
723 		goto out_unlock;
724 	if (backlog > sk->sk_max_ack_backlog)
725 		wake_up_interruptible_all(&u->peer_wait);
726 	sk->sk_max_ack_backlog	= backlog;
727 	WRITE_ONCE(sk->sk_state, TCP_LISTEN);
728 
729 	/* set credentials so connect can copy them */
730 	init_peercred(sk);
731 	err = 0;
732 
733 out_unlock:
734 	unix_state_unlock(sk);
735 out:
736 	return err;
737 }
738 
739 static int unix_release(struct socket *);
740 static int unix_bind(struct socket *, struct sockaddr *, int);
741 static int unix_stream_connect(struct socket *, struct sockaddr *,
742 			       int addr_len, int flags);
743 static int unix_socketpair(struct socket *, struct socket *);
744 static int unix_accept(struct socket *, struct socket *, int, bool);
745 static int unix_getname(struct socket *, struct sockaddr *, int);
746 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
747 static __poll_t unix_dgram_poll(struct file *, struct socket *,
748 				    poll_table *);
749 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
750 #ifdef CONFIG_COMPAT
751 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
752 #endif
753 static int unix_shutdown(struct socket *, int);
754 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
755 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
756 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
757 				       struct pipe_inode_info *, size_t size,
758 				       unsigned int flags);
759 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
760 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
761 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
762 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor);
763 static int unix_dgram_connect(struct socket *, struct sockaddr *,
764 			      int, int);
765 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
766 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
767 				  int);
768 
769 static int unix_set_peek_off(struct sock *sk, int val)
770 {
771 	struct unix_sock *u = unix_sk(sk);
772 
773 	if (mutex_lock_interruptible(&u->iolock))
774 		return -EINTR;
775 
776 	WRITE_ONCE(sk->sk_peek_off, val);
777 	mutex_unlock(&u->iolock);
778 
779 	return 0;
780 }
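/*
 * The peek offset maintained here backs SO_PEEK_OFF.  Userspace sketch
 * (hypothetical helper) of peeking queued data without consuming it:
 *
 *	#include <sys/types.h>
 *	#include <sys/socket.h>
 *
 *	// With SO_PEEK_OFF set to a non-negative value, each MSG_PEEK read
 *	// starts at that offset and advances it by the bytes peeked, so two
 *	// peeks return consecutive chunks without consuming anything.
 *	ssize_t peek_twice(int fd, char *buf, size_t len)
 *	{
 *		int off = 0;
 *		ssize_t n;
 *
 *		if (setsockopt(fd, SOL_SOCKET, SO_PEEK_OFF, &off, sizeof(off)) < 0)
 *			return -1;
 *		n = recv(fd, buf, len, MSG_PEEK);		// first chunk
 *		if (n > 0)
 *			n = recv(fd, buf, len, MSG_PEEK);	// next chunk
 *		return n;
 *	}
 */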
781 
782 #ifdef CONFIG_PROC_FS
783 static int unix_count_nr_fds(struct sock *sk)
784 {
785 	struct sk_buff *skb;
786 	struct unix_sock *u;
787 	int nr_fds = 0;
788 
789 	spin_lock(&sk->sk_receive_queue.lock);
790 	skb = skb_peek(&sk->sk_receive_queue);
791 	while (skb) {
792 		u = unix_sk(skb->sk);
793 		nr_fds += atomic_read(&u->scm_stat.nr_fds);
794 		skb = skb_peek_next(skb, &sk->sk_receive_queue);
795 	}
796 	spin_unlock(&sk->sk_receive_queue.lock);
797 
798 	return nr_fds;
799 }
800 
801 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
802 {
803 	struct sock *sk = sock->sk;
804 	unsigned char s_state;
805 	struct unix_sock *u;
806 	int nr_fds = 0;
807 
808 	if (sk) {
809 		s_state = READ_ONCE(sk->sk_state);
810 		u = unix_sk(sk);
811 
812 		/* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
813 		 * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
814 		 * SOCK_DGRAM is ordinary. So, no lock is needed.
815 		 */
816 		if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED)
817 			nr_fds = atomic_read(&u->scm_stat.nr_fds);
818 		else if (s_state == TCP_LISTEN)
819 			nr_fds = unix_count_nr_fds(sk);
820 
821 		seq_printf(m, "scm_fds: %u\n", nr_fds);
822 	}
823 }
824 #else
825 #define unix_show_fdinfo NULL
826 #endif
827 
828 static const struct proto_ops unix_stream_ops = {
829 	.family =	PF_UNIX,
830 	.owner =	THIS_MODULE,
831 	.release =	unix_release,
832 	.bind =		unix_bind,
833 	.connect =	unix_stream_connect,
834 	.socketpair =	unix_socketpair,
835 	.accept =	unix_accept,
836 	.getname =	unix_getname,
837 	.poll =		unix_poll,
838 	.ioctl =	unix_ioctl,
839 #ifdef CONFIG_COMPAT
840 	.compat_ioctl =	unix_compat_ioctl,
841 #endif
842 	.listen =	unix_listen,
843 	.shutdown =	unix_shutdown,
844 	.sendmsg =	unix_stream_sendmsg,
845 	.recvmsg =	unix_stream_recvmsg,
846 	.read_skb =	unix_stream_read_skb,
847 	.mmap =		sock_no_mmap,
848 	.splice_read =	unix_stream_splice_read,
849 	.set_peek_off =	unix_set_peek_off,
850 	.show_fdinfo =	unix_show_fdinfo,
851 };
852 
853 static const struct proto_ops unix_dgram_ops = {
854 	.family =	PF_UNIX,
855 	.owner =	THIS_MODULE,
856 	.release =	unix_release,
857 	.bind =		unix_bind,
858 	.connect =	unix_dgram_connect,
859 	.socketpair =	unix_socketpair,
860 	.accept =	sock_no_accept,
861 	.getname =	unix_getname,
862 	.poll =		unix_dgram_poll,
863 	.ioctl =	unix_ioctl,
864 #ifdef CONFIG_COMPAT
865 	.compat_ioctl =	unix_compat_ioctl,
866 #endif
867 	.listen =	sock_no_listen,
868 	.shutdown =	unix_shutdown,
869 	.sendmsg =	unix_dgram_sendmsg,
870 	.read_skb =	unix_read_skb,
871 	.recvmsg =	unix_dgram_recvmsg,
872 	.mmap =		sock_no_mmap,
873 	.set_peek_off =	unix_set_peek_off,
874 	.show_fdinfo =	unix_show_fdinfo,
875 };
876 
877 static const struct proto_ops unix_seqpacket_ops = {
878 	.family =	PF_UNIX,
879 	.owner =	THIS_MODULE,
880 	.release =	unix_release,
881 	.bind =		unix_bind,
882 	.connect =	unix_stream_connect,
883 	.socketpair =	unix_socketpair,
884 	.accept =	unix_accept,
885 	.getname =	unix_getname,
886 	.poll =		unix_dgram_poll,
887 	.ioctl =	unix_ioctl,
888 #ifdef CONFIG_COMPAT
889 	.compat_ioctl =	unix_compat_ioctl,
890 #endif
891 	.listen =	unix_listen,
892 	.shutdown =	unix_shutdown,
893 	.sendmsg =	unix_seqpacket_sendmsg,
894 	.recvmsg =	unix_seqpacket_recvmsg,
895 	.mmap =		sock_no_mmap,
896 	.set_peek_off =	unix_set_peek_off,
897 	.show_fdinfo =	unix_show_fdinfo,
898 };
899 
900 static void unix_close(struct sock *sk, long timeout)
901 {
902 	/* Nothing to do here, unix socket does not need a ->close().
903 	 * This is merely for sockmap.
904 	 */
905 }
906 
907 static void unix_unhash(struct sock *sk)
908 {
909 	/* Nothing to do here, unix socket does not need a ->unhash().
910 	 * This is merely for sockmap.
911 	 */
912 }
913 
914 static bool unix_bpf_bypass_getsockopt(int level, int optname)
915 {
916 	if (level == SOL_SOCKET) {
917 		switch (optname) {
918 		case SO_PEERPIDFD:
919 			return true;
920 		default:
921 			return false;
922 		}
923 	}
924 
925 	return false;
926 }
927 
928 struct proto unix_dgram_proto = {
929 	.name			= "UNIX",
930 	.owner			= THIS_MODULE,
931 	.obj_size		= sizeof(struct unix_sock),
932 	.close			= unix_close,
933 	.bpf_bypass_getsockopt	= unix_bpf_bypass_getsockopt,
934 #ifdef CONFIG_BPF_SYSCALL
935 	.psock_update_sk_prot	= unix_dgram_bpf_update_proto,
936 #endif
937 };
938 
939 struct proto unix_stream_proto = {
940 	.name			= "UNIX-STREAM",
941 	.owner			= THIS_MODULE,
942 	.obj_size		= sizeof(struct unix_sock),
943 	.close			= unix_close,
944 	.unhash			= unix_unhash,
945 	.bpf_bypass_getsockopt	= unix_bpf_bypass_getsockopt,
946 #ifdef CONFIG_BPF_SYSCALL
947 	.psock_update_sk_prot	= unix_stream_bpf_update_proto,
948 #endif
949 };
950 
951 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
952 {
953 	struct unix_sock *u;
954 	struct sock *sk;
955 	int err;
956 
957 	atomic_long_inc(&unix_nr_socks);
958 	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
959 		err = -ENFILE;
960 		goto err;
961 	}
962 
963 	if (type == SOCK_STREAM)
964 		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
965 	else /*dgram and  seqpacket */
966 		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
967 
968 	if (!sk) {
969 		err = -ENOMEM;
970 		goto err;
971 	}
972 
973 	sock_init_data(sock, sk);
974 
975 	sk->sk_hash		= unix_unbound_hash(sk);
976 	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
977 	sk->sk_write_space	= unix_write_space;
978 	sk->sk_max_ack_backlog	= READ_ONCE(net->unx.sysctl_max_dgram_qlen);
979 	sk->sk_destruct		= unix_sock_destructor;
980 	u = unix_sk(sk);
981 	u->listener = NULL;
982 	u->vertex = NULL;
983 	u->path.dentry = NULL;
984 	u->path.mnt = NULL;
985 	spin_lock_init(&u->lock);
986 	mutex_init(&u->iolock); /* single task reading lock */
987 	mutex_init(&u->bindlock); /* single task binding lock */
988 	init_waitqueue_head(&u->peer_wait);
989 	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
990 	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
991 	unix_insert_unbound_socket(net, sk);
992 
993 	sock_prot_inuse_add(net, sk->sk_prot, 1);
994 
995 	return sk;
996 
997 err:
998 	atomic_long_dec(&unix_nr_socks);
999 	return ERR_PTR(err);
1000 }
1001 
1002 static int unix_create(struct net *net, struct socket *sock, int protocol,
1003 		       int kern)
1004 {
1005 	struct sock *sk;
1006 
1007 	if (protocol && protocol != PF_UNIX)
1008 		return -EPROTONOSUPPORT;
1009 
1010 	sock->state = SS_UNCONNECTED;
1011 
1012 	switch (sock->type) {
1013 	case SOCK_STREAM:
1014 		sock->ops = &unix_stream_ops;
1015 		break;
1016 		/*
1017 		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
1018 		 *	nothing uses it.
1019 		 */
1020 	case SOCK_RAW:
1021 		sock->type = SOCK_DGRAM;
1022 		fallthrough;
1023 	case SOCK_DGRAM:
1024 		sock->ops = &unix_dgram_ops;
1025 		break;
1026 	case SOCK_SEQPACKET:
1027 		sock->ops = &unix_seqpacket_ops;
1028 		break;
1029 	default:
1030 		return -ESOCKTNOSUPPORT;
1031 	}
1032 
1033 	sk = unix_create1(net, sock, kern, sock->type);
1034 	if (IS_ERR(sk))
1035 		return PTR_ERR(sk);
1036 
1037 	return 0;
1038 }
1039 
1040 static int unix_release(struct socket *sock)
1041 {
1042 	struct sock *sk = sock->sk;
1043 
1044 	if (!sk)
1045 		return 0;
1046 
1047 	sk->sk_prot->close(sk, 0);
1048 	unix_release_sock(sk, 0);
1049 	sock->sk = NULL;
1050 
1051 	return 0;
1052 }
1053 
1054 static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len,
1055 				  int type)
1056 {
1057 	struct inode *inode;
1058 	struct path path;
1059 	struct sock *sk;
1060 	int err;
1061 
1062 	unix_mkname_bsd(sunaddr, addr_len);
1063 	err = kern_path(sunaddr->sun_path, LOOKUP_FOLLOW, &path);
1064 	if (err)
1065 		goto fail;
1066 
1067 	err = path_permission(&path, MAY_WRITE);
1068 	if (err)
1069 		goto path_put;
1070 
1071 	err = -ECONNREFUSED;
1072 	inode = d_backing_inode(path.dentry);
1073 	if (!S_ISSOCK(inode->i_mode))
1074 		goto path_put;
1075 
1076 	sk = unix_find_socket_byinode(inode);
1077 	if (!sk)
1078 		goto path_put;
1079 
1080 	err = -EPROTOTYPE;
1081 	if (sk->sk_type == type)
1082 		touch_atime(&path);
1083 	else
1084 		goto sock_put;
1085 
1086 	path_put(&path);
1087 
1088 	return sk;
1089 
1090 sock_put:
1091 	sock_put(sk);
1092 path_put:
1093 	path_put(&path);
1094 fail:
1095 	return ERR_PTR(err);
1096 }
1097 
1098 static struct sock *unix_find_abstract(struct net *net,
1099 				       struct sockaddr_un *sunaddr,
1100 				       int addr_len, int type)
1101 {
1102 	unsigned int hash = unix_abstract_hash(sunaddr, addr_len, type);
1103 	struct dentry *dentry;
1104 	struct sock *sk;
1105 
1106 	sk = unix_find_socket_byname(net, sunaddr, addr_len, hash);
1107 	if (!sk)
1108 		return ERR_PTR(-ECONNREFUSED);
1109 
1110 	dentry = unix_sk(sk)->path.dentry;
1111 	if (dentry)
1112 		touch_atime(&unix_sk(sk)->path);
1113 
1114 	return sk;
1115 }
1116 
1117 static struct sock *unix_find_other(struct net *net,
1118 				    struct sockaddr_un *sunaddr,
1119 				    int addr_len, int type)
1120 {
1121 	struct sock *sk;
1122 
1123 	if (sunaddr->sun_path[0])
1124 		sk = unix_find_bsd(sunaddr, addr_len, type);
1125 	else
1126 		sk = unix_find_abstract(net, sunaddr, addr_len, type);
1127 
1128 	return sk;
1129 }
1130 
1131 static int unix_autobind(struct sock *sk)
1132 {
1133 	struct unix_sock *u = unix_sk(sk);
1134 	unsigned int new_hash, old_hash;
1135 	struct net *net = sock_net(sk);
1136 	struct unix_address *addr;
1137 	u32 lastnum, ordernum;
1138 	int err;
1139 
1140 	err = mutex_lock_interruptible(&u->bindlock);
1141 	if (err)
1142 		return err;
1143 
1144 	if (u->addr)
1145 		goto out;
1146 
1147 	err = -ENOMEM;
1148 	addr = kzalloc(sizeof(*addr) +
1149 		       offsetof(struct sockaddr_un, sun_path) + 16, GFP_KERNEL);
1150 	if (!addr)
1151 		goto out;
1152 
1153 	addr->len = offsetof(struct sockaddr_un, sun_path) + 6;
1154 	addr->name->sun_family = AF_UNIX;
1155 	refcount_set(&addr->refcnt, 1);
1156 
1157 	old_hash = sk->sk_hash;
1158 	ordernum = get_random_u32();
1159 	lastnum = ordernum & 0xFFFFF;
1160 retry:
1161 	ordernum = (ordernum + 1) & 0xFFFFF;
1162 	sprintf(addr->name->sun_path + 1, "%05x", ordernum);
1163 
1164 	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1165 	unix_table_double_lock(net, old_hash, new_hash);
1166 
1167 	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash)) {
1168 		unix_table_double_unlock(net, old_hash, new_hash);
1169 
1170 	/* __unix_find_socket_byname() may take a long time if many names
1171 		 * are already in use.
1172 		 */
1173 		cond_resched();
1174 
1175 		if (ordernum == lastnum) {
1176 			/* Give up if all names seem to be in use. */
1177 			err = -ENOSPC;
1178 			unix_release_addr(addr);
1179 			goto out;
1180 		}
1181 
1182 		goto retry;
1183 	}
1184 
1185 	__unix_set_addr_hash(net, sk, addr, new_hash);
1186 	unix_table_double_unlock(net, old_hash, new_hash);
1187 	err = 0;
1188 
1189 out:	mutex_unlock(&u->bindlock);
1190 	return err;
1191 }
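/*
 * Userspace sketch (hypothetical helper) of triggering autobind: binding with
 * only the address family and reading back the kernel-chosen abstract name of
 * five hex digits, as built above.
 *
 *	#include <stdio.h>
 *	#include <sys/socket.h>
 *	#include <sys/un.h>
 *
 *	int autobind_example(void)
 *	{
 *		struct sockaddr_un addr = { .sun_family = AF_UNIX };
 *		socklen_t len = sizeof(addr);
 *		int fd = socket(AF_UNIX, SOCK_DGRAM, 0);
 *
 *		if (fd < 0)
 *			return -1;
 *		// Passing only sizeof(sa_family_t) takes the unix_autobind()
 *		// path above.
 *		if (bind(fd, (struct sockaddr *)&addr, sizeof(sa_family_t)) < 0)
 *			return -1;
 *		getsockname(fd, (struct sockaddr *)&addr, &len);
 *		printf("autobound to \\0%.5s (addrlen %d)\n",
 *		       addr.sun_path + 1, (int)len);
 *		return fd;
 *	}
 */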
1192 
1193 static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
1194 			 int addr_len)
1195 {
1196 	umode_t mode = S_IFSOCK |
1197 	       (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
1198 	struct unix_sock *u = unix_sk(sk);
1199 	unsigned int new_hash, old_hash;
1200 	struct net *net = sock_net(sk);
1201 	struct mnt_idmap *idmap;
1202 	struct unix_address *addr;
1203 	struct dentry *dentry;
1204 	struct path parent;
1205 	int err;
1206 
1207 	addr_len = unix_mkname_bsd(sunaddr, addr_len);
1208 	addr = unix_create_addr(sunaddr, addr_len);
1209 	if (!addr)
1210 		return -ENOMEM;
1211 
1212 	/*
1213 	 * Get the parent directory, calculate the hash for last
1214 	 * component.
1215 	 */
1216 	dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
1217 	if (IS_ERR(dentry)) {
1218 		err = PTR_ERR(dentry);
1219 		goto out;
1220 	}
1221 
1222 	/*
1223 	 * All right, let's create it.
1224 	 */
1225 	idmap = mnt_idmap(parent.mnt);
1226 	err = security_path_mknod(&parent, dentry, mode, 0);
1227 	if (!err)
1228 		err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0);
1229 	if (err)
1230 		goto out_path;
1231 	err = mutex_lock_interruptible(&u->bindlock);
1232 	if (err)
1233 		goto out_unlink;
1234 	if (u->addr)
1235 		goto out_unlock;
1236 
1237 	old_hash = sk->sk_hash;
1238 	new_hash = unix_bsd_hash(d_backing_inode(dentry));
1239 	unix_table_double_lock(net, old_hash, new_hash);
1240 	u->path.mnt = mntget(parent.mnt);
1241 	u->path.dentry = dget(dentry);
1242 	__unix_set_addr_hash(net, sk, addr, new_hash);
1243 	unix_table_double_unlock(net, old_hash, new_hash);
1244 	unix_insert_bsd_socket(sk);
1245 	mutex_unlock(&u->bindlock);
1246 	done_path_create(&parent, dentry);
1247 	return 0;
1248 
1249 out_unlock:
1250 	mutex_unlock(&u->bindlock);
1251 	err = -EINVAL;
1252 out_unlink:
1253 	/* failed after successful mknod?  unlink what we'd created... */
1254 	vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL);
1255 out_path:
1256 	done_path_create(&parent, dentry);
1257 out:
1258 	unix_release_addr(addr);
1259 	return err == -EEXIST ? -EADDRINUSE : err;
1260 }
1261 
1262 static int unix_bind_abstract(struct sock *sk, struct sockaddr_un *sunaddr,
1263 			      int addr_len)
1264 {
1265 	struct unix_sock *u = unix_sk(sk);
1266 	unsigned int new_hash, old_hash;
1267 	struct net *net = sock_net(sk);
1268 	struct unix_address *addr;
1269 	int err;
1270 
1271 	addr = unix_create_addr(sunaddr, addr_len);
1272 	if (!addr)
1273 		return -ENOMEM;
1274 
1275 	err = mutex_lock_interruptible(&u->bindlock);
1276 	if (err)
1277 		goto out;
1278 
1279 	if (u->addr) {
1280 		err = -EINVAL;
1281 		goto out_mutex;
1282 	}
1283 
1284 	old_hash = sk->sk_hash;
1285 	new_hash = unix_abstract_hash(addr->name, addr->len, sk->sk_type);
1286 	unix_table_double_lock(net, old_hash, new_hash);
1287 
1288 	if (__unix_find_socket_byname(net, addr->name, addr->len, new_hash))
1289 		goto out_spin;
1290 
1291 	__unix_set_addr_hash(net, sk, addr, new_hash);
1292 	unix_table_double_unlock(net, old_hash, new_hash);
1293 	mutex_unlock(&u->bindlock);
1294 	return 0;
1295 
1296 out_spin:
1297 	unix_table_double_unlock(net, old_hash, new_hash);
1298 	err = -EADDRINUSE;
1299 out_mutex:
1300 	mutex_unlock(&u->bindlock);
1301 out:
1302 	unix_release_addr(addr);
1303 	return err;
1304 }
1305 
1306 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1307 {
1308 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1309 	struct sock *sk = sock->sk;
1310 	int err;
1311 
1312 	if (addr_len == offsetof(struct sockaddr_un, sun_path) &&
1313 	    sunaddr->sun_family == AF_UNIX)
1314 		return unix_autobind(sk);
1315 
1316 	err = unix_validate_addr(sunaddr, addr_len);
1317 	if (err)
1318 		return err;
1319 
1320 	if (sunaddr->sun_path[0])
1321 		err = unix_bind_bsd(sk, sunaddr, addr_len);
1322 	else
1323 		err = unix_bind_abstract(sk, sunaddr, addr_len);
1324 
1325 	return err;
1326 }
1327 
1328 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1329 {
1330 	if (unlikely(sk1 == sk2) || !sk2) {
1331 		unix_state_lock(sk1);
1332 		return;
1333 	}
1334 	if (sk1 > sk2)
1335 		swap(sk1, sk2);
1336 
1337 	unix_state_lock(sk1);
1338 	unix_state_lock_nested(sk2, U_LOCK_SECOND);
1339 }
1340 
1341 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1342 {
1343 	if (unlikely(sk1 == sk2) || !sk2) {
1344 		unix_state_unlock(sk1);
1345 		return;
1346 	}
1347 	unix_state_unlock(sk1);
1348 	unix_state_unlock(sk2);
1349 }
1350 
1351 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1352 			      int alen, int flags)
1353 {
1354 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1355 	struct sock *sk = sock->sk;
1356 	struct sock *other;
1357 	int err;
1358 
1359 	err = -EINVAL;
1360 	if (alen < offsetofend(struct sockaddr, sa_family))
1361 		goto out;
1362 
1363 	if (addr->sa_family != AF_UNSPEC) {
1364 		err = unix_validate_addr(sunaddr, alen);
1365 		if (err)
1366 			goto out;
1367 
1368 		if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1369 		     test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
1370 		    !READ_ONCE(unix_sk(sk)->addr)) {
1371 			err = unix_autobind(sk);
1372 			if (err)
1373 				goto out;
1374 		}
1375 
1376 restart:
1377 		other = unix_find_other(sock_net(sk), sunaddr, alen, sock->type);
1378 		if (IS_ERR(other)) {
1379 			err = PTR_ERR(other);
1380 			goto out;
1381 		}
1382 
1383 		unix_state_double_lock(sk, other);
1384 
1385 		/* Apparently VFS overslept socket death. Retry. */
1386 		if (sock_flag(other, SOCK_DEAD)) {
1387 			unix_state_double_unlock(sk, other);
1388 			sock_put(other);
1389 			goto restart;
1390 		}
1391 
1392 		err = -EPERM;
1393 		if (!unix_may_send(sk, other))
1394 			goto out_unlock;
1395 
1396 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1397 		if (err)
1398 			goto out_unlock;
1399 
1400 		WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
1401 		WRITE_ONCE(other->sk_state, TCP_ESTABLISHED);
1402 	} else {
1403 		/*
1404 		 *	1003.1g breaking connected state with AF_UNSPEC
1405 		 */
1406 		other = NULL;
1407 		unix_state_double_lock(sk, other);
1408 	}
1409 
1410 	/*
1411 	 * If it was connected, reconnect.
1412 	 */
1413 	if (unix_peer(sk)) {
1414 		struct sock *old_peer = unix_peer(sk);
1415 
1416 		unix_peer(sk) = other;
1417 		if (!other)
1418 			WRITE_ONCE(sk->sk_state, TCP_CLOSE);
1419 		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1420 
1421 		unix_state_double_unlock(sk, other);
1422 
1423 		if (other != old_peer) {
1424 			unix_dgram_disconnected(sk, old_peer);
1425 
1426 			unix_state_lock(old_peer);
1427 			if (!unix_peer(old_peer))
1428 				WRITE_ONCE(old_peer->sk_state, TCP_CLOSE);
1429 			unix_state_unlock(old_peer);
1430 		}
1431 
1432 		sock_put(old_peer);
1433 	} else {
1434 		unix_peer(sk) = other;
1435 		unix_state_double_unlock(sk, other);
1436 	}
1437 
1438 	return 0;
1439 
1440 out_unlock:
1441 	unix_state_double_unlock(sk, other);
1442 	sock_put(other);
1443 out:
1444 	return err;
1445 }
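/*
 * Userspace sketch (hypothetical helper) of the AF_UNSPEC branch above:
 * dissolving a datagram socket's association.  Afterwards, send() without an
 * explicit destination fails with ENOTCONN.
 *
 *	#include <string.h>
 *	#include <sys/socket.h>
 *
 *	int dgram_disconnect(int fd)
 *	{
 *		struct sockaddr sa;
 *
 *		memset(&sa, 0, sizeof(sa));
 *		sa.sa_family = AF_UNSPEC;
 *		return connect(fd, &sa, sizeof(sa));
 *	}
 */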
1446 
1447 static long unix_wait_for_peer(struct sock *other, long timeo)
1448 	__releases(&unix_sk(other)->lock)
1449 {
1450 	struct unix_sock *u = unix_sk(other);
1451 	int sched;
1452 	DEFINE_WAIT(wait);
1453 
1454 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1455 
1456 	sched = !sock_flag(other, SOCK_DEAD) &&
1457 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1458 		unix_recvq_full_lockless(other);
1459 
1460 	unix_state_unlock(other);
1461 
1462 	if (sched)
1463 		timeo = schedule_timeout(timeo);
1464 
1465 	finish_wait(&u->peer_wait, &wait);
1466 	return timeo;
1467 }
1468 
1469 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1470 			       int addr_len, int flags)
1471 {
1472 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1473 	struct sock *sk = sock->sk, *newsk = NULL, *other = NULL;
1474 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1475 	struct net *net = sock_net(sk);
1476 	struct sk_buff *skb = NULL;
1477 	unsigned char state;
1478 	long timeo;
1479 	int err;
1480 
1481 	err = unix_validate_addr(sunaddr, addr_len);
1482 	if (err)
1483 		goto out;
1484 
1485 	if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1486 	     test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
1487 	    !READ_ONCE(u->addr)) {
1488 		err = unix_autobind(sk);
1489 		if (err)
1490 			goto out;
1491 	}
1492 
1493 	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1494 
1495 	/* First of all allocate resources.
1496 	   If we will make it after state is locked,
1497 	   we will have to recheck all again in any case.
1498 	 */
1499 
1500 	/* create new sock for complete connection */
1501 	newsk = unix_create1(net, NULL, 0, sock->type);
1502 	if (IS_ERR(newsk)) {
1503 		err = PTR_ERR(newsk);
1504 		newsk = NULL;
1505 		goto out;
1506 	}
1507 
1508 	err = -ENOMEM;
1509 
1510 	/* Allocate skb for sending to listening sock */
1511 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1512 	if (skb == NULL)
1513 		goto out;
1514 
1515 restart:
1516 	/*  Find listening sock. */
1517 	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type);
1518 	if (IS_ERR(other)) {
1519 		err = PTR_ERR(other);
1520 		other = NULL;
1521 		goto out;
1522 	}
1523 
1524 	unix_state_lock(other);
1525 
1526 	/* Apparently VFS overslept socket death. Retry. */
1527 	if (sock_flag(other, SOCK_DEAD)) {
1528 		unix_state_unlock(other);
1529 		sock_put(other);
1530 		goto restart;
1531 	}
1532 
1533 	err = -ECONNREFUSED;
1534 	if (other->sk_state != TCP_LISTEN)
1535 		goto out_unlock;
1536 	if (other->sk_shutdown & RCV_SHUTDOWN)
1537 		goto out_unlock;
1538 
1539 	if (unix_recvq_full_lockless(other)) {
1540 		err = -EAGAIN;
1541 		if (!timeo)
1542 			goto out_unlock;
1543 
1544 		timeo = unix_wait_for_peer(other, timeo);
1545 
1546 		err = sock_intr_errno(timeo);
1547 		if (signal_pending(current))
1548 			goto out;
1549 		sock_put(other);
1550 		goto restart;
1551 	}
1552 
1553 	/* self connect and simultaneous connect are eliminated
1554 	 * by rejecting TCP_LISTEN socket to avoid deadlock.
1555 	 */
1556 	state = READ_ONCE(sk->sk_state);
1557 	if (unlikely(state != TCP_CLOSE)) {
1558 		err = state == TCP_ESTABLISHED ? -EISCONN : -EINVAL;
1559 		goto out_unlock;
1560 	}
1561 
1562 	unix_state_lock_nested(sk, U_LOCK_SECOND);
1563 
1564 	if (unlikely(sk->sk_state != TCP_CLOSE)) {
1565 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EINVAL;
1566 		unix_state_unlock(sk);
1567 		goto out_unlock;
1568 	}
1569 
1570 	err = security_unix_stream_connect(sk, other, newsk);
1571 	if (err) {
1572 		unix_state_unlock(sk);
1573 		goto out_unlock;
1574 	}
1575 
1576 	/* The way is open! Fastly set all the necessary fields... */
1577 
1578 	sock_hold(sk);
1579 	unix_peer(newsk)	= sk;
1580 	newsk->sk_state		= TCP_ESTABLISHED;
1581 	newsk->sk_type		= sk->sk_type;
1582 	init_peercred(newsk);
1583 	newu = unix_sk(newsk);
1584 	newu->listener = other;
1585 	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1586 	otheru = unix_sk(other);
1587 
1588 	/* copy address information from listening to new sock
1589 	 *
1590 	 * The contents of *(otheru->addr) and otheru->path
1591 	 * are seen fully set up here, since we have found
1592 	 * otheru in hash under its lock.  Insertion into the
1593 	 * hash chain we'd found it in had been done in an
1594 	 * earlier critical area protected by the chain's lock,
1595 	 * the same one where we'd set *(otheru->addr) contents,
1596 	 * as well as otheru->path and otheru->addr itself.
1597 	 *
1598 	 * Using smp_store_release() here to set newu->addr
1599 	 * is enough to make those stores, as well as stores
1600 	 * to newu->path visible to anyone who gets newu->addr
1601 	 * by smp_load_acquire().  IOW, the same warranties
1602 	 * as for unix_sock instances bound in unix_bind() or
1603 	 * in unix_autobind().
1604 	 */
1605 	if (otheru->path.dentry) {
1606 		path_get(&otheru->path);
1607 		newu->path = otheru->path;
1608 	}
1609 	refcount_inc(&otheru->addr->refcnt);
1610 	smp_store_release(&newu->addr, otheru->addr);
1611 
1612 	/* Set credentials */
1613 	copy_peercred(sk, other);
1614 
1615 	sock->state	= SS_CONNECTED;
1616 	WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED);
1617 	sock_hold(newsk);
1618 
1619 	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
1620 	unix_peer(sk)	= newsk;
1621 
1622 	unix_state_unlock(sk);
1623 
1624 	/* take ten and send info to listening sock */
1625 	spin_lock(&other->sk_receive_queue.lock);
1626 	__skb_queue_tail(&other->sk_receive_queue, skb);
1627 	spin_unlock(&other->sk_receive_queue.lock);
1628 	unix_state_unlock(other);
1629 	other->sk_data_ready(other);
1630 	sock_put(other);
1631 	return 0;
1632 
1633 out_unlock:
1634 	if (other)
1635 		unix_state_unlock(other);
1636 
1637 out:
1638 	kfree_skb(skb);
1639 	if (newsk)
1640 		unix_release_sock(newsk, 0);
1641 	if (other)
1642 		sock_put(other);
1643 	return err;
1644 }
1645 
1646 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1647 {
1648 	struct sock *ska = socka->sk, *skb = sockb->sk;
1649 
1650 	/* Join our sockets back to back */
1651 	sock_hold(ska);
1652 	sock_hold(skb);
1653 	unix_peer(ska) = skb;
1654 	unix_peer(skb) = ska;
1655 	init_peercred(ska);
1656 	init_peercred(skb);
1657 
1658 	ska->sk_state = TCP_ESTABLISHED;
1659 	skb->sk_state = TCP_ESTABLISHED;
1660 	socka->state  = SS_CONNECTED;
1661 	sockb->state  = SS_CONNECTED;
1662 	return 0;
1663 }
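/*
 * Userspace sketch (hypothetical helper): socketpair() returns two
 * descriptors that are already connected back to back, so no
 * bind/listen/accept round trip is needed, and SO_PEERCRED on either end
 * reports the creating process (init_peercred() above).
 *
 *	#include <sys/socket.h>
 *	#include <unistd.h>
 *
 *	int make_pair(int sv[2])
 *	{
 *		if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0)
 *			return -1;
 *		// Typical use: hand sv[1] to a child process and close it here.
 *		return 0;
 *	}
 */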
1664 
1665 static void unix_sock_inherit_flags(const struct socket *old,
1666 				    struct socket *new)
1667 {
1668 	if (test_bit(SOCK_PASSCRED, &old->flags))
1669 		set_bit(SOCK_PASSCRED, &new->flags);
1670 	if (test_bit(SOCK_PASSPIDFD, &old->flags))
1671 		set_bit(SOCK_PASSPIDFD, &new->flags);
1672 	if (test_bit(SOCK_PASSSEC, &old->flags))
1673 		set_bit(SOCK_PASSSEC, &new->flags);
1674 }
1675 
1676 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1677 		       bool kern)
1678 {
1679 	struct sock *sk = sock->sk;
1680 	struct sk_buff *skb;
1681 	struct sock *tsk;
1682 	int err;
1683 
1684 	err = -EOPNOTSUPP;
1685 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1686 		goto out;
1687 
1688 	err = -EINVAL;
1689 	if (READ_ONCE(sk->sk_state) != TCP_LISTEN)
1690 		goto out;
1691 
1692 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1693 	 * so that no locks are necessary.
1694 	 */
1695 
1696 	skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0,
1697 				&err);
1698 	if (!skb) {
1699 		/* This means receive shutdown. */
1700 		if (err == 0)
1701 			err = -EINVAL;
1702 		goto out;
1703 	}
1704 
1705 	tsk = skb->sk;
1706 	skb_free_datagram(sk, skb);
1707 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1708 
1709 	/* attach accepted sock to socket */
1710 	unix_state_lock(tsk);
1711 	unix_update_edges(unix_sk(tsk));
1712 	newsock->state = SS_CONNECTED;
1713 	unix_sock_inherit_flags(sock, newsock);
1714 	sock_graft(tsk, newsock);
1715 	unix_state_unlock(tsk);
1716 	return 0;
1717 
1718 out:
1719 	return err;
1720 }
1721 
1722 
1723 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1724 {
1725 	struct sock *sk = sock->sk;
1726 	struct unix_address *addr;
1727 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1728 	int err = 0;
1729 
1730 	if (peer) {
1731 		sk = unix_peer_get(sk);
1732 
1733 		err = -ENOTCONN;
1734 		if (!sk)
1735 			goto out;
1736 		err = 0;
1737 	} else {
1738 		sock_hold(sk);
1739 	}
1740 
1741 	addr = smp_load_acquire(&unix_sk(sk)->addr);
1742 	if (!addr) {
1743 		sunaddr->sun_family = AF_UNIX;
1744 		sunaddr->sun_path[0] = 0;
1745 		err = offsetof(struct sockaddr_un, sun_path);
1746 	} else {
1747 		err = addr->len;
1748 		memcpy(sunaddr, addr->name, addr->len);
1749 	}
1750 	sock_put(sk);
1751 out:
1752 	return err;
1753 }
1754 
1755 /* The "user->unix_inflight" variable is protected by the garbage
1756  * collection lock, and we just read it locklessly here. If you go
1757  * over the limit, there might be a tiny race in actually noticing
1758  * it across threads. Tough.
1759  */
1760 static inline bool too_many_unix_fds(struct task_struct *p)
1761 {
1762 	struct user_struct *user = current_user();
1763 
1764 	if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
1765 		return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1766 	return false;
1767 }
1768 
1769 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1770 {
1771 	if (too_many_unix_fds(current))
1772 		return -ETOOMANYREFS;
1773 
1774 	/* Need to duplicate file references for the sake of garbage
1775 	 * collection.  Otherwise a socket in the fps might become a
1776 	 * candidate for GC while the skb is not yet queued.
1777 	 */
1778 	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1779 	if (!UNIXCB(skb).fp)
1780 		return -ENOMEM;
1781 
1782 	if (unix_prepare_fpl(UNIXCB(skb).fp))
1783 		return -ENOMEM;
1784 
1785 	return 0;
1786 }
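
/* Illustrative userspace sketch (not part of the kernel source): the
 * fp list duplicated above originates from an SCM_RIGHTS control
 * message.  A minimal sender passing one descriptor; "fd_to_pass" and
 * "sock_fd" are placeholder names:
 *
 *	char buf[CMSG_SPACE(sizeof(int))] = { 0 };
 *	struct iovec iov = { .iov_base = "x", .iov_len = 1 };
 *	struct msghdr mh = { .msg_iov = &iov, .msg_iovlen = 1,
 *			     .msg_control = buf, .msg_controllen = sizeof(buf) };
 *	struct cmsghdr *c = CMSG_FIRSTHDR(&mh);
 *
 *	c->cmsg_level = SOL_SOCKET;
 *	c->cmsg_type = SCM_RIGHTS;
 *	c->cmsg_len = CMSG_LEN(sizeof(int));
 *	memcpy(CMSG_DATA(c), &fd_to_pass, sizeof(int));
 *	sendmsg(sock_fd, &mh, 0);	// can fail with ETOOMANYREFS, see above
 */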
1787 
1788 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1789 {
1790 	scm->fp = UNIXCB(skb).fp;
1791 	UNIXCB(skb).fp = NULL;
1792 
1793 	unix_destroy_fpl(scm->fp);
1794 }
1795 
1796 static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
1797 {
1798 	scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1799 }
1800 
1801 static void unix_destruct_scm(struct sk_buff *skb)
1802 {
1803 	struct scm_cookie scm;
1804 
1805 	memset(&scm, 0, sizeof(scm));
1806 	scm.pid  = UNIXCB(skb).pid;
1807 	if (UNIXCB(skb).fp)
1808 		unix_detach_fds(&scm, skb);
1809 
1810 	/* Alas, it calls VFS */
1811 	/* So fscking what? fput() has been SMP-safe since last summer */
1812 	scm_destroy(&scm);
1813 	sock_wfree(skb);
1814 }
1815 
1816 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1817 {
1818 	int err = 0;
1819 
1820 	UNIXCB(skb).pid  = get_pid(scm->pid);
1821 	UNIXCB(skb).uid = scm->creds.uid;
1822 	UNIXCB(skb).gid = scm->creds.gid;
1823 	UNIXCB(skb).fp = NULL;
1824 	unix_get_secdata(scm, skb);
1825 	if (scm->fp && send_fds)
1826 		err = unix_attach_fds(scm, skb);
1827 
1828 	skb->destructor = unix_destruct_scm;
1829 	return err;
1830 }
1831 
1832 static bool unix_passcred_enabled(const struct socket *sock,
1833 				  const struct sock *other)
1834 {
1835 	return test_bit(SOCK_PASSCRED, &sock->flags) ||
1836 	       test_bit(SOCK_PASSPIDFD, &sock->flags) ||
1837 	       !other->sk_socket ||
1838 	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags) ||
1839 	       test_bit(SOCK_PASSPIDFD, &other->sk_socket->flags);
1840 }
1841 
1842 /*
1843  * Some apps rely on write() giving SCM_CREDENTIALS.
1844  * We include credentials if the source or destination socket
1845  * asserted SOCK_PASSCRED.
1846  */
1847 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1848 			    const struct sock *other)
1849 {
1850 	if (UNIXCB(skb).pid)
1851 		return;
1852 	if (unix_passcred_enabled(sock, other)) {
1853 		UNIXCB(skb).pid  = get_pid(task_tgid(current));
1854 		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1855 	}
1856 }
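
/* Illustrative userspace sketch (not part of the kernel source): the
 * credentials attached above surface as an SCM_CREDENTIALS control
 * message once the receiver has enabled SO_PASSCRED (SO_PASSPIDFD
 * works analogously).  The descriptor name "fd" is a placeholder:
 *
 *	int on = 1;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on));
 *	// Subsequent recvmsg(2) calls then carry a cmsg of level
 *	// SOL_SOCKET, type SCM_CREDENTIALS, holding a struct ucred
 *	// with the sender's pid, uid and gid.
 */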
1857 
1858 static bool unix_skb_scm_eq(struct sk_buff *skb,
1859 			    struct scm_cookie *scm)
1860 {
1861 	return UNIXCB(skb).pid == scm->pid &&
1862 	       uid_eq(UNIXCB(skb).uid, scm->creds.uid) &&
1863 	       gid_eq(UNIXCB(skb).gid, scm->creds.gid) &&
1864 	       unix_secdata_eq(scm, skb);
1865 }
1866 
1867 static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1868 {
1869 	struct scm_fp_list *fp = UNIXCB(skb).fp;
1870 	struct unix_sock *u = unix_sk(sk);
1871 
1872 	if (unlikely(fp && fp->count)) {
1873 		atomic_add(fp->count, &u->scm_stat.nr_fds);
1874 		unix_add_edges(fp, u);
1875 	}
1876 }
1877 
1878 static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1879 {
1880 	struct scm_fp_list *fp = UNIXCB(skb).fp;
1881 	struct unix_sock *u = unix_sk(sk);
1882 
1883 	if (unlikely(fp && fp->count)) {
1884 		atomic_sub(fp->count, &u->scm_stat.nr_fds);
1885 		unix_del_edges(fp);
1886 	}
1887 }
1888 
1889 /*
1890  *	Send AF_UNIX data.
1891  */
1892 
1893 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1894 			      size_t len)
1895 {
1896 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1897 	struct sock *sk = sock->sk, *other = NULL;
1898 	struct unix_sock *u = unix_sk(sk);
1899 	struct scm_cookie scm;
1900 	struct sk_buff *skb;
1901 	int data_len = 0;
1902 	int sk_locked;
1903 	long timeo;
1904 	int err;
1905 
1906 	err = scm_send(sock, msg, &scm, false);
1907 	if (err < 0)
1908 		return err;
1909 
1910 	wait_for_unix_gc(scm.fp);
1911 
1912 	err = -EOPNOTSUPP;
1913 	if (msg->msg_flags&MSG_OOB)
1914 		goto out;
1915 
1916 	if (msg->msg_namelen) {
1917 		err = unix_validate_addr(sunaddr, msg->msg_namelen);
1918 		if (err)
1919 			goto out;
1920 	} else {
1921 		sunaddr = NULL;
1922 		err = -ENOTCONN;
1923 		other = unix_peer_get(sk);
1924 		if (!other)
1925 			goto out;
1926 	}
1927 
1928 	if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
1929 	     test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
1930 	    !READ_ONCE(u->addr)) {
1931 		err = unix_autobind(sk);
1932 		if (err)
1933 			goto out;
1934 	}
1935 
1936 	err = -EMSGSIZE;
1937 	if (len > READ_ONCE(sk->sk_sndbuf) - 32)
1938 		goto out;
1939 
1940 	if (len > SKB_MAX_ALLOC) {
1941 		data_len = min_t(size_t,
1942 				 len - SKB_MAX_ALLOC,
1943 				 MAX_SKB_FRAGS * PAGE_SIZE);
1944 		data_len = PAGE_ALIGN(data_len);
1945 
1946 		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1947 	}
1948 
1949 	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1950 				   msg->msg_flags & MSG_DONTWAIT, &err,
1951 				   PAGE_ALLOC_COSTLY_ORDER);
1952 	if (skb == NULL)
1953 		goto out;
1954 
1955 	err = unix_scm_to_skb(&scm, skb, true);
1956 	if (err < 0)
1957 		goto out_free;
1958 
1959 	skb_put(skb, len - data_len);
1960 	skb->data_len = data_len;
1961 	skb->len = len;
1962 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1963 	if (err)
1964 		goto out_free;
1965 
1966 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1967 
1968 restart:
1969 	if (!other) {
1970 		err = -ECONNRESET;
1971 		if (sunaddr == NULL)
1972 			goto out_free;
1973 
1974 		other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen,
1975 					sk->sk_type);
1976 		if (IS_ERR(other)) {
1977 			err = PTR_ERR(other);
1978 			other = NULL;
1979 			goto out_free;
1980 		}
1981 	}
1982 
1983 	if (sk_filter(other, skb) < 0) {
1984 		/* Toss the packet but do not return any error to the sender */
1985 		err = len;
1986 		goto out_free;
1987 	}
1988 
1989 	sk_locked = 0;
1990 	unix_state_lock(other);
1991 restart_locked:
1992 	err = -EPERM;
1993 	if (!unix_may_send(sk, other))
1994 		goto out_unlock;
1995 
1996 	if (unlikely(sock_flag(other, SOCK_DEAD))) {
1997 		/*
1998 		 *	Check with 1003.1g - what should a
1999 		 *	datagram error return here?
2000 		 */
2001 		unix_state_unlock(other);
2002 		sock_put(other);
2003 
2004 		if (!sk_locked)
2005 			unix_state_lock(sk);
2006 
2007 		err = 0;
2008 		if (sk->sk_type == SOCK_SEQPACKET) {
2009 			/* We are here only when racing with unix_release_sock(),
2010 			 * which is clearing @other. Never change the state to
2011 			 * TCP_CLOSE, unlike what SOCK_DGRAM wants.
2012 			 */
2013 			unix_state_unlock(sk);
2014 			err = -EPIPE;
2015 		} else if (unix_peer(sk) == other) {
2016 			unix_peer(sk) = NULL;
2017 			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
2018 
2019 			WRITE_ONCE(sk->sk_state, TCP_CLOSE);
2020 			unix_state_unlock(sk);
2021 
2022 			unix_dgram_disconnected(sk, other);
2023 			sock_put(other);
2024 			err = -ECONNREFUSED;
2025 		} else {
2026 			unix_state_unlock(sk);
2027 		}
2028 
2029 		other = NULL;
2030 		if (err)
2031 			goto out_free;
2032 		goto restart;
2033 	}
2034 
2035 	err = -EPIPE;
2036 	if (other->sk_shutdown & RCV_SHUTDOWN)
2037 		goto out_unlock;
2038 
2039 	if (sk->sk_type != SOCK_SEQPACKET) {
2040 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
2041 		if (err)
2042 			goto out_unlock;
2043 	}
2044 
2045 	/* other == sk && unix_peer(other) != sk if
2046 	 * - unix_peer(sk) == NULL, destination address bound to sk
2047 	 * - unix_peer(sk) == sk by time of get but disconnected before lock
2048 	 */
2049 	if (other != sk &&
2050 	    unlikely(unix_peer(other) != sk &&
2051 	    unix_recvq_full_lockless(other))) {
2052 		if (timeo) {
2053 			timeo = unix_wait_for_peer(other, timeo);
2054 
2055 			err = sock_intr_errno(timeo);
2056 			if (signal_pending(current))
2057 				goto out_free;
2058 
2059 			goto restart;
2060 		}
2061 
2062 		if (!sk_locked) {
2063 			unix_state_unlock(other);
2064 			unix_state_double_lock(sk, other);
2065 		}
2066 
2067 		if (unix_peer(sk) != other ||
2068 		    unix_dgram_peer_wake_me(sk, other)) {
2069 			err = -EAGAIN;
2070 			sk_locked = 1;
2071 			goto out_unlock;
2072 		}
2073 
2074 		if (!sk_locked) {
2075 			sk_locked = 1;
2076 			goto restart_locked;
2077 		}
2078 	}
2079 
2080 	if (unlikely(sk_locked))
2081 		unix_state_unlock(sk);
2082 
2083 	if (sock_flag(other, SOCK_RCVTSTAMP))
2084 		__net_timestamp(skb);
2085 	maybe_add_creds(skb, sock, other);
2086 	scm_stat_add(other, skb);
2087 	skb_queue_tail(&other->sk_receive_queue, skb);
2088 	unix_state_unlock(other);
2089 	other->sk_data_ready(other);
2090 	sock_put(other);
2091 	scm_destroy(&scm);
2092 	return len;
2093 
2094 out_unlock:
2095 	if (sk_locked)
2096 		unix_state_unlock(sk);
2097 	unix_state_unlock(other);
2098 out_free:
2099 	kfree_skb(skb);
2100 out:
2101 	if (other)
2102 		sock_put(other);
2103 	scm_destroy(&scm);
2104 	return err;
2105 }
2106 
2107 /* We use paged skbs for stream sockets, and limit occupancy to 32768
2108  * bytes, with a minimum of a full page.
2109  */
2110 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
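
/* Worked example (illustrative): with the common PAGE_SIZE of 4096,
 * get_order(32768) is 3, so UNIX_SKB_FRAGS_SZ evaluates to
 * 4096 << 3 == 32768 bytes, i.e. the 32 KiB occupancy limit noted
 * above.  On an architecture with 64 KiB pages, get_order(32768) is 0
 * and the macro comes out to one full page instead, consistent with
 * the "minimum of a full page" note.
 */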
2111 
2112 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2113 static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other,
2114 		     struct scm_cookie *scm, bool fds_sent)
2115 {
2116 	struct unix_sock *ousk = unix_sk(other);
2117 	struct sk_buff *skb;
2118 	int err = 0;
2119 
2120 	skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
2121 
2122 	if (!skb)
2123 		return err;
2124 
2125 	err = unix_scm_to_skb(scm, skb, !fds_sent);
2126 	if (err < 0) {
2127 		kfree_skb(skb);
2128 		return err;
2129 	}
2130 	skb_put(skb, 1);
2131 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
2132 
2133 	if (err) {
2134 		kfree_skb(skb);
2135 		return err;
2136 	}
2137 
2138 	unix_state_lock(other);
2139 
2140 	if (sock_flag(other, SOCK_DEAD) ||
2141 	    (other->sk_shutdown & RCV_SHUTDOWN)) {
2142 		unix_state_unlock(other);
2143 		kfree_skb(skb);
2144 		return -EPIPE;
2145 	}
2146 
2147 	maybe_add_creds(skb, sock, other);
2148 	skb_get(skb);
2149 
2150 	scm_stat_add(other, skb);
2151 
2152 	spin_lock(&other->sk_receive_queue.lock);
2153 	if (ousk->oob_skb)
2154 		consume_skb(ousk->oob_skb);
2155 	WRITE_ONCE(ousk->oob_skb, skb);
2156 	__skb_queue_tail(&other->sk_receive_queue, skb);
2157 	spin_unlock(&other->sk_receive_queue.lock);
2158 
2159 	sk_send_sigurg(other);
2160 	unix_state_unlock(other);
2161 	other->sk_data_ready(other);
2162 
2163 	return err;
2164 }
2165 #endif
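
/* Illustrative userspace sketch (not part of the kernel source): the
 * out-of-band byte queued by queue_oob() above is produced and
 * consumed with MSG_OOB, mirroring TCP urgent-data semantics on
 * AF_UNIX stream sockets.  "fd" and "peer_fd" are placeholders:
 *
 *	send(fd, "!", 1, MSG_OOB);		// mark one byte as OOB
 *	...
 *	char c;
 *	recv(peer_fd, &c, 1, MSG_OOB);		// fetch it out of band
 *	// With SO_OOBINLINE set on the receiver (SOCK_URGINLINE here),
 *	// the byte stays in the normal data stream instead.
 */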
2166 
2167 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
2168 			       size_t len)
2169 {
2170 	struct sock *sk = sock->sk;
2171 	struct sock *other = NULL;
2172 	int err, size;
2173 	struct sk_buff *skb;
2174 	int sent = 0;
2175 	struct scm_cookie scm;
2176 	bool fds_sent = false;
2177 	int data_len;
2178 
2179 	err = scm_send(sock, msg, &scm, false);
2180 	if (err < 0)
2181 		return err;
2182 
2183 	wait_for_unix_gc(scm.fp);
2184 
2185 	err = -EOPNOTSUPP;
2186 	if (msg->msg_flags & MSG_OOB) {
2187 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2188 		if (len)
2189 			len--;
2190 		else
2191 #endif
2192 			goto out_err;
2193 	}
2194 
2195 	if (msg->msg_namelen) {
2196 		err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
2197 		goto out_err;
2198 	} else {
2199 		err = -ENOTCONN;
2200 		other = unix_peer(sk);
2201 		if (!other)
2202 			goto out_err;
2203 	}
2204 
2205 	if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
2206 		goto pipe_err;
2207 
2208 	while (sent < len) {
2209 		size = len - sent;
2210 
2211 		if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2212 			skb = sock_alloc_send_pskb(sk, 0, 0,
2213 						   msg->msg_flags & MSG_DONTWAIT,
2214 						   &err, 0);
2215 		} else {
2216 			/* Keep two messages in the pipe so it schedules better */
2217 			size = min_t(int, size, (READ_ONCE(sk->sk_sndbuf) >> 1) - 64);
2218 
2219 			/* allow fallback to order-0 allocations */
2220 			size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
2221 
2222 			data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
2223 
2224 			data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
2225 
2226 			skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
2227 						   msg->msg_flags & MSG_DONTWAIT, &err,
2228 						   get_order(UNIX_SKB_FRAGS_SZ));
2229 		}
2230 		if (!skb)
2231 			goto out_err;
2232 
2233 		/* Only send the fds in the first buffer */
2234 		err = unix_scm_to_skb(&scm, skb, !fds_sent);
2235 		if (err < 0) {
2236 			kfree_skb(skb);
2237 			goto out_err;
2238 		}
2239 		fds_sent = true;
2240 
2241 		if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
2242 			skb->ip_summed = CHECKSUM_UNNECESSARY;
2243 			err = skb_splice_from_iter(skb, &msg->msg_iter, size,
2244 						   sk->sk_allocation);
2245 			if (err < 0) {
2246 				kfree_skb(skb);
2247 				goto out_err;
2248 			}
2249 			size = err;
2250 			refcount_add(size, &sk->sk_wmem_alloc);
2251 		} else {
2252 			skb_put(skb, size - data_len);
2253 			skb->data_len = data_len;
2254 			skb->len = size;
2255 			err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
2256 			if (err) {
2257 				kfree_skb(skb);
2258 				goto out_err;
2259 			}
2260 		}
2261 
2262 		unix_state_lock(other);
2263 
2264 		if (sock_flag(other, SOCK_DEAD) ||
2265 		    (other->sk_shutdown & RCV_SHUTDOWN))
2266 			goto pipe_err_free;
2267 
2268 		maybe_add_creds(skb, sock, other);
2269 		scm_stat_add(other, skb);
2270 		skb_queue_tail(&other->sk_receive_queue, skb);
2271 		unix_state_unlock(other);
2272 		other->sk_data_ready(other);
2273 		sent += size;
2274 	}
2275 
2276 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2277 	if (msg->msg_flags & MSG_OOB) {
2278 		err = queue_oob(sock, msg, other, &scm, fds_sent);
2279 		if (err)
2280 			goto out_err;
2281 		sent++;
2282 	}
2283 #endif
2284 
2285 	scm_destroy(&scm);
2286 
2287 	return sent;
2288 
2289 pipe_err_free:
2290 	unix_state_unlock(other);
2291 	kfree_skb(skb);
2292 pipe_err:
2293 	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
2294 		send_sig(SIGPIPE, current, 0);
2295 	err = -EPIPE;
2296 out_err:
2297 	scm_destroy(&scm);
2298 	return sent ? : err;
2299 }
2300 
2301 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2302 				  size_t len)
2303 {
2304 	int err;
2305 	struct sock *sk = sock->sk;
2306 
2307 	err = sock_error(sk);
2308 	if (err)
2309 		return err;
2310 
2311 	if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
2312 		return -ENOTCONN;
2313 
2314 	if (msg->msg_namelen)
2315 		msg->msg_namelen = 0;
2316 
2317 	return unix_dgram_sendmsg(sock, msg, len);
2318 }
2319 
2320 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2321 				  size_t size, int flags)
2322 {
2323 	struct sock *sk = sock->sk;
2324 
2325 	if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)
2326 		return -ENOTCONN;
2327 
2328 	return unix_dgram_recvmsg(sock, msg, size, flags);
2329 }
2330 
2331 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2332 {
2333 	struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2334 
2335 	if (addr) {
2336 		msg->msg_namelen = addr->len;
2337 		memcpy(msg->msg_name, addr->name, addr->len);
2338 	}
2339 }
2340 
2341 int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
2342 			 int flags)
2343 {
2344 	struct scm_cookie scm;
2345 	struct socket *sock = sk->sk_socket;
2346 	struct unix_sock *u = unix_sk(sk);
2347 	struct sk_buff *skb, *last;
2348 	long timeo;
2349 	int skip;
2350 	int err;
2351 
2352 	err = -EOPNOTSUPP;
2353 	if (flags&MSG_OOB)
2354 		goto out;
2355 
2356 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2357 
2358 	do {
2359 		mutex_lock(&u->iolock);
2360 
2361 		skip = sk_peek_offset(sk, flags);
2362 		skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2363 					      &skip, &err, &last);
2364 		if (skb) {
2365 			if (!(flags & MSG_PEEK))
2366 				scm_stat_del(sk, skb);
2367 			break;
2368 		}
2369 
2370 		mutex_unlock(&u->iolock);
2371 
2372 		if (err != -EAGAIN)
2373 			break;
2374 	} while (timeo &&
2375 		 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2376 					      &err, &timeo, last));
2377 
2378 	if (!skb) { /* implies iolock unlocked */
2379 		unix_state_lock(sk);
2380 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2381 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2382 		    (sk->sk_shutdown & RCV_SHUTDOWN))
2383 			err = 0;
2384 		unix_state_unlock(sk);
2385 		goto out;
2386 	}
2387 
2388 	if (wq_has_sleeper(&u->peer_wait))
2389 		wake_up_interruptible_sync_poll(&u->peer_wait,
2390 						EPOLLOUT | EPOLLWRNORM |
2391 						EPOLLWRBAND);
2392 
2393 	if (msg->msg_name)
2394 		unix_copy_addr(msg, skb->sk);
2395 
2396 	if (size > skb->len - skip)
2397 		size = skb->len - skip;
2398 	else if (size < skb->len - skip)
2399 		msg->msg_flags |= MSG_TRUNC;
2400 
2401 	err = skb_copy_datagram_msg(skb, skip, msg, size);
2402 	if (err)
2403 		goto out_free;
2404 
2405 	if (sock_flag(sk, SOCK_RCVTSTAMP))
2406 		__sock_recv_timestamp(msg, sk, skb);
2407 
2408 	memset(&scm, 0, sizeof(scm));
2409 
2410 	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2411 	unix_set_secdata(&scm, skb);
2412 
2413 	if (!(flags & MSG_PEEK)) {
2414 		if (UNIXCB(skb).fp)
2415 			unix_detach_fds(&scm, skb);
2416 
2417 		sk_peek_offset_bwd(sk, skb->len);
2418 	} else {
2419 		/* It is questionable: on MSG_PEEK we could:
2420 		   - not return fds - good, but too simple 8)
2421 		   - return fds, and not return them on read (the old strategy,
2422 		     apparently wrong)
2423 		   - clone fds (chosen for now; it is the most universal
2424 		     solution)
2425 
2426 		   POSIX 1003.1g does not actually define this clearly
2427 		   at all. POSIX 1003.1g doesn't define a lot of things
2428 		   clearly however!
2429 
2430 		*/
2431 
2432 		sk_peek_offset_fwd(sk, size);
2433 
2434 		if (UNIXCB(skb).fp)
2435 			unix_peek_fds(&scm, skb);
2436 	}
2437 	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2438 
2439 	scm_recv_unix(sock, msg, &scm, flags);
2440 
2441 out_free:
2442 	skb_free_datagram(sk, skb);
2443 	mutex_unlock(&u->iolock);
2444 out:
2445 	return err;
2446 }
2447 
2448 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2449 			      int flags)
2450 {
2451 	struct sock *sk = sock->sk;
2452 
2453 #ifdef CONFIG_BPF_SYSCALL
2454 	const struct proto *prot = READ_ONCE(sk->sk_prot);
2455 
2456 	if (prot != &unix_dgram_proto)
2457 		return prot->recvmsg(sk, msg, size, flags, NULL);
2458 #endif
2459 	return __unix_dgram_recvmsg(sk, msg, size, flags);
2460 }
2461 
2462 static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
2463 {
2464 	struct unix_sock *u = unix_sk(sk);
2465 	struct sk_buff *skb;
2466 	int err;
2467 
2468 	mutex_lock(&u->iolock);
2469 	skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
2470 	mutex_unlock(&u->iolock);
2471 	if (!skb)
2472 		return err;
2473 
2474 	return recv_actor(sk, skb);
2475 }
2476 
2477 /*
2478  *	Sleep until more data has arrived. But check for races.
2479  */
2480 static long unix_stream_data_wait(struct sock *sk, long timeo,
2481 				  struct sk_buff *last, unsigned int last_len,
2482 				  bool freezable)
2483 {
2484 	unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
2485 	struct sk_buff *tail;
2486 	DEFINE_WAIT(wait);
2487 
2488 	unix_state_lock(sk);
2489 
2490 	for (;;) {
2491 		prepare_to_wait(sk_sleep(sk), &wait, state);
2492 
2493 		tail = skb_peek_tail(&sk->sk_receive_queue);
2494 		if (tail != last ||
2495 		    (tail && tail->len != last_len) ||
2496 		    sk->sk_err ||
2497 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2498 		    signal_pending(current) ||
2499 		    !timeo)
2500 			break;
2501 
2502 		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2503 		unix_state_unlock(sk);
2504 		timeo = schedule_timeout(timeo);
2505 		unix_state_lock(sk);
2506 
2507 		if (sock_flag(sk, SOCK_DEAD))
2508 			break;
2509 
2510 		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2511 	}
2512 
2513 	finish_wait(sk_sleep(sk), &wait);
2514 	unix_state_unlock(sk);
2515 	return timeo;
2516 }
2517 
2518 static unsigned int unix_skb_len(const struct sk_buff *skb)
2519 {
2520 	return skb->len - UNIXCB(skb).consumed;
2521 }
2522 
2523 struct unix_stream_read_state {
2524 	int (*recv_actor)(struct sk_buff *, int, int,
2525 			  struct unix_stream_read_state *);
2526 	struct socket *socket;
2527 	struct msghdr *msg;
2528 	struct pipe_inode_info *pipe;
2529 	size_t size;
2530 	int flags;
2531 	unsigned int splice_flags;
2532 };
2533 
2534 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2535 static int unix_stream_recv_urg(struct unix_stream_read_state *state)
2536 {
2537 	struct socket *sock = state->socket;
2538 	struct sock *sk = sock->sk;
2539 	struct unix_sock *u = unix_sk(sk);
2540 	int chunk = 1;
2541 	struct sk_buff *oob_skb;
2542 
2543 	mutex_lock(&u->iolock);
2544 	unix_state_lock(sk);
2545 	spin_lock(&sk->sk_receive_queue.lock);
2546 
2547 	if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
2548 		spin_unlock(&sk->sk_receive_queue.lock);
2549 		unix_state_unlock(sk);
2550 		mutex_unlock(&u->iolock);
2551 		return -EINVAL;
2552 	}
2553 
2554 	oob_skb = u->oob_skb;
2555 
2556 	if (!(state->flags & MSG_PEEK))
2557 		WRITE_ONCE(u->oob_skb, NULL);
2558 	else
2559 		skb_get(oob_skb);
2560 
2561 	spin_unlock(&sk->sk_receive_queue.lock);
2562 	unix_state_unlock(sk);
2563 
2564 	chunk = state->recv_actor(oob_skb, 0, chunk, state);
2565 
2566 	if (!(state->flags & MSG_PEEK))
2567 		UNIXCB(oob_skb).consumed += 1;
2568 
2569 	consume_skb(oob_skb);
2570 
2571 	mutex_unlock(&u->iolock);
2572 
2573 	if (chunk < 0)
2574 		return -EFAULT;
2575 
2576 	state->msg->msg_flags |= MSG_OOB;
2577 	return 1;
2578 }
2579 
2580 static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
2581 				  int flags, int copied)
2582 {
2583 	struct unix_sock *u = unix_sk(sk);
2584 
2585 	if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
2586 		skb_unlink(skb, &sk->sk_receive_queue);
2587 		consume_skb(skb);
2588 		skb = NULL;
2589 	} else {
2590 		struct sk_buff *unlinked_skb = NULL;
2591 
2592 		spin_lock(&sk->sk_receive_queue.lock);
2593 
2594 		if (skb == u->oob_skb) {
2595 			if (copied) {
2596 				skb = NULL;
2597 			} else if (!(flags & MSG_PEEK)) {
2598 				if (sock_flag(sk, SOCK_URGINLINE)) {
2599 					WRITE_ONCE(u->oob_skb, NULL);
2600 					consume_skb(skb);
2601 				} else {
2602 					__skb_unlink(skb, &sk->sk_receive_queue);
2603 					WRITE_ONCE(u->oob_skb, NULL);
2604 					unlinked_skb = skb;
2605 					skb = skb_peek(&sk->sk_receive_queue);
2606 				}
2607 			} else if (!sock_flag(sk, SOCK_URGINLINE)) {
2608 				skb = skb_peek_next(skb, &sk->sk_receive_queue);
2609 			}
2610 		}
2611 
2612 		spin_unlock(&sk->sk_receive_queue.lock);
2613 
2614 		if (unlinked_skb) {
2615 			WARN_ON_ONCE(skb_unref(unlinked_skb));
2616 			kfree_skb(unlinked_skb);
2617 		}
2618 	}
2619 	return skb;
2620 }
2621 #endif
2622 
2623 static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
2624 {
2625 	struct unix_sock *u = unix_sk(sk);
2626 	struct sk_buff *skb;
2627 	int err;
2628 
2629 	if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED))
2630 		return -ENOTCONN;
2631 
2632 	mutex_lock(&u->iolock);
2633 	skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
2634 	mutex_unlock(&u->iolock);
2635 	if (!skb)
2636 		return err;
2637 
2638 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2639 	if (unlikely(skb == READ_ONCE(u->oob_skb))) {
2640 		bool drop = false;
2641 
2642 		unix_state_lock(sk);
2643 
2644 		if (sock_flag(sk, SOCK_DEAD)) {
2645 			unix_state_unlock(sk);
2646 			kfree_skb(skb);
2647 			return -ECONNRESET;
2648 		}
2649 
2650 		spin_lock(&sk->sk_receive_queue.lock);
2651 		if (likely(skb == u->oob_skb)) {
2652 			WRITE_ONCE(u->oob_skb, NULL);
2653 			drop = true;
2654 		}
2655 		spin_unlock(&sk->sk_receive_queue.lock);
2656 
2657 		unix_state_unlock(sk);
2658 
2659 		if (drop) {
2660 			WARN_ON_ONCE(skb_unref(skb));
2661 			kfree_skb(skb);
2662 			return -EAGAIN;
2663 		}
2664 	}
2665 #endif
2666 
2667 	return recv_actor(sk, skb);
2668 }
2669 
2670 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2671 				    bool freezable)
2672 {
2673 	struct scm_cookie scm;
2674 	struct socket *sock = state->socket;
2675 	struct sock *sk = sock->sk;
2676 	struct unix_sock *u = unix_sk(sk);
2677 	int copied = 0;
2678 	int flags = state->flags;
2679 	int noblock = flags & MSG_DONTWAIT;
2680 	bool check_creds = false;
2681 	int target;
2682 	int err = 0;
2683 	long timeo;
2684 	int skip;
2685 	size_t size = state->size;
2686 	unsigned int last_len;
2687 
2688 	if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) {
2689 		err = -EINVAL;
2690 		goto out;
2691 	}
2692 
2693 	if (unlikely(flags & MSG_OOB)) {
2694 		err = -EOPNOTSUPP;
2695 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2696 		err = unix_stream_recv_urg(state);
2697 #endif
2698 		goto out;
2699 	}
2700 
2701 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2702 	timeo = sock_rcvtimeo(sk, noblock);
2703 
2704 	memset(&scm, 0, sizeof(scm));
2705 
2706 	/* Lock the socket to prevent queue disordering
2707 	 * while we sleep copying data to the msg
2708 	 */
2709 	mutex_lock(&u->iolock);
2710 
2711 	skip = max(sk_peek_offset(sk, flags), 0);
2712 
2713 	do {
2714 		int chunk;
2715 		bool drop_skb;
2716 		struct sk_buff *skb, *last;
2717 
2718 redo:
2719 		unix_state_lock(sk);
2720 		if (sock_flag(sk, SOCK_DEAD)) {
2721 			err = -ECONNRESET;
2722 			goto unlock;
2723 		}
2724 		last = skb = skb_peek(&sk->sk_receive_queue);
2725 		last_len = last ? last->len : 0;
2726 
2727 again:
2728 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
2729 		if (skb) {
2730 			skb = manage_oob(skb, sk, flags, copied);
2731 			if (!skb && copied) {
2732 				unix_state_unlock(sk);
2733 				break;
2734 			}
2735 		}
2736 #endif
2737 		if (skb == NULL) {
2738 			if (copied >= target)
2739 				goto unlock;
2740 
2741 			/*
2742 			 *	POSIX 1003.1g mandates this order.
2743 			 */
2744 
2745 			err = sock_error(sk);
2746 			if (err)
2747 				goto unlock;
2748 			if (sk->sk_shutdown & RCV_SHUTDOWN)
2749 				goto unlock;
2750 
2751 			unix_state_unlock(sk);
2752 			if (!timeo) {
2753 				err = -EAGAIN;
2754 				break;
2755 			}
2756 
2757 			mutex_unlock(&u->iolock);
2758 
2759 			timeo = unix_stream_data_wait(sk, timeo, last,
2760 						      last_len, freezable);
2761 
2762 			if (signal_pending(current)) {
2763 				err = sock_intr_errno(timeo);
2764 				scm_destroy(&scm);
2765 				goto out;
2766 			}
2767 
2768 			mutex_lock(&u->iolock);
2769 			goto redo;
2770 unlock:
2771 			unix_state_unlock(sk);
2772 			break;
2773 		}
2774 
2775 		while (skip >= unix_skb_len(skb)) {
2776 			skip -= unix_skb_len(skb);
2777 			last = skb;
2778 			last_len = skb->len;
2779 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2780 			if (!skb)
2781 				goto again;
2782 		}
2783 
2784 		unix_state_unlock(sk);
2785 
2786 		if (check_creds) {
2787 			/* Never glue messages from different writers */
2788 			if (!unix_skb_scm_eq(skb, &scm))
2789 				break;
2790 		} else if (test_bit(SOCK_PASSCRED, &sock->flags) ||
2791 			   test_bit(SOCK_PASSPIDFD, &sock->flags)) {
2792 			/* Copy credentials */
2793 			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2794 			unix_set_secdata(&scm, skb);
2795 			check_creds = true;
2796 		}
2797 
2798 		/* Copy address just once */
2799 		if (state->msg && state->msg->msg_name) {
2800 			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2801 					 state->msg->msg_name);
2802 			unix_copy_addr(state->msg, skb->sk);
2803 			sunaddr = NULL;
2804 		}
2805 
2806 		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2807 		skb_get(skb);
2808 		chunk = state->recv_actor(skb, skip, chunk, state);
2809 		drop_skb = !unix_skb_len(skb);
2810 		/* skb is only safe to use if !drop_skb */
2811 		consume_skb(skb);
2812 		if (chunk < 0) {
2813 			if (copied == 0)
2814 				copied = -EFAULT;
2815 			break;
2816 		}
2817 		copied += chunk;
2818 		size -= chunk;
2819 
2820 		if (drop_skb) {
2821 			/* the skb was touched by a concurrent reader;
2822 			 * we should not expect anything from this skb
2823 			 * anymore and assume it invalid - we can be
2824 			 * sure it was dropped from the socket queue
2825 			 *
2826 			 * let's report a short read
2827 			 */
2828 			err = 0;
2829 			break;
2830 		}
2831 
2832 		/* Mark read part of skb as used */
2833 		if (!(flags & MSG_PEEK)) {
2834 			UNIXCB(skb).consumed += chunk;
2835 
2836 			sk_peek_offset_bwd(sk, chunk);
2837 
2838 			if (UNIXCB(skb).fp) {
2839 				scm_stat_del(sk, skb);
2840 				unix_detach_fds(&scm, skb);
2841 			}
2842 
2843 			if (unix_skb_len(skb))
2844 				break;
2845 
2846 			skb_unlink(skb, &sk->sk_receive_queue);
2847 			consume_skb(skb);
2848 
2849 			if (scm.fp)
2850 				break;
2851 		} else {
2852 			/* It is questionable, see note in unix_dgram_recvmsg.
2853 			 */
2854 			if (UNIXCB(skb).fp)
2855 				unix_peek_fds(&scm, skb);
2856 
2857 			sk_peek_offset_fwd(sk, chunk);
2858 
2859 			if (UNIXCB(skb).fp)
2860 				break;
2861 
2862 			skip = 0;
2863 			last = skb;
2864 			last_len = skb->len;
2865 			unix_state_lock(sk);
2866 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2867 			if (skb)
2868 				goto again;
2869 			unix_state_unlock(sk);
2870 			break;
2871 		}
2872 	} while (size);
2873 
2874 	mutex_unlock(&u->iolock);
2875 	if (state->msg)
2876 		scm_recv_unix(sock, state->msg, &scm, flags);
2877 	else
2878 		scm_destroy(&scm);
2879 out:
2880 	return copied ? : err;
2881 }
2882 
2883 static int unix_stream_read_actor(struct sk_buff *skb,
2884 				  int skip, int chunk,
2885 				  struct unix_stream_read_state *state)
2886 {
2887 	int ret;
2888 
2889 	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2890 				    state->msg, chunk);
2891 	return ret ?: chunk;
2892 }
2893 
2894 int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
2895 			  size_t size, int flags)
2896 {
2897 	struct unix_stream_read_state state = {
2898 		.recv_actor = unix_stream_read_actor,
2899 		.socket = sk->sk_socket,
2900 		.msg = msg,
2901 		.size = size,
2902 		.flags = flags
2903 	};
2904 
2905 	return unix_stream_read_generic(&state, true);
2906 }
2907 
2908 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2909 			       size_t size, int flags)
2910 {
2911 	struct unix_stream_read_state state = {
2912 		.recv_actor = unix_stream_read_actor,
2913 		.socket = sock,
2914 		.msg = msg,
2915 		.size = size,
2916 		.flags = flags
2917 	};
2918 
2919 #ifdef CONFIG_BPF_SYSCALL
2920 	struct sock *sk = sock->sk;
2921 	const struct proto *prot = READ_ONCE(sk->sk_prot);
2922 
2923 	if (prot != &unix_stream_proto)
2924 		return prot->recvmsg(sk, msg, size, flags, NULL);
2925 #endif
2926 	return unix_stream_read_generic(&state, true);
2927 }
2928 
2929 static int unix_stream_splice_actor(struct sk_buff *skb,
2930 				    int skip, int chunk,
2931 				    struct unix_stream_read_state *state)
2932 {
2933 	return skb_splice_bits(skb, state->socket->sk,
2934 			       UNIXCB(skb).consumed + skip,
2935 			       state->pipe, chunk, state->splice_flags);
2936 }
2937 
2938 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2939 				       struct pipe_inode_info *pipe,
2940 				       size_t size, unsigned int flags)
2941 {
2942 	struct unix_stream_read_state state = {
2943 		.recv_actor = unix_stream_splice_actor,
2944 		.socket = sock,
2945 		.pipe = pipe,
2946 		.size = size,
2947 		.splice_flags = flags,
2948 	};
2949 
2950 	if (unlikely(*ppos))
2951 		return -ESPIPE;
2952 
2953 	if (sock->file->f_flags & O_NONBLOCK ||
2954 	    flags & SPLICE_F_NONBLOCK)
2955 		state.flags = MSG_DONTWAIT;
2956 
2957 	return unix_stream_read_generic(&state, false);
2958 }
2959 
2960 static int unix_shutdown(struct socket *sock, int mode)
2961 {
2962 	struct sock *sk = sock->sk;
2963 	struct sock *other;
2964 
2965 	if (mode < SHUT_RD || mode > SHUT_RDWR)
2966 		return -EINVAL;
2967 	/* This maps:
2968 	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2969 	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2970 	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2971 	 */
2972 	++mode;
2973 
2974 	unix_state_lock(sk);
2975 	WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
2976 	other = unix_peer(sk);
2977 	if (other)
2978 		sock_hold(other);
2979 	unix_state_unlock(sk);
2980 	sk->sk_state_change(sk);
2981 
2982 	if (other &&
2983 		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2984 
2985 		int peer_mode = 0;
2986 		const struct proto *prot = READ_ONCE(other->sk_prot);
2987 
2988 		if (prot->unhash)
2989 			prot->unhash(other);
2990 		if (mode&RCV_SHUTDOWN)
2991 			peer_mode |= SEND_SHUTDOWN;
2992 		if (mode&SEND_SHUTDOWN)
2993 			peer_mode |= RCV_SHUTDOWN;
2994 		unix_state_lock(other);
2995 		WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
2996 		unix_state_unlock(other);
2997 		other->sk_state_change(other);
2998 		if (peer_mode == SHUTDOWN_MASK)
2999 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
3000 		else if (peer_mode & RCV_SHUTDOWN)
3001 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
3002 	}
3003 	if (other)
3004 		sock_put(other);
3005 
3006 	return 0;
3007 }
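
/* Illustrative userspace sketch (not part of the kernel source): the
 * "++mode" mapping above turns the SHUT_* constants into the kernel's
 * shutdown bit masks.  For example, on a connected stream or seqpacket
 * socket (descriptor name "fd" is a placeholder):
 *
 *	shutdown(fd, SHUT_WR);	// SHUT_WR (1) -> SEND_SHUTDOWN (2) locally,
 *				// and RCV_SHUTDOWN is propagated to the
 *				// connected peer, which then sees EOF on read.
 */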
3008 
3009 long unix_inq_len(struct sock *sk)
3010 {
3011 	struct sk_buff *skb;
3012 	long amount = 0;
3013 
3014 	if (READ_ONCE(sk->sk_state) == TCP_LISTEN)
3015 		return -EINVAL;
3016 
3017 	spin_lock(&sk->sk_receive_queue.lock);
3018 	if (sk->sk_type == SOCK_STREAM ||
3019 	    sk->sk_type == SOCK_SEQPACKET) {
3020 		skb_queue_walk(&sk->sk_receive_queue, skb)
3021 			amount += unix_skb_len(skb);
3022 	} else {
3023 		skb = skb_peek(&sk->sk_receive_queue);
3024 		if (skb)
3025 			amount = skb->len;
3026 	}
3027 	spin_unlock(&sk->sk_receive_queue.lock);
3028 
3029 	return amount;
3030 }
3031 EXPORT_SYMBOL_GPL(unix_inq_len);
3032 
3033 long unix_outq_len(struct sock *sk)
3034 {
3035 	return sk_wmem_alloc_get(sk);
3036 }
3037 EXPORT_SYMBOL_GPL(unix_outq_len);
3038 
3039 static int unix_open_file(struct sock *sk)
3040 {
3041 	struct path path;
3042 	struct file *f;
3043 	int fd;
3044 
3045 	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
3046 		return -EPERM;
3047 
3048 	if (!smp_load_acquire(&unix_sk(sk)->addr))
3049 		return -ENOENT;
3050 
3051 	path = unix_sk(sk)->path;
3052 	if (!path.dentry)
3053 		return -ENOENT;
3054 
3055 	path_get(&path);
3056 
3057 	fd = get_unused_fd_flags(O_CLOEXEC);
3058 	if (fd < 0)
3059 		goto out;
3060 
3061 	f = dentry_open(&path, O_PATH, current_cred());
3062 	if (IS_ERR(f)) {
3063 		put_unused_fd(fd);
3064 		fd = PTR_ERR(f);
3065 		goto out;
3066 	}
3067 
3068 	fd_install(fd, f);
3069 out:
3070 	path_put(&path);
3071 
3072 	return fd;
3073 }
3074 
3075 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3076 {
3077 	struct sock *sk = sock->sk;
3078 	long amount = 0;
3079 	int err;
3080 
3081 	switch (cmd) {
3082 	case SIOCOUTQ:
3083 		amount = unix_outq_len(sk);
3084 		err = put_user(amount, (int __user *)arg);
3085 		break;
3086 	case SIOCINQ:
3087 		amount = unix_inq_len(sk);
3088 		if (amount < 0)
3089 			err = amount;
3090 		else
3091 			err = put_user(amount, (int __user *)arg);
3092 		break;
3093 	case SIOCUNIXFILE:
3094 		err = unix_open_file(sk);
3095 		break;
3096 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3097 	case SIOCATMARK:
3098 		{
3099 			struct sk_buff *skb;
3100 			int answ = 0;
3101 
3102 			skb = skb_peek(&sk->sk_receive_queue);
3103 			if (skb && skb == READ_ONCE(unix_sk(sk)->oob_skb))
3104 				answ = 1;
3105 			err = put_user(answ, (int __user *)arg);
3106 		}
3107 		break;
3108 #endif
3109 	default:
3110 		err = -ENOIOCTLCMD;
3111 		break;
3112 	}
3113 	return err;
3114 }
3115 
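
/* Illustrative userspace sketch (not part of the kernel source): the
 * SIOCINQ/SIOCOUTQ/SIOCATMARK handling above is reached through plain
 * ioctl(2) calls.  The descriptor name "fd" is a placeholder:
 *
 *	int pending, at_mark;
 *
 *	ioctl(fd, SIOCINQ, &pending);	// unread bytes in the receive queue
 *	ioctl(fd, SIOCATMARK, &at_mark);	// 1 if the next byte is the OOB mark
 */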
3116 #ifdef CONFIG_COMPAT
3117 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
3118 {
3119 	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
3120 }
3121 #endif
3122 
3123 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
3124 {
3125 	struct sock *sk = sock->sk;
3126 	unsigned char state;
3127 	__poll_t mask;
3128 	u8 shutdown;
3129 
3130 	sock_poll_wait(file, sock, wait);
3131 	mask = 0;
3132 	shutdown = READ_ONCE(sk->sk_shutdown);
3133 	state = READ_ONCE(sk->sk_state);
3134 
3135 	/* exceptional events? */
3136 	if (READ_ONCE(sk->sk_err))
3137 		mask |= EPOLLERR;
3138 	if (shutdown == SHUTDOWN_MASK)
3139 		mask |= EPOLLHUP;
3140 	if (shutdown & RCV_SHUTDOWN)
3141 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3142 
3143 	/* readable? */
3144 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3145 		mask |= EPOLLIN | EPOLLRDNORM;
3146 	if (sk_is_readable(sk))
3147 		mask |= EPOLLIN | EPOLLRDNORM;
3148 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
3149 	if (READ_ONCE(unix_sk(sk)->oob_skb))
3150 		mask |= EPOLLPRI;
3151 #endif
3152 
3153 	/* Connection-based need to check for termination and startup */
3154 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
3155 	    state == TCP_CLOSE)
3156 		mask |= EPOLLHUP;
3157 
3158 	/*
3159 	 * we set writable also when the other side has shut down the
3160 	 * connection. This prevents stuck sockets.
3161 	 */
3162 	if (unix_writable(sk, state))
3163 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3164 
3165 	return mask;
3166 }
3167 
3168 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
3169 				    poll_table *wait)
3170 {
3171 	struct sock *sk = sock->sk, *other;
3172 	unsigned int writable;
3173 	unsigned char state;
3174 	__poll_t mask;
3175 	u8 shutdown;
3176 
3177 	sock_poll_wait(file, sock, wait);
3178 	mask = 0;
3179 	shutdown = READ_ONCE(sk->sk_shutdown);
3180 	state = READ_ONCE(sk->sk_state);
3181 
3182 	/* exceptional events? */
3183 	if (READ_ONCE(sk->sk_err) ||
3184 	    !skb_queue_empty_lockless(&sk->sk_error_queue))
3185 		mask |= EPOLLERR |
3186 			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
3187 
3188 	if (shutdown & RCV_SHUTDOWN)
3189 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
3190 	if (shutdown == SHUTDOWN_MASK)
3191 		mask |= EPOLLHUP;
3192 
3193 	/* readable? */
3194 	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
3195 		mask |= EPOLLIN | EPOLLRDNORM;
3196 	if (sk_is_readable(sk))
3197 		mask |= EPOLLIN | EPOLLRDNORM;
3198 
3199 	/* Connection-based need to check for termination and startup */
3200 	if (sk->sk_type == SOCK_SEQPACKET && state == TCP_CLOSE)
3201 		mask |= EPOLLHUP;
3202 
3203 	/* No write status requested, avoid expensive OUT tests. */
3204 	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
3205 		return mask;
3206 
3207 	writable = unix_writable(sk, state);
3208 	if (writable) {
3209 		unix_state_lock(sk);
3210 
3211 		other = unix_peer(sk);
3212 		if (other && unix_peer(other) != sk &&
3213 		    unix_recvq_full_lockless(other) &&
3214 		    unix_dgram_peer_wake_me(sk, other))
3215 			writable = 0;
3216 
3217 		unix_state_unlock(sk);
3218 	}
3219 
3220 	if (writable)
3221 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
3222 	else
3223 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
3224 
3225 	return mask;
3226 }
3227 
3228 #ifdef CONFIG_PROC_FS
3229 
3230 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
3231 
3232 #define get_bucket(x) ((x) >> BUCKET_SPACE)
3233 #define get_offset(x) ((x) & ((1UL << BUCKET_SPACE) - 1))
3234 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
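
/* Worked example (illustrative; the concrete UNIX_HASH_BITS value is an
 * assumption here): BUCKET_SPACE reserves the low
 * BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1 bits of *pos for the
 * in-bucket offset and the remaining high bits for the bucket index.
 * On a 64-bit build with, say, UNIX_HASH_BITS == 8, that is 54 offset
 * bits, so set_bucket_offset(3, 5) stores bucket 3 above bit 54 and
 * offset 5 below it; get_bucket() and get_offset() simply undo the split.
 */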
3235 
3236 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
3237 {
3238 	unsigned long offset = get_offset(*pos);
3239 	unsigned long bucket = get_bucket(*pos);
3240 	unsigned long count = 0;
3241 	struct sock *sk;
3242 
3243 	for (sk = sk_head(&seq_file_net(seq)->unx.table.buckets[bucket]);
3244 	     sk; sk = sk_next(sk)) {
3245 		if (++count == offset)
3246 			break;
3247 	}
3248 
3249 	return sk;
3250 }
3251 
3252 static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos)
3253 {
3254 	unsigned long bucket = get_bucket(*pos);
3255 	struct net *net = seq_file_net(seq);
3256 	struct sock *sk;
3257 
3258 	while (bucket < UNIX_HASH_SIZE) {
3259 		spin_lock(&net->unx.table.locks[bucket]);
3260 
3261 		sk = unix_from_bucket(seq, pos);
3262 		if (sk)
3263 			return sk;
3264 
3265 		spin_unlock(&net->unx.table.locks[bucket]);
3266 
3267 		*pos = set_bucket_offset(++bucket, 1);
3268 	}
3269 
3270 	return NULL;
3271 }
3272 
3273 static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk,
3274 				  loff_t *pos)
3275 {
3276 	unsigned long bucket = get_bucket(*pos);
3277 
3278 	sk = sk_next(sk);
3279 	if (sk)
3280 		return sk;
3281 
3282 
3283 	spin_unlock(&seq_file_net(seq)->unx.table.locks[bucket]);
3284 
3285 	*pos = set_bucket_offset(++bucket, 1);
3286 
3287 	return unix_get_first(seq, pos);
3288 }
3289 
3290 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
3291 {
3292 	if (!*pos)
3293 		return SEQ_START_TOKEN;
3294 
3295 	return unix_get_first(seq, pos);
3296 }
3297 
3298 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3299 {
3300 	++*pos;
3301 
3302 	if (v == SEQ_START_TOKEN)
3303 		return unix_get_first(seq, pos);
3304 
3305 	return unix_get_next(seq, v, pos);
3306 }
3307 
3308 static void unix_seq_stop(struct seq_file *seq, void *v)
3309 {
3310 	struct sock *sk = v;
3311 
3312 	if (sk)
3313 		spin_unlock(&seq_file_net(seq)->unx.table.locks[sk->sk_hash]);
3314 }
3315 
3316 static int unix_seq_show(struct seq_file *seq, void *v)
3317 {
3318 
3319 	if (v == SEQ_START_TOKEN)
3320 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
3321 			 "Inode Path\n");
3322 	else {
3323 		struct sock *s = v;
3324 		struct unix_sock *u = unix_sk(s);
3325 		unix_state_lock(s);
3326 
3327 		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
3328 			s,
3329 			refcount_read(&s->sk_refcnt),
3330 			0,
3331 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
3332 			s->sk_type,
3333 			s->sk_socket ?
3334 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
3335 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
3336 			sock_i_ino(s));
3337 
3338 		if (u->addr) {	// under a hash table lock here
3339 			int i, len;
3340 			seq_putc(seq, ' ');
3341 
3342 			i = 0;
3343 			len = u->addr->len -
3344 				offsetof(struct sockaddr_un, sun_path);
3345 			if (u->addr->name->sun_path[0]) {
3346 				len--;
3347 			} else {
3348 				seq_putc(seq, '@');
3349 				i++;
3350 			}
3351 			for ( ; i < len; i++)
3352 				seq_putc(seq, u->addr->name->sun_path[i] ?:
3353 					 '@');
3354 		}
3355 		unix_state_unlock(s);
3356 		seq_putc(seq, '\n');
3357 	}
3358 
3359 	return 0;
3360 }
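
/* Illustrative output sketch (not part of the kernel source; all values
 * below are made up): given the seq_printf() format above, a bound
 * listening stream socket shows up in /proc/net/unix roughly as
 *
 *	0000000000000000: 00000002 00000000 00010000 0001 01 12345 /tmp/example.sock
 *
 * with abstract names printed with a leading '@' and embedded NUL
 * bytes replaced by '@', as handled in the loop above.
 */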
3361 
3362 static const struct seq_operations unix_seq_ops = {
3363 	.start  = unix_seq_start,
3364 	.next   = unix_seq_next,
3365 	.stop   = unix_seq_stop,
3366 	.show   = unix_seq_show,
3367 };
3368 
3369 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
3370 struct bpf_unix_iter_state {
3371 	struct seq_net_private p;
3372 	unsigned int cur_sk;
3373 	unsigned int end_sk;
3374 	unsigned int max_sk;
3375 	struct sock **batch;
3376 	bool st_bucket_done;
3377 };
3378 
3379 struct bpf_iter__unix {
3380 	__bpf_md_ptr(struct bpf_iter_meta *, meta);
3381 	__bpf_md_ptr(struct unix_sock *, unix_sk);
3382 	uid_t uid __aligned(8);
3383 };
3384 
3385 static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
3386 			      struct unix_sock *unix_sk, uid_t uid)
3387 {
3388 	struct bpf_iter__unix ctx;
3389 
3390 	meta->seq_num--;  /* skip SEQ_START_TOKEN */
3391 	ctx.meta = meta;
3392 	ctx.unix_sk = unix_sk;
3393 	ctx.uid = uid;
3394 	return bpf_iter_run_prog(prog, &ctx);
3395 }
3396 
3397 static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
3398 
3399 {
3400 	struct bpf_unix_iter_state *iter = seq->private;
3401 	unsigned int expected = 1;
3402 	struct sock *sk;
3403 
3404 	sock_hold(start_sk);
3405 	iter->batch[iter->end_sk++] = start_sk;
3406 
3407 	for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
3408 		if (iter->end_sk < iter->max_sk) {
3409 			sock_hold(sk);
3410 			iter->batch[iter->end_sk++] = sk;
3411 		}
3412 
3413 		expected++;
3414 	}
3415 
3416 	spin_unlock(&seq_file_net(seq)->unx.table.locks[start_sk->sk_hash]);
3417 
3418 	return expected;
3419 }
3420 
3421 static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
3422 {
3423 	while (iter->cur_sk < iter->end_sk)
3424 		sock_put(iter->batch[iter->cur_sk++]);
3425 }
3426 
3427 static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
3428 				       unsigned int new_batch_sz)
3429 {
3430 	struct sock **new_batch;
3431 
3432 	new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
3433 			     GFP_USER | __GFP_NOWARN);
3434 	if (!new_batch)
3435 		return -ENOMEM;
3436 
3437 	bpf_iter_unix_put_batch(iter);
3438 	kvfree(iter->batch);
3439 	iter->batch = new_batch;
3440 	iter->max_sk = new_batch_sz;
3441 
3442 	return 0;
3443 }
3444 
3445 static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
3446 					loff_t *pos)
3447 {
3448 	struct bpf_unix_iter_state *iter = seq->private;
3449 	unsigned int expected;
3450 	bool resized = false;
3451 	struct sock *sk;
3452 
3453 	if (iter->st_bucket_done)
3454 		*pos = set_bucket_offset(get_bucket(*pos) + 1, 1);
3455 
3456 again:
3457 	/* Get a new batch */
3458 	iter->cur_sk = 0;
3459 	iter->end_sk = 0;
3460 
3461 	sk = unix_get_first(seq, pos);
3462 	if (!sk)
3463 		return NULL; /* Done */
3464 
3465 	expected = bpf_iter_unix_hold_batch(seq, sk);
3466 
3467 	if (iter->end_sk == expected) {
3468 		iter->st_bucket_done = true;
3469 		return sk;
3470 	}
3471 
3472 	if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) {
3473 		resized = true;
3474 		goto again;
3475 	}
3476 
3477 	return sk;
3478 }
3479 
3480 static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
3481 {
3482 	if (!*pos)
3483 		return SEQ_START_TOKEN;
3484 
3485 	/* bpf iter does not support lseek, so it always
3486 	 * continues from where it was stop()-ped.
3487 	 */
3488 	return bpf_iter_unix_batch(seq, pos);
3489 }
3490 
3491 static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3492 {
3493 	struct bpf_unix_iter_state *iter = seq->private;
3494 	struct sock *sk;
3495 
3496 	/* Whenever seq_next() is called, the iter->cur_sk is
3497 	 * done with seq_show(), so advance to the next sk in
3498 	 * the batch.
3499 	 */
3500 	if (iter->cur_sk < iter->end_sk)
3501 		sock_put(iter->batch[iter->cur_sk++]);
3502 
3503 	++*pos;
3504 
3505 	if (iter->cur_sk < iter->end_sk)
3506 		sk = iter->batch[iter->cur_sk];
3507 	else
3508 		sk = bpf_iter_unix_batch(seq, pos);
3509 
3510 	return sk;
3511 }
3512 
3513 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
3514 {
3515 	struct bpf_iter_meta meta;
3516 	struct bpf_prog *prog;
3517 	struct sock *sk = v;
3518 	uid_t uid;
3519 	bool slow;
3520 	int ret;
3521 
3522 	if (v == SEQ_START_TOKEN)
3523 		return 0;
3524 
3525 	slow = lock_sock_fast(sk);
3526 
3527 	if (unlikely(sk_unhashed(sk))) {
3528 		ret = SEQ_SKIP;
3529 		goto unlock;
3530 	}
3531 
3532 	uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
3533 	meta.seq = seq;
3534 	prog = bpf_iter_get_info(&meta, false);
3535 	ret = unix_prog_seq_show(prog, &meta, v, uid);
3536 unlock:
3537 	unlock_sock_fast(sk, slow);
3538 	return ret;
3539 }
3540 
3541 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
3542 {
3543 	struct bpf_unix_iter_state *iter = seq->private;
3544 	struct bpf_iter_meta meta;
3545 	struct bpf_prog *prog;
3546 
3547 	if (!v) {
3548 		meta.seq = seq;
3549 		prog = bpf_iter_get_info(&meta, true);
3550 		if (prog)
3551 			(void)unix_prog_seq_show(prog, &meta, v, 0);
3552 	}
3553 
3554 	if (iter->cur_sk < iter->end_sk)
3555 		bpf_iter_unix_put_batch(iter);
3556 }
3557 
3558 static const struct seq_operations bpf_iter_unix_seq_ops = {
3559 	.start	= bpf_iter_unix_seq_start,
3560 	.next	= bpf_iter_unix_seq_next,
3561 	.stop	= bpf_iter_unix_seq_stop,
3562 	.show	= bpf_iter_unix_seq_show,
3563 };
3564 #endif
3565 #endif
3566 
3567 static const struct net_proto_family unix_family_ops = {
3568 	.family = PF_UNIX,
3569 	.create = unix_create,
3570 	.owner	= THIS_MODULE,
3571 };
3572 
3573 
3574 static int __net_init unix_net_init(struct net *net)
3575 {
3576 	int i;
3577 
3578 	net->unx.sysctl_max_dgram_qlen = 10;
3579 	if (unix_sysctl_register(net))
3580 		goto out;
3581 
3582 #ifdef CONFIG_PROC_FS
3583 	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
3584 			     sizeof(struct seq_net_private)))
3585 		goto err_sysctl;
3586 #endif
3587 
3588 	net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE,
3589 					      sizeof(spinlock_t), GFP_KERNEL);
3590 	if (!net->unx.table.locks)
3591 		goto err_proc;
3592 
3593 	net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE,
3594 						sizeof(struct hlist_head),
3595 						GFP_KERNEL);
3596 	if (!net->unx.table.buckets)
3597 		goto free_locks;
3598 
3599 	for (i = 0; i < UNIX_HASH_SIZE; i++) {
3600 		spin_lock_init(&net->unx.table.locks[i]);
3601 		INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
3602 	}
3603 
3604 	return 0;
3605 
3606 free_locks:
3607 	kvfree(net->unx.table.locks);
3608 err_proc:
3609 #ifdef CONFIG_PROC_FS
3610 	remove_proc_entry("unix", net->proc_net);
3611 err_sysctl:
3612 #endif
3613 	unix_sysctl_unregister(net);
3614 out:
3615 	return -ENOMEM;
3616 }
3617 
3618 static void __net_exit unix_net_exit(struct net *net)
3619 {
3620 	kvfree(net->unx.table.buckets);
3621 	kvfree(net->unx.table.locks);
3622 	unix_sysctl_unregister(net);
3623 	remove_proc_entry("unix", net->proc_net);
3624 }
3625 
3626 static struct pernet_operations unix_net_ops = {
3627 	.init = unix_net_init,
3628 	.exit = unix_net_exit,
3629 };
3630 
3631 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3632 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
3633 		     struct unix_sock *unix_sk, uid_t uid)
3634 
3635 #define INIT_BATCH_SZ 16
3636 
3637 static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
3638 {
3639 	struct bpf_unix_iter_state *iter = priv_data;
3640 	int err;
3641 
3642 	err = bpf_iter_init_seq_net(priv_data, aux);
3643 	if (err)
3644 		return err;
3645 
3646 	err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
3647 	if (err) {
3648 		bpf_iter_fini_seq_net(priv_data);
3649 		return err;
3650 	}
3651 
3652 	return 0;
3653 }
3654 
3655 static void bpf_iter_fini_unix(void *priv_data)
3656 {
3657 	struct bpf_unix_iter_state *iter = priv_data;
3658 
3659 	bpf_iter_fini_seq_net(priv_data);
3660 	kvfree(iter->batch);
3661 }
3662 
3663 static const struct bpf_iter_seq_info unix_seq_info = {
3664 	.seq_ops		= &bpf_iter_unix_seq_ops,
3665 	.init_seq_private	= bpf_iter_init_unix,
3666 	.fini_seq_private	= bpf_iter_fini_unix,
3667 	.seq_priv_size		= sizeof(struct bpf_unix_iter_state),
3668 };
3669 
3670 static const struct bpf_func_proto *
3671 bpf_iter_unix_get_func_proto(enum bpf_func_id func_id,
3672 			     const struct bpf_prog *prog)
3673 {
3674 	switch (func_id) {
3675 	case BPF_FUNC_setsockopt:
3676 		return &bpf_sk_setsockopt_proto;
3677 	case BPF_FUNC_getsockopt:
3678 		return &bpf_sk_getsockopt_proto;
3679 	default:
3680 		return NULL;
3681 	}
3682 }
3683 
3684 static struct bpf_iter_reg unix_reg_info = {
3685 	.target			= "unix",
3686 	.ctx_arg_info_size	= 1,
3687 	.ctx_arg_info		= {
3688 		{ offsetof(struct bpf_iter__unix, unix_sk),
3689 		  PTR_TO_BTF_ID_OR_NULL },
3690 	},
3691 	.get_func_proto         = bpf_iter_unix_get_func_proto,
3692 	.seq_info		= &unix_seq_info,
3693 };
3694 
3695 static void __init bpf_iter_register(void)
3696 {
3697 	unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
3698 	if (bpf_iter_reg_target(&unix_reg_info))
3699 		pr_warn("Warning: could not register bpf iterator unix\n");
3700 }
3701 #endif
3702 
3703 static int __init af_unix_init(void)
3704 {
3705 	int i, rc = -1;
3706 
3707 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
3708 
3709 	for (i = 0; i < UNIX_HASH_SIZE / 2; i++) {
3710 		spin_lock_init(&bsd_socket_locks[i]);
3711 		INIT_HLIST_HEAD(&bsd_socket_buckets[i]);
3712 	}
3713 
3714 	rc = proto_register(&unix_dgram_proto, 1);
3715 	if (rc != 0) {
3716 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3717 		goto out;
3718 	}
3719 
3720 	rc = proto_register(&unix_stream_proto, 1);
3721 	if (rc != 0) {
3722 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
3723 		proto_unregister(&unix_dgram_proto);
3724 		goto out;
3725 	}
3726 
3727 	sock_register(&unix_family_ops);
3728 	register_pernet_subsys(&unix_net_ops);
3729 	unix_bpf_build_proto();
3730 
3731 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3732 	bpf_iter_register();
3733 #endif
3734 
3735 out:
3736 	return rc;
3737 }
3738 
3739 static void __exit af_unix_exit(void)
3740 {
3741 	sock_unregister(PF_UNIX);
3742 	proto_unregister(&unix_dgram_proto);
3743 	proto_unregister(&unix_stream_proto);
3744 	unregister_pernet_subsys(&unix_net_ops);
3745 }
3746 
3747 /* Earlier than device_initcall() so that other drivers invoking
3748    request_module() don't end up in a loop when modprobe tries
3749    to use a UNIX socket. But later than subsys_initcall() because
3750    we depend on stuff initialised there */
3751 fs_initcall(af_unix_init);
3752 module_exit(af_unix_exit);
3753 
3754 MODULE_LICENSE("GPL");
3755 MODULE_ALIAS_NETPROTO(PF_UNIX);
3756