xref: /openbmc/linux/net/unix/af_unix.c (revision faa16bc4)
1 /*
2  * NET4:	Implementation of BSD Unix domain sockets.
3  *
4  * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *
6  *		This program is free software; you can redistribute it and/or
7  *		modify it under the terms of the GNU General Public License
8  *		as published by the Free Software Foundation; either version
9  *		2 of the License, or (at your option) any later version.
10  *
11  * Fixes:
12  *		Linus Torvalds	:	Assorted bug cures.
13  *		Niibe Yutaka	:	async I/O support.
14  *		Carsten Paeth	:	PF_UNIX check, address fixes.
15  *		Alan Cox	:	Limit size of allocated blocks.
16  *		Alan Cox	:	Fixed the stupid socketpair bug.
17  *		Alan Cox	:	BSD compatibility fine tuning.
18  *		Alan Cox	:	Fixed a bug in connect when interrupted.
19  *		Alan Cox	:	Sorted out a proper draft version of
20  *					file descriptor passing hacked up from
21  *					Mike Shaver's work.
22  *		Marty Leisner	:	Fixes to fd passing
23  *		Nick Nevin	:	recvmsg bugfix.
24  *		Alan Cox	:	Started proper garbage collector
25  *		Heiko Eißfeldt	:	Missing verify_area check
26  *		Alan Cox	:	Started POSIXisms
27  *		Andreas Schwab	:	Replace inode by dentry for proper
28  *					reference counting
29  *		Kirk Petersen	:	Made this a module
30  *	    Christoph Rohland	:	Elegant non-blocking accept/connect algorithm.
31  *					Lots of bug fixes.
32  *	     Alexey Kuznetsov	:	Repaired (I hope) bugs introduced
33  *					by the above two patches.
34  *	     Andrea Arcangeli	:	If possible we block in connect(2)
35  *					if the max backlog of the listen socket
36  *					has been reached. This won't break
37  *					old apps and it will avoid a huge
38  *					number of socks being hashed (this is
39  *					for unix_gc() performance reasons).
40  *					Security fix that limits the max
41  *					number of socks to 2*max_files and
42  *					the number of skb queueable in the
43  *					dgram receiver.
44  *		Artur Skawina   :	Hash function optimizations
45  *	     Alexey Kuznetsov   :	Full scale SMP. Lot of bugs are introduced 8)
46  *	      Malcolm Beattie   :	Set peercred for socketpair
47  *	     Michal Ostrowski   :       Module initialization cleanup.
48  *	     Arnaldo C. Melo	:	Remove MOD_{INC,DEC}_USE_COUNT,
49  *	     				the core infrastructure is doing that
50  *	     				for all net proto families now (2.5.69+)
51  *
52  *
53  * Known differences from reference BSD that was tested:
54  *
55  *	[TO FIX]
56  *	ECONNREFUSED is not returned from one end of a connected() socket to the
57  *		other the moment one end closes.
58  *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
59  *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
60  *	[NOT TO FIX]
61  *	accept() returns a path name even if the connecting socket has closed
62  *		in the meantime (BSD loses the path and gives up).
63  *	accept() returns 0 length path for an unbound connector. BSD returns 16
64  *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65  *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
66  *	BSD af_unix apparently has connect forgetting to block properly.
67  *		(need to check this with the POSIX spec in detail)
68  *
69  * Differences from 2.0.0-11-... (ANK)
70  *	Bug fixes and improvements.
71  *		- client shutdown killed server socket.
72  *		- removed all useless cli/sti pairs.
73  *
74  *	Semantic changes/extensions.
75  *		- generic control message passing.
76  *		- SCM_CREDENTIALS control message.
77  *		- "Abstract" (not FS based) socket bindings.
78  *		  Abstract names are sequences of bytes (not zero terminated)
79  *		  starting with 0, so that this name space does not intersect
80  *		  with BSD names.
81  */
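/*
 * Illustrative userspace sketch (not part of this file) of binding an
 * abstract socket; the name "example" is made up.  Note the address is
 * counted by length and is not NUL terminated:
 *
 *	struct sockaddr_un sun;
 *	int fd = socket(AF_UNIX, SOCK_STREAM, 0);
 *
 *	memset(&sun, 0, sizeof(sun));
 *	sun.sun_family = AF_UNIX;
 *	sun.sun_path[0] = 0;                  abstract namespace marker
 *	memcpy(sun.sun_path + 1, "example", 7);
 *	bind(fd, (struct sockaddr *)&sun,
 *	     offsetof(struct sockaddr_un, sun_path) + 1 + 7);
 */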
82 
83 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
84 
85 #include <linux/module.h>
86 #include <linux/kernel.h>
87 #include <linux/signal.h>
88 #include <linux/sched/signal.h>
89 #include <linux/errno.h>
90 #include <linux/string.h>
91 #include <linux/stat.h>
92 #include <linux/dcache.h>
93 #include <linux/namei.h>
94 #include <linux/socket.h>
95 #include <linux/un.h>
96 #include <linux/fcntl.h>
97 #include <linux/termios.h>
98 #include <linux/sockios.h>
99 #include <linux/net.h>
100 #include <linux/in.h>
101 #include <linux/fs.h>
102 #include <linux/slab.h>
103 #include <linux/uaccess.h>
104 #include <linux/skbuff.h>
105 #include <linux/netdevice.h>
106 #include <net/net_namespace.h>
107 #include <net/sock.h>
108 #include <net/tcp_states.h>
109 #include <net/af_unix.h>
110 #include <linux/proc_fs.h>
111 #include <linux/seq_file.h>
112 #include <net/scm.h>
113 #include <linux/init.h>
114 #include <linux/poll.h>
115 #include <linux/rtnetlink.h>
116 #include <linux/mount.h>
117 #include <net/checksum.h>
118 #include <linux/security.h>
119 #include <linux/freezer.h>
120 #include <linux/file.h>
121 
122 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
123 EXPORT_SYMBOL_GPL(unix_socket_table);
124 DEFINE_SPINLOCK(unix_table_lock);
125 EXPORT_SYMBOL_GPL(unix_table_lock);
126 static atomic_long_t unix_nr_socks;
127 
128 
129 static struct hlist_head *unix_sockets_unbound(void *addr)
130 {
131 	unsigned long hash = (unsigned long)addr;
132 
133 	hash ^= hash >> 16;
134 	hash ^= hash >> 8;
135 	hash %= UNIX_HASH_SIZE;
136 	return &unix_socket_table[UNIX_HASH_SIZE + hash];
137 }
138 
139 #define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
140 
141 #ifdef CONFIG_SECURITY_NETWORK
142 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
143 {
144 	UNIXCB(skb).secid = scm->secid;
145 }
146 
147 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
148 {
149 	scm->secid = UNIXCB(skb).secid;
150 }
151 
152 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
153 {
154 	return (scm->secid == UNIXCB(skb).secid);
155 }
156 #else
157 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
158 { }
159 
160 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
161 { }
162 
163 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
164 {
165 	return true;
166 }
167 #endif /* CONFIG_SECURITY_NETWORK */
168 
169 /*
170  *  SMP locking strategy:
171  *    hash table is protected with spinlock unix_table_lock
172  *    each socket state is protected by separate spin lock.
173  */
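/*
 * A minimal sketch of the pattern this implies (illustration only, not
 * a real helper in this file): the two locks are not nested here; each
 * protects its own critical section.
 *
 *	spin_lock(&unix_table_lock);
 *	... walk unix_socket_table[], sock_hold() a match ...
 *	spin_unlock(&unix_table_lock);
 *
 *	unix_state_lock(sk);
 *	... inspect or modify the state of that one socket ...
 *	unix_state_unlock(sk);
 */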
174 
175 static inline unsigned int unix_hash_fold(__wsum n)
176 {
177 	unsigned int hash = (__force unsigned int)csum_fold(n);
178 
179 	hash ^= hash>>8;
180 	return hash&(UNIX_HASH_SIZE-1);
181 }
182 
183 #define unix_peer(sk) (unix_sk(sk)->peer)
184 
185 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
186 {
187 	return unix_peer(osk) == sk;
188 }
189 
190 static inline int unix_may_send(struct sock *sk, struct sock *osk)
191 {
192 	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
193 }
194 
195 static inline int unix_recvq_full(struct sock const *sk)
196 {
197 	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
198 }
199 
200 struct sock *unix_peer_get(struct sock *s)
201 {
202 	struct sock *peer;
203 
204 	unix_state_lock(s);
205 	peer = unix_peer(s);
206 	if (peer)
207 		sock_hold(peer);
208 	unix_state_unlock(s);
209 	return peer;
210 }
211 EXPORT_SYMBOL_GPL(unix_peer_get);
212 
213 static inline void unix_release_addr(struct unix_address *addr)
214 {
215 	if (refcount_dec_and_test(&addr->refcnt))
216 		kfree(addr);
217 }
218 
219 /*
220  *	Check unix socket name:
221  *		- it should not be zero length.
222  *	        - if it does not start with a zero byte, it should be NUL terminated (FS object)
223  *		- if it starts with a zero byte, it is an abstract name.
224  */
225 
226 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
227 {
228 	if (len <= sizeof(short) || len > sizeof(*sunaddr))
229 		return -EINVAL;
230 	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
231 		return -EINVAL;
232 	if (sunaddr->sun_path[0]) {
233 		/*
234 		 * This may look like an off by one error but it is a bit more
235 		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
236 		 * sun_path[108] does not exist as such.  However, in kernel
237 		 * space we are guaranteed that it is a valid memory location
238 		 * in our kernel address buffer.
239 		 */
240 		((char *)sunaddr)[len] = 0;
241 		len = strlen(sunaddr->sun_path)+1+sizeof(short);
242 		return len;
243 	}
244 
245 	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
246 	return len;
247 }
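/*
 * Two illustrative addresses (names made up) as unix_mkname() sees them:
 *
 *	filesystem:	sun_path = "/tmp/sock"; gets NUL terminated and
 *			len recomputed, *hashp is left untouched (the
 *			socket will be found by inode, not by hash).
 *	abstract:	sun_path = "\0sock"; the length is authoritative
 *			and *hashp is folded from a checksum of the
 *			whole address.
 */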
248 
249 static void __unix_remove_socket(struct sock *sk)
250 {
251 	sk_del_node_init(sk);
252 }
253 
254 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
255 {
256 	WARN_ON(!sk_unhashed(sk));
257 	sk_add_node(sk, list);
258 }
259 
260 static inline void unix_remove_socket(struct sock *sk)
261 {
262 	spin_lock(&unix_table_lock);
263 	__unix_remove_socket(sk);
264 	spin_unlock(&unix_table_lock);
265 }
266 
267 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
268 {
269 	spin_lock(&unix_table_lock);
270 	__unix_insert_socket(list, sk);
271 	spin_unlock(&unix_table_lock);
272 }
273 
274 static struct sock *__unix_find_socket_byname(struct net *net,
275 					      struct sockaddr_un *sunname,
276 					      int len, int type, unsigned int hash)
277 {
278 	struct sock *s;
279 
280 	sk_for_each(s, &unix_socket_table[hash ^ type]) {
281 		struct unix_sock *u = unix_sk(s);
282 
283 		if (!net_eq(sock_net(s), net))
284 			continue;
285 
286 		if (u->addr->len == len &&
287 		    !memcmp(u->addr->name, sunname, len))
288 			goto found;
289 	}
290 	s = NULL;
291 found:
292 	return s;
293 }
294 
295 static inline struct sock *unix_find_socket_byname(struct net *net,
296 						   struct sockaddr_un *sunname,
297 						   int len, int type,
298 						   unsigned int hash)
299 {
300 	struct sock *s;
301 
302 	spin_lock(&unix_table_lock);
303 	s = __unix_find_socket_byname(net, sunname, len, type, hash);
304 	if (s)
305 		sock_hold(s);
306 	spin_unlock(&unix_table_lock);
307 	return s;
308 }
309 
310 static struct sock *unix_find_socket_byinode(struct inode *i)
311 {
312 	struct sock *s;
313 
314 	spin_lock(&unix_table_lock);
315 	sk_for_each(s,
316 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
317 		struct dentry *dentry = unix_sk(s)->path.dentry;
318 
319 		if (dentry && d_backing_inode(dentry) == i) {
320 			sock_hold(s);
321 			goto found;
322 		}
323 	}
324 	s = NULL;
325 found:
326 	spin_unlock(&unix_table_lock);
327 	return s;
328 }
329 
330 /* Support code for asymmetrically connected dgram sockets
331  *
332  * If a datagram socket is connected to a socket not itself connected
332  * to the first socket (e.g., /dev/log), clients may only enqueue more
334  * messages if the present receive queue of the server socket is not
335  * "too large". This means there's a second writeability condition
336  * poll and sendmsg need to test. The dgram recv code will do a wake
337  * up on the peer_wait wait queue of a socket upon reception of a
338  * datagram which needs to be propagated to sleeping would-be writers
339  * since these might not have sent anything so far. This can't be
340  * accomplished via poll_wait because the lifetime of the server
341  * socket might be less than that of its clients if these break their
342  * association with it or if the server socket is closed while clients
343  * are still connected to it and there's no way to inform "a polling
344  * implementation" that it should let go of a certain wait queue.
345  *
346  * In order to propagate a wake up, a wait_queue_entry_t of the client
347  * socket is enqueued on the peer_wait queue of the server socket
348  * whose wake function does a wake_up on the ordinary client socket
349  * wait queue. This connection is established whenever a write (or
350  * poll for write) hits the flow control condition, and is broken when
351  * the association to the server socket is dissolved or after a wake up
352  * was relayed.
353  */
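/*
 * Sketch of the relay hookup using the generic wait queue machinery
 * (this is what unix_create1() and unix_dgram_peer_wake_connect() below
 * do, shown here without the already-connected check):
 *
 *	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 *	spin_lock(&u_other->peer_wait.lock);
 *	__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
 *	spin_unlock(&u_other->peer_wait.lock);
 *
 * A later wake_up on u_other->peer_wait then invokes the relay
 * function, which wakes the client's own sk_sleep() wait queue.
 */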
354 
355 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
356 				      void *key)
357 {
358 	struct unix_sock *u;
359 	wait_queue_head_t *u_sleep;
360 
361 	u = container_of(q, struct unix_sock, peer_wake);
362 
363 	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
364 			    q);
365 	u->peer_wake.private = NULL;
366 
367 	/* relaying can only happen while the wq still exists */
368 	u_sleep = sk_sleep(&u->sk);
369 	if (u_sleep)
370 		wake_up_interruptible_poll(u_sleep, key_to_poll(key));
371 
372 	return 0;
373 }
374 
375 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
376 {
377 	struct unix_sock *u, *u_other;
378 	int rc;
379 
380 	u = unix_sk(sk);
381 	u_other = unix_sk(other);
382 	rc = 0;
383 	spin_lock(&u_other->peer_wait.lock);
384 
385 	if (!u->peer_wake.private) {
386 		u->peer_wake.private = other;
387 		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
388 
389 		rc = 1;
390 	}
391 
392 	spin_unlock(&u_other->peer_wait.lock);
393 	return rc;
394 }
395 
396 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
397 					    struct sock *other)
398 {
399 	struct unix_sock *u, *u_other;
400 
401 	u = unix_sk(sk);
402 	u_other = unix_sk(other);
403 	spin_lock(&u_other->peer_wait.lock);
404 
405 	if (u->peer_wake.private == other) {
406 		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
407 		u->peer_wake.private = NULL;
408 	}
409 
410 	spin_unlock(&u_other->peer_wait.lock);
411 }
412 
413 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
414 						   struct sock *other)
415 {
416 	unix_dgram_peer_wake_disconnect(sk, other);
417 	wake_up_interruptible_poll(sk_sleep(sk),
418 				   EPOLLOUT |
419 				   EPOLLWRNORM |
420 				   EPOLLWRBAND);
421 }
422 
423 /* preconditions:
424  *	- unix_peer(sk) == other
425  *	- association is stable
426  */
427 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
428 {
429 	int connected;
430 
431 	connected = unix_dgram_peer_wake_connect(sk, other);
432 
433 	if (unix_recvq_full(other))
434 		return 1;
435 
436 	if (connected)
437 		unix_dgram_peer_wake_disconnect(sk, other);
438 
439 	return 0;
440 }
441 
442 static int unix_writable(const struct sock *sk)
443 {
444 	return sk->sk_state != TCP_LISTEN &&
445 	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
446 }
447 
448 static void unix_write_space(struct sock *sk)
449 {
450 	struct socket_wq *wq;
451 
452 	rcu_read_lock();
453 	if (unix_writable(sk)) {
454 		wq = rcu_dereference(sk->sk_wq);
455 		if (skwq_has_sleeper(wq))
456 			wake_up_interruptible_sync_poll(&wq->wait,
457 				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
458 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
459 	}
460 	rcu_read_unlock();
461 }
462 
463 /* When a dgram socket disconnects (or changes its peer), we clear its
464  * receive queue of packets that arrived from the previous peer. First, this
465  * allows flow control based only on wmem_alloc; second, an sk connected to
466  * a peer may receive messages only from that peer. */
467 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
468 {
469 	if (!skb_queue_empty(&sk->sk_receive_queue)) {
470 		skb_queue_purge(&sk->sk_receive_queue);
471 		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
472 
473 		/* If one link of a bidirectional dgram pipe is disconnected,
474 		 * we signal an error. Messages are lost. Do not do this
475 		 * when the peer was not connected to us.
476 		 */
477 		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
478 			other->sk_err = ECONNRESET;
479 			other->sk_error_report(other);
480 		}
481 	}
482 }
483 
484 static void unix_sock_destructor(struct sock *sk)
485 {
486 	struct unix_sock *u = unix_sk(sk);
487 
488 	skb_queue_purge(&sk->sk_receive_queue);
489 
490 	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
491 	WARN_ON(!sk_unhashed(sk));
492 	WARN_ON(sk->sk_socket);
493 	if (!sock_flag(sk, SOCK_DEAD)) {
494 		pr_info("Attempt to release alive unix socket: %p\n", sk);
495 		return;
496 	}
497 
498 	if (u->addr)
499 		unix_release_addr(u->addr);
500 
501 	atomic_long_dec(&unix_nr_socks);
502 	local_bh_disable();
503 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
504 	local_bh_enable();
505 #ifdef UNIX_REFCNT_DEBUG
506 	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
507 		atomic_long_read(&unix_nr_socks));
508 #endif
509 }
510 
511 static void unix_release_sock(struct sock *sk, int embrion)
512 {
513 	struct unix_sock *u = unix_sk(sk);
514 	struct path path;
515 	struct sock *skpair;
516 	struct sk_buff *skb;
517 	int state;
518 
519 	unix_remove_socket(sk);
520 
521 	/* Clear state */
522 	unix_state_lock(sk);
523 	sock_orphan(sk);
524 	sk->sk_shutdown = SHUTDOWN_MASK;
525 	path	     = u->path;
526 	u->path.dentry = NULL;
527 	u->path.mnt = NULL;
528 	state = sk->sk_state;
529 	sk->sk_state = TCP_CLOSE;
530 	unix_state_unlock(sk);
531 
532 	wake_up_interruptible_all(&u->peer_wait);
533 
534 	skpair = unix_peer(sk);
535 
536 	if (skpair != NULL) {
537 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
538 			unix_state_lock(skpair);
539 			/* No more writes */
540 			skpair->sk_shutdown = SHUTDOWN_MASK;
541 			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
542 				skpair->sk_err = ECONNRESET;
543 			unix_state_unlock(skpair);
544 			skpair->sk_state_change(skpair);
545 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
546 		}
547 
548 		unix_dgram_peer_wake_disconnect(sk, skpair);
549 		sock_put(skpair); /* It may now die */
550 		unix_peer(sk) = NULL;
551 	}
552 
553 	/* Try to flush out this socket. Throw out buffers at least */
554 
555 	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
556 		if (state == TCP_LISTEN)
557 			unix_release_sock(skb->sk, 1);
558 		/* passed fds are erased in the kfree_skb hook	      */
559 		UNIXCB(skb).consumed = skb->len;
560 		kfree_skb(skb);
561 	}
562 
563 	if (path.dentry)
564 		path_put(&path);
565 
566 	sock_put(sk);
567 
568 	/* ---- Socket is dead now and most probably destroyed ---- */
569 
570 	/*
571 	 * Fixme: BSD difference: In BSD all sockets connected to us get
572 	 *	  ECONNRESET and we die on the spot. In Linux we behave
573 	 *	  like files and pipes do and wait for the last
574 	 *	  dereference.
575 	 *
576 	 * Can't we simply set sock->err?
577 	 *
578 	 *	  What does the above comment talk about? --ANK(980817)
579 	 */
580 
581 	if (unix_tot_inflight)
582 		unix_gc();		/* Garbage collect fds */
583 }
584 
585 static void init_peercred(struct sock *sk)
586 {
587 	put_pid(sk->sk_peer_pid);
588 	if (sk->sk_peer_cred)
589 		put_cred(sk->sk_peer_cred);
590 	sk->sk_peer_pid  = get_pid(task_tgid(current));
591 	sk->sk_peer_cred = get_current_cred();
592 }
593 
594 static void copy_peercred(struct sock *sk, struct sock *peersk)
595 {
596 	put_pid(sk->sk_peer_pid);
597 	if (sk->sk_peer_cred)
598 		put_cred(sk->sk_peer_cred);
599 	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
600 	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
601 }
602 
603 static int unix_listen(struct socket *sock, int backlog)
604 {
605 	int err;
606 	struct sock *sk = sock->sk;
607 	struct unix_sock *u = unix_sk(sk);
608 	struct pid *old_pid = NULL;
609 
610 	err = -EOPNOTSUPP;
611 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
612 		goto out;	/* Only stream/seqpacket sockets accept */
613 	err = -EINVAL;
614 	if (!u->addr)
615 		goto out;	/* No listens on an unbound socket */
616 	unix_state_lock(sk);
617 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
618 		goto out_unlock;
619 	if (backlog > sk->sk_max_ack_backlog)
620 		wake_up_interruptible_all(&u->peer_wait);
621 	sk->sk_max_ack_backlog	= backlog;
622 	sk->sk_state		= TCP_LISTEN;
623 	/* set credentials so connect can copy them */
624 	init_peercred(sk);
625 	err = 0;
626 
627 out_unlock:
628 	unix_state_unlock(sk);
629 	put_pid(old_pid);
630 out:
631 	return err;
632 }
633 
634 static int unix_release(struct socket *);
635 static int unix_bind(struct socket *, struct sockaddr *, int);
636 static int unix_stream_connect(struct socket *, struct sockaddr *,
637 			       int addr_len, int flags);
638 static int unix_socketpair(struct socket *, struct socket *);
639 static int unix_accept(struct socket *, struct socket *, int, bool);
640 static int unix_getname(struct socket *, struct sockaddr *, int);
641 static __poll_t unix_poll_mask(struct socket *, __poll_t);
642 static __poll_t unix_dgram_poll_mask(struct socket *, __poll_t);
643 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
644 static int unix_shutdown(struct socket *, int);
645 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
646 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
647 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
648 				    size_t size, int flags);
649 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
650 				       struct pipe_inode_info *, size_t size,
651 				       unsigned int flags);
652 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
653 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
654 static int unix_dgram_connect(struct socket *, struct sockaddr *,
655 			      int, int);
656 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
657 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
658 				  int);
659 
660 static int unix_set_peek_off(struct sock *sk, int val)
661 {
662 	struct unix_sock *u = unix_sk(sk);
663 
664 	if (mutex_lock_interruptible(&u->iolock))
665 		return -EINTR;
666 
667 	sk->sk_peek_off = val;
668 	mutex_unlock(&u->iolock);
669 
670 	return 0;
671 }
672 
673 
674 static const struct proto_ops unix_stream_ops = {
675 	.family =	PF_UNIX,
676 	.owner =	THIS_MODULE,
677 	.release =	unix_release,
678 	.bind =		unix_bind,
679 	.connect =	unix_stream_connect,
680 	.socketpair =	unix_socketpair,
681 	.accept =	unix_accept,
682 	.getname =	unix_getname,
683 	.poll_mask =	unix_poll_mask,
684 	.ioctl =	unix_ioctl,
685 	.listen =	unix_listen,
686 	.shutdown =	unix_shutdown,
687 	.setsockopt =	sock_no_setsockopt,
688 	.getsockopt =	sock_no_getsockopt,
689 	.sendmsg =	unix_stream_sendmsg,
690 	.recvmsg =	unix_stream_recvmsg,
691 	.mmap =		sock_no_mmap,
692 	.sendpage =	unix_stream_sendpage,
693 	.splice_read =	unix_stream_splice_read,
694 	.set_peek_off =	unix_set_peek_off,
695 };
696 
697 static const struct proto_ops unix_dgram_ops = {
698 	.family =	PF_UNIX,
699 	.owner =	THIS_MODULE,
700 	.release =	unix_release,
701 	.bind =		unix_bind,
702 	.connect =	unix_dgram_connect,
703 	.socketpair =	unix_socketpair,
704 	.accept =	sock_no_accept,
705 	.getname =	unix_getname,
706 	.poll_mask =	unix_dgram_poll_mask,
707 	.ioctl =	unix_ioctl,
708 	.listen =	sock_no_listen,
709 	.shutdown =	unix_shutdown,
710 	.setsockopt =	sock_no_setsockopt,
711 	.getsockopt =	sock_no_getsockopt,
712 	.sendmsg =	unix_dgram_sendmsg,
713 	.recvmsg =	unix_dgram_recvmsg,
714 	.mmap =		sock_no_mmap,
715 	.sendpage =	sock_no_sendpage,
716 	.set_peek_off =	unix_set_peek_off,
717 };
718 
719 static const struct proto_ops unix_seqpacket_ops = {
720 	.family =	PF_UNIX,
721 	.owner =	THIS_MODULE,
722 	.release =	unix_release,
723 	.bind =		unix_bind,
724 	.connect =	unix_stream_connect,
725 	.socketpair =	unix_socketpair,
726 	.accept =	unix_accept,
727 	.getname =	unix_getname,
728 	.poll_mask =	unix_dgram_poll_mask,
729 	.ioctl =	unix_ioctl,
730 	.listen =	unix_listen,
731 	.shutdown =	unix_shutdown,
732 	.setsockopt =	sock_no_setsockopt,
733 	.getsockopt =	sock_no_getsockopt,
734 	.sendmsg =	unix_seqpacket_sendmsg,
735 	.recvmsg =	unix_seqpacket_recvmsg,
736 	.mmap =		sock_no_mmap,
737 	.sendpage =	sock_no_sendpage,
738 	.set_peek_off =	unix_set_peek_off,
739 };
740 
741 static struct proto unix_proto = {
742 	.name			= "UNIX",
743 	.owner			= THIS_MODULE,
744 	.obj_size		= sizeof(struct unix_sock),
745 };
746 
747 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
748 {
749 	struct sock *sk = NULL;
750 	struct unix_sock *u;
751 
752 	atomic_long_inc(&unix_nr_socks);
753 	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
754 		goto out;
755 
756 	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
757 	if (!sk)
758 		goto out;
759 
760 	sock_init_data(sock, sk);
761 
762 	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
763 	sk->sk_write_space	= unix_write_space;
764 	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
765 	sk->sk_destruct		= unix_sock_destructor;
766 	u	  = unix_sk(sk);
767 	u->path.dentry = NULL;
768 	u->path.mnt = NULL;
769 	spin_lock_init(&u->lock);
770 	atomic_long_set(&u->inflight, 0);
771 	INIT_LIST_HEAD(&u->link);
772 	mutex_init(&u->iolock); /* single task reading lock */
773 	mutex_init(&u->bindlock); /* single task binding lock */
774 	init_waitqueue_head(&u->peer_wait);
775 	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
776 	unix_insert_socket(unix_sockets_unbound(sk), sk);
777 out:
778 	if (sk == NULL)
779 		atomic_long_dec(&unix_nr_socks);
780 	else {
781 		local_bh_disable();
782 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
783 		local_bh_enable();
784 	}
785 	return sk;
786 }
787 
788 static int unix_create(struct net *net, struct socket *sock, int protocol,
789 		       int kern)
790 {
791 	if (protocol && protocol != PF_UNIX)
792 		return -EPROTONOSUPPORT;
793 
794 	sock->state = SS_UNCONNECTED;
795 
796 	switch (sock->type) {
797 	case SOCK_STREAM:
798 		sock->ops = &unix_stream_ops;
799 		break;
800 		/*
801 		 *	Believe it or not, BSD has AF_UNIX, SOCK_RAW,
802 		 *	though nothing uses it.
803 		 */
804 	case SOCK_RAW:
805 		sock->type = SOCK_DGRAM;
806 		/* fall through */
807 	case SOCK_DGRAM:
808 		sock->ops = &unix_dgram_ops;
809 		break;
810 	case SOCK_SEQPACKET:
811 		sock->ops = &unix_seqpacket_ops;
812 		break;
813 	default:
814 		return -ESOCKTNOSUPPORT;
815 	}
816 
817 	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
818 }
819 
820 static int unix_release(struct socket *sock)
821 {
822 	struct sock *sk = sock->sk;
823 
824 	if (!sk)
825 		return 0;
826 
827 	unix_release_sock(sk, 0);
828 	sock->sk = NULL;
829 
830 	return 0;
831 }
832 
833 static int unix_autobind(struct socket *sock)
834 {
835 	struct sock *sk = sock->sk;
836 	struct net *net = sock_net(sk);
837 	struct unix_sock *u = unix_sk(sk);
838 	static u32 ordernum = 1;
839 	struct unix_address *addr;
840 	int err;
841 	unsigned int retries = 0;
842 
843 	err = mutex_lock_interruptible(&u->bindlock);
844 	if (err)
845 		return err;
846 
847 	err = 0;
848 	if (u->addr)
849 		goto out;
850 
851 	err = -ENOMEM;
852 	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
853 	if (!addr)
854 		goto out;
855 
856 	addr->name->sun_family = AF_UNIX;
857 	refcount_set(&addr->refcnt, 1);
858 
859 retry:
860 	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
861 	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
862 
863 	spin_lock(&unix_table_lock);
864 	ordernum = (ordernum+1)&0xFFFFF;
865 
866 	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
867 				      addr->hash)) {
868 		spin_unlock(&unix_table_lock);
869 		/*
870 		 * __unix_find_socket_byname() may take a long time if many names
871 		 * are already in use.
872 		 */
873 		cond_resched();
874 		/* Give up if all names seem to be in use. */
875 		if (retries++ == 0xFFFFF) {
876 			err = -ENOSPC;
877 			kfree(addr);
878 			goto out;
879 		}
880 		goto retry;
881 	}
882 	addr->hash ^= sk->sk_type;
883 
884 	__unix_remove_socket(sk);
885 	u->addr = addr;
886 	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
887 	spin_unlock(&unix_table_lock);
888 	err = 0;
889 
890 out:	mutex_unlock(&u->bindlock);
891 	return err;
892 }
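/*
 * Userspace sketch of triggering autobind (illustrative only): passing
 * just the address family, with no name at all, assigns an abstract
 * "\0XXXXX" name with five hex digits as generated above.
 *
 *	struct sockaddr_un sun = { .sun_family = AF_UNIX };
 *	int fd = socket(AF_UNIX, SOCK_DGRAM, 0);
 *
 *	bind(fd, (struct sockaddr *)&sun, sizeof(sa_family_t));
 */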
893 
894 static struct sock *unix_find_other(struct net *net,
895 				    struct sockaddr_un *sunname, int len,
896 				    int type, unsigned int hash, int *error)
897 {
898 	struct sock *u;
899 	struct path path;
900 	int err = 0;
901 
902 	if (sunname->sun_path[0]) {
903 		struct inode *inode;
904 		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
905 		if (err)
906 			goto fail;
907 		inode = d_backing_inode(path.dentry);
908 		err = inode_permission(inode, MAY_WRITE);
909 		if (err)
910 			goto put_fail;
911 
912 		err = -ECONNREFUSED;
913 		if (!S_ISSOCK(inode->i_mode))
914 			goto put_fail;
915 		u = unix_find_socket_byinode(inode);
916 		if (!u)
917 			goto put_fail;
918 
919 		if (u->sk_type == type)
920 			touch_atime(&path);
921 
922 		path_put(&path);
923 
924 		err = -EPROTOTYPE;
925 		if (u->sk_type != type) {
926 			sock_put(u);
927 			goto fail;
928 		}
929 	} else {
930 		err = -ECONNREFUSED;
931 		u = unix_find_socket_byname(net, sunname, len, type, hash);
932 		if (u) {
933 			struct dentry *dentry;
934 			dentry = unix_sk(u)->path.dentry;
935 			if (dentry)
936 				touch_atime(&unix_sk(u)->path);
937 		} else
938 			goto fail;
939 	}
940 	return u;
941 
942 put_fail:
943 	path_put(&path);
944 fail:
945 	*error = err;
946 	return NULL;
947 }
948 
949 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
950 {
951 	struct dentry *dentry;
952 	struct path path;
953 	int err = 0;
954 	/*
955 	 * Get the parent directory, calculate the hash for last
956 	 * component.
957 	 */
958 	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
959 	err = PTR_ERR(dentry);
960 	if (IS_ERR(dentry))
961 		return err;
962 
963 	/*
964 	 * All right, let's create it.
965 	 */
966 	err = security_path_mknod(&path, dentry, mode, 0);
967 	if (!err) {
968 		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
969 		if (!err) {
970 			res->mnt = mntget(path.mnt);
971 			res->dentry = dget(dentry);
972 		}
973 	}
974 	done_path_create(&path, dentry);
975 	return err;
976 }
977 
978 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
979 {
980 	struct sock *sk = sock->sk;
981 	struct net *net = sock_net(sk);
982 	struct unix_sock *u = unix_sk(sk);
983 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
984 	char *sun_path = sunaddr->sun_path;
985 	int err;
986 	unsigned int hash;
987 	struct unix_address *addr;
988 	struct hlist_head *list;
989 	struct path path = { };
990 
991 	err = -EINVAL;
992 	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
993 	    sunaddr->sun_family != AF_UNIX)
994 		goto out;
995 
996 	if (addr_len == sizeof(short)) {
997 		err = unix_autobind(sock);
998 		goto out;
999 	}
1000 
1001 	err = unix_mkname(sunaddr, addr_len, &hash);
1002 	if (err < 0)
1003 		goto out;
1004 	addr_len = err;
1005 
1006 	if (sun_path[0]) {
1007 		umode_t mode = S_IFSOCK |
1008 		       (SOCK_INODE(sock)->i_mode & ~current_umask());
1009 		err = unix_mknod(sun_path, mode, &path);
1010 		if (err) {
1011 			if (err == -EEXIST)
1012 				err = -EADDRINUSE;
1013 			goto out;
1014 		}
1015 	}
1016 
1017 	err = mutex_lock_interruptible(&u->bindlock);
1018 	if (err)
1019 		goto out_put;
1020 
1021 	err = -EINVAL;
1022 	if (u->addr)
1023 		goto out_up;
1024 
1025 	err = -ENOMEM;
1026 	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1027 	if (!addr)
1028 		goto out_up;
1029 
1030 	memcpy(addr->name, sunaddr, addr_len);
1031 	addr->len = addr_len;
1032 	addr->hash = hash ^ sk->sk_type;
1033 	refcount_set(&addr->refcnt, 1);
1034 
1035 	if (sun_path[0]) {
1036 		addr->hash = UNIX_HASH_SIZE;
1037 		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1038 		spin_lock(&unix_table_lock);
1039 		u->path = path;
1040 		list = &unix_socket_table[hash];
1041 	} else {
1042 		spin_lock(&unix_table_lock);
1043 		err = -EADDRINUSE;
1044 		if (__unix_find_socket_byname(net, sunaddr, addr_len,
1045 					      sk->sk_type, hash)) {
1046 			unix_release_addr(addr);
1047 			goto out_unlock;
1048 		}
1049 
1050 		list = &unix_socket_table[addr->hash];
1051 	}
1052 
1053 	err = 0;
1054 	__unix_remove_socket(sk);
1055 	u->addr = addr;
1056 	__unix_insert_socket(list, sk);
1057 
1058 out_unlock:
1059 	spin_unlock(&unix_table_lock);
1060 out_up:
1061 	mutex_unlock(&u->bindlock);
1062 out_put:
1063 	if (err)
1064 		path_put(&path);
1065 out:
1066 	return err;
1067 }
1068 
1069 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1070 {
1071 	if (unlikely(sk1 == sk2) || !sk2) {
1072 		unix_state_lock(sk1);
1073 		return;
1074 	}
1075 	if (sk1 < sk2) {
1076 		unix_state_lock(sk1);
1077 		unix_state_lock_nested(sk2);
1078 	} else {
1079 		unix_state_lock(sk2);
1080 		unix_state_lock_nested(sk1);
1081 	}
1082 }
1083 
1084 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1085 {
1086 	if (unlikely(sk1 == sk2) || !sk2) {
1087 		unix_state_unlock(sk1);
1088 		return;
1089 	}
1090 	unix_state_unlock(sk1);
1091 	unix_state_unlock(sk2);
1092 }
1093 
1094 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1095 			      int alen, int flags)
1096 {
1097 	struct sock *sk = sock->sk;
1098 	struct net *net = sock_net(sk);
1099 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1100 	struct sock *other;
1101 	unsigned int hash;
1102 	int err;
1103 
1104 	err = -EINVAL;
1105 	if (alen < offsetofend(struct sockaddr, sa_family))
1106 		goto out;
1107 
1108 	if (addr->sa_family != AF_UNSPEC) {
1109 		err = unix_mkname(sunaddr, alen, &hash);
1110 		if (err < 0)
1111 			goto out;
1112 		alen = err;
1113 
1114 		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1115 		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1116 			goto out;
1117 
1118 restart:
1119 		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1120 		if (!other)
1121 			goto out;
1122 
1123 		unix_state_double_lock(sk, other);
1124 
1125 		/* Apparently VFS overslept socket death. Retry. */
1126 		if (sock_flag(other, SOCK_DEAD)) {
1127 			unix_state_double_unlock(sk, other);
1128 			sock_put(other);
1129 			goto restart;
1130 		}
1131 
1132 		err = -EPERM;
1133 		if (!unix_may_send(sk, other))
1134 			goto out_unlock;
1135 
1136 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1137 		if (err)
1138 			goto out_unlock;
1139 
1140 	} else {
1141 		/*
1142 		 *	1003.1g breaking connected state with AF_UNSPEC
1143 		 */
1144 		other = NULL;
1145 		unix_state_double_lock(sk, other);
1146 	}
1147 
1148 	/*
1149 	 * If it was connected, reconnect.
1150 	 */
1151 	if (unix_peer(sk)) {
1152 		struct sock *old_peer = unix_peer(sk);
1153 		unix_peer(sk) = other;
1154 		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1155 
1156 		unix_state_double_unlock(sk, other);
1157 
1158 		if (other != old_peer)
1159 			unix_dgram_disconnected(sk, old_peer);
1160 		sock_put(old_peer);
1161 	} else {
1162 		unix_peer(sk) = other;
1163 		unix_state_double_unlock(sk, other);
1164 	}
1165 	return 0;
1166 
1167 out_unlock:
1168 	unix_state_double_unlock(sk, other);
1169 	sock_put(other);
1170 out:
1171 	return err;
1172 }
1173 
1174 static long unix_wait_for_peer(struct sock *other, long timeo)
1175 {
1176 	struct unix_sock *u = unix_sk(other);
1177 	int sched;
1178 	DEFINE_WAIT(wait);
1179 
1180 	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1181 
1182 	sched = !sock_flag(other, SOCK_DEAD) &&
1183 		!(other->sk_shutdown & RCV_SHUTDOWN) &&
1184 		unix_recvq_full(other);
1185 
1186 	unix_state_unlock(other);
1187 
1188 	if (sched)
1189 		timeo = schedule_timeout(timeo);
1190 
1191 	finish_wait(&u->peer_wait, &wait);
1192 	return timeo;
1193 }
1194 
1195 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1196 			       int addr_len, int flags)
1197 {
1198 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1199 	struct sock *sk = sock->sk;
1200 	struct net *net = sock_net(sk);
1201 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1202 	struct sock *newsk = NULL;
1203 	struct sock *other = NULL;
1204 	struct sk_buff *skb = NULL;
1205 	unsigned int hash;
1206 	int st;
1207 	int err;
1208 	long timeo;
1209 
1210 	err = unix_mkname(sunaddr, addr_len, &hash);
1211 	if (err < 0)
1212 		goto out;
1213 	addr_len = err;
1214 
1215 	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1216 	    (err = unix_autobind(sock)) != 0)
1217 		goto out;
1218 
1219 	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1220 
1221 	/* First of all, allocate resources.
1222 	   If we did it after the state was locked,
1223 	   we would have to recheck everything again in any case.
1224 	 */
1225 
1226 	err = -ENOMEM;
1227 
1228 	/* create new sock for complete connection */
1229 	newsk = unix_create1(sock_net(sk), NULL, 0);
1230 	if (newsk == NULL)
1231 		goto out;
1232 
1233 	/* Allocate skb for sending to listening sock */
1234 	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1235 	if (skb == NULL)
1236 		goto out;
1237 
1238 restart:
1239 	/*  Find listening sock. */
1240 	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1241 	if (!other)
1242 		goto out;
1243 
1244 	/* Latch state of peer */
1245 	unix_state_lock(other);
1246 
1247 	/* Apparently VFS overslept socket death. Retry. */
1248 	if (sock_flag(other, SOCK_DEAD)) {
1249 		unix_state_unlock(other);
1250 		sock_put(other);
1251 		goto restart;
1252 	}
1253 
1254 	err = -ECONNREFUSED;
1255 	if (other->sk_state != TCP_LISTEN)
1256 		goto out_unlock;
1257 	if (other->sk_shutdown & RCV_SHUTDOWN)
1258 		goto out_unlock;
1259 
1260 	if (unix_recvq_full(other)) {
1261 		err = -EAGAIN;
1262 		if (!timeo)
1263 			goto out_unlock;
1264 
1265 		timeo = unix_wait_for_peer(other, timeo);
1266 
1267 		err = sock_intr_errno(timeo);
1268 		if (signal_pending(current))
1269 			goto out;
1270 		sock_put(other);
1271 		goto restart;
1272 	}
1273 
1274 	/* Latch our state.
1275 
1276 	   This is a tricky place. We need to grab our state lock and cannot
1277 	   drop the lock on the peer. It is dangerous because a deadlock is
1278 	   possible. The connect-to-self case and a simultaneous
1279 	   attempt to connect are eliminated by checking the socket
1280 	   state. other is TCP_LISTEN; if sk is TCP_LISTEN too, we
1281 	   check this before attempting to grab the lock.
1282 
1283 	   Well, and we have to recheck the state after the socket is locked.
1284 	 */
1285 	st = sk->sk_state;
1286 
1287 	switch (st) {
1288 	case TCP_CLOSE:
1289 		/* This is ok... continue with connect */
1290 		break;
1291 	case TCP_ESTABLISHED:
1292 		/* Socket is already connected */
1293 		err = -EISCONN;
1294 		goto out_unlock;
1295 	default:
1296 		err = -EINVAL;
1297 		goto out_unlock;
1298 	}
1299 
1300 	unix_state_lock_nested(sk);
1301 
1302 	if (sk->sk_state != st) {
1303 		unix_state_unlock(sk);
1304 		unix_state_unlock(other);
1305 		sock_put(other);
1306 		goto restart;
1307 	}
1308 
1309 	err = security_unix_stream_connect(sk, other, newsk);
1310 	if (err) {
1311 		unix_state_unlock(sk);
1312 		goto out_unlock;
1313 	}
1314 
1315 	/* The way is open! Quickly set all the necessary fields... */
1316 
1317 	sock_hold(sk);
1318 	unix_peer(newsk)	= sk;
1319 	newsk->sk_state		= TCP_ESTABLISHED;
1320 	newsk->sk_type		= sk->sk_type;
1321 	init_peercred(newsk);
1322 	newu = unix_sk(newsk);
1323 	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1324 	otheru = unix_sk(other);
1325 
1326 	/* copy address information from listening to new sock */
1327 	if (otheru->addr) {
1328 		refcount_inc(&otheru->addr->refcnt);
1329 		newu->addr = otheru->addr;
1330 	}
1331 	if (otheru->path.dentry) {
1332 		path_get(&otheru->path);
1333 		newu->path = otheru->path;
1334 	}
1335 
1336 	/* Set credentials */
1337 	copy_peercred(sk, other);
1338 
1339 	sock->state	= SS_CONNECTED;
1340 	sk->sk_state	= TCP_ESTABLISHED;
1341 	sock_hold(newsk);
1342 
1343 	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
1344 	unix_peer(sk)	= newsk;
1345 
1346 	unix_state_unlock(sk);
1347 
1348 	/* take ten and send info to listening sock */
1349 	spin_lock(&other->sk_receive_queue.lock);
1350 	__skb_queue_tail(&other->sk_receive_queue, skb);
1351 	spin_unlock(&other->sk_receive_queue.lock);
1352 	unix_state_unlock(other);
1353 	other->sk_data_ready(other);
1354 	sock_put(other);
1355 	return 0;
1356 
1357 out_unlock:
1358 	if (other)
1359 		unix_state_unlock(other);
1360 
1361 out:
1362 	kfree_skb(skb);
1363 	if (newsk)
1364 		unix_release_sock(newsk, 0);
1365 	if (other)
1366 		sock_put(other);
1367 	return err;
1368 }
1369 
1370 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1371 {
1372 	struct sock *ska = socka->sk, *skb = sockb->sk;
1373 
1374 	/* Join our sockets back to back */
1375 	sock_hold(ska);
1376 	sock_hold(skb);
1377 	unix_peer(ska) = skb;
1378 	unix_peer(skb) = ska;
1379 	init_peercred(ska);
1380 	init_peercred(skb);
1381 
1382 	if (ska->sk_type != SOCK_DGRAM) {
1383 		ska->sk_state = TCP_ESTABLISHED;
1384 		skb->sk_state = TCP_ESTABLISHED;
1385 		socka->state  = SS_CONNECTED;
1386 		sockb->state  = SS_CONNECTED;
1387 	}
1388 	return 0;
1389 }
1390 
1391 static void unix_sock_inherit_flags(const struct socket *old,
1392 				    struct socket *new)
1393 {
1394 	if (test_bit(SOCK_PASSCRED, &old->flags))
1395 		set_bit(SOCK_PASSCRED, &new->flags);
1396 	if (test_bit(SOCK_PASSSEC, &old->flags))
1397 		set_bit(SOCK_PASSSEC, &new->flags);
1398 }
1399 
1400 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1401 		       bool kern)
1402 {
1403 	struct sock *sk = sock->sk;
1404 	struct sock *tsk;
1405 	struct sk_buff *skb;
1406 	int err;
1407 
1408 	err = -EOPNOTSUPP;
1409 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1410 		goto out;
1411 
1412 	err = -EINVAL;
1413 	if (sk->sk_state != TCP_LISTEN)
1414 		goto out;
1415 
1416 	/* If socket state is TCP_LISTEN it cannot change (for now...),
1417 	 * so no locks are necessary.
1418 	 */
1419 
1420 	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1421 	if (!skb) {
1422 		/* This means receive shutdown. */
1423 		if (err == 0)
1424 			err = -EINVAL;
1425 		goto out;
1426 	}
1427 
1428 	tsk = skb->sk;
1429 	skb_free_datagram(sk, skb);
1430 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
1431 
1432 	/* attach accepted sock to socket */
1433 	unix_state_lock(tsk);
1434 	newsock->state = SS_CONNECTED;
1435 	unix_sock_inherit_flags(sock, newsock);
1436 	sock_graft(tsk, newsock);
1437 	unix_state_unlock(tsk);
1438 	return 0;
1439 
1440 out:
1441 	return err;
1442 }
1443 
1444 
1445 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1446 {
1447 	struct sock *sk = sock->sk;
1448 	struct unix_sock *u;
1449 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1450 	int err = 0;
1451 
1452 	if (peer) {
1453 		sk = unix_peer_get(sk);
1454 
1455 		err = -ENOTCONN;
1456 		if (!sk)
1457 			goto out;
1458 		err = 0;
1459 	} else {
1460 		sock_hold(sk);
1461 	}
1462 
1463 	u = unix_sk(sk);
1464 	unix_state_lock(sk);
1465 	if (!u->addr) {
1466 		sunaddr->sun_family = AF_UNIX;
1467 		sunaddr->sun_path[0] = 0;
1468 		err = sizeof(short);
1469 	} else {
1470 		struct unix_address *addr = u->addr;
1471 
1472 		err = addr->len;
1473 		memcpy(sunaddr, addr->name, addr->len);
1474 	}
1475 	unix_state_unlock(sk);
1476 	sock_put(sk);
1477 out:
1478 	return err;
1479 }
1480 
1481 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1482 {
1483 	int i;
1484 
1485 	scm->fp = UNIXCB(skb).fp;
1486 	UNIXCB(skb).fp = NULL;
1487 
1488 	for (i = scm->fp->count-1; i >= 0; i--)
1489 		unix_notinflight(scm->fp->user, scm->fp->fp[i]);
1490 }
1491 
1492 static void unix_destruct_scm(struct sk_buff *skb)
1493 {
1494 	struct scm_cookie scm;
1495 	memset(&scm, 0, sizeof(scm));
1496 	scm.pid  = UNIXCB(skb).pid;
1497 	if (UNIXCB(skb).fp)
1498 		unix_detach_fds(&scm, skb);
1499 
1500 	/* Alas, it calls VFS */
1501 	/* So fscking what? fput() had been SMP-safe since the last Summer */
1502 	scm_destroy(&scm);
1503 	sock_wfree(skb);
1504 }
1505 
1506 /*
1507  * The "user->unix_inflight" variable is protected by the garbage
1508  * collection lock, and we just read it locklessly here. If you go
1509  * over the limit, there might be a tiny race in actually noticing
1510  * it across threads. Tough.
1511  */
1512 static inline bool too_many_unix_fds(struct task_struct *p)
1513 {
1514 	struct user_struct *user = current_user();
1515 
1516 	if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
1517 		return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1518 	return false;
1519 }
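/*
 * For reference, a userspace sketch of what creates the in-flight fds
 * counted against RLIMIT_NOFILE here (fd_to_pass is a made-up name;
 * error handling omitted).  On stream sockets at least one byte of real
 * data must ride along with the control message:
 *
 *	char ctl[CMSG_SPACE(sizeof(int))];
 *	char data = 'x';
 *	struct iovec iov = { .iov_base = &data, .iov_len = 1 };
 *	struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
 *			      .msg_control = ctl,
 *			      .msg_controllen = sizeof(ctl) };
 *	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
 *
 *	cmsg->cmsg_level = SOL_SOCKET;
 *	cmsg->cmsg_type  = SCM_RIGHTS;
 *	cmsg->cmsg_len   = CMSG_LEN(sizeof(int));
 *	memcpy(CMSG_DATA(cmsg), &fd_to_pass, sizeof(int));
 *	sendmsg(sock, &msg, 0);
 */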
1520 
1521 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1522 {
1523 	int i;
1524 
1525 	if (too_many_unix_fds(current))
1526 		return -ETOOMANYREFS;
1527 
1528 	/*
1529 	 * Need to duplicate file references for the sake of garbage
1530 	 * collection.  Otherwise a socket in the fps might become a
1531 	 * candidate for GC while the skb is not yet queued.
1532 	 */
1533 	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1534 	if (!UNIXCB(skb).fp)
1535 		return -ENOMEM;
1536 
1537 	for (i = scm->fp->count - 1; i >= 0; i--)
1538 		unix_inflight(scm->fp->user, scm->fp->fp[i]);
1539 	return 0;
1540 }
1541 
1542 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1543 {
1544 	int err = 0;
1545 
1546 	UNIXCB(skb).pid  = get_pid(scm->pid);
1547 	UNIXCB(skb).uid = scm->creds.uid;
1548 	UNIXCB(skb).gid = scm->creds.gid;
1549 	UNIXCB(skb).fp = NULL;
1550 	unix_get_secdata(scm, skb);
1551 	if (scm->fp && send_fds)
1552 		err = unix_attach_fds(scm, skb);
1553 
1554 	skb->destructor = unix_destruct_scm;
1555 	return err;
1556 }
1557 
1558 static bool unix_passcred_enabled(const struct socket *sock,
1559 				  const struct sock *other)
1560 {
1561 	return test_bit(SOCK_PASSCRED, &sock->flags) ||
1562 	       !other->sk_socket ||
1563 	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1564 }
1565 
1566 /*
1567  * Some apps rely on write() giving SCM_CREDENTIALS.
1568  * We include credentials if the source or destination socket
1569  * asserted SOCK_PASSCRED.
1570  */
1571 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1572 			    const struct sock *other)
1573 {
1574 	if (UNIXCB(skb).pid)
1575 		return;
1576 	if (unix_passcred_enabled(sock, other)) {
1577 		UNIXCB(skb).pid  = get_pid(task_tgid(current));
1578 		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1579 	}
1580 }
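/*
 * Matching userspace sketch of the receive side (illustrative only):
 * with SO_PASSCRED set, each message carries an SCM_CREDENTIALS cmsg
 * holding the pid/uid/gid that maybe_add_creds() recorded.
 *
 *	int one = 1;
 *	struct ucred *creds;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_PASSCRED, &one, sizeof(one));
 *	recvmsg(fd, &msg, 0);
 *	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
 *		if (cmsg->cmsg_level == SOL_SOCKET &&
 *		    cmsg->cmsg_type == SCM_CREDENTIALS)
 *			creds = (struct ucred *)CMSG_DATA(cmsg);
 */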
1581 
1582 static int maybe_init_creds(struct scm_cookie *scm,
1583 			    struct socket *socket,
1584 			    const struct sock *other)
1585 {
1586 	int err;
1587 	struct msghdr msg = { .msg_controllen = 0 };
1588 
1589 	err = scm_send(socket, &msg, scm, false);
1590 	if (err)
1591 		return err;
1592 
1593 	if (unix_passcred_enabled(socket, other)) {
1594 		scm->pid = get_pid(task_tgid(current));
1595 		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1596 	}
1597 	return err;
1598 }
1599 
1600 static bool unix_skb_scm_eq(struct sk_buff *skb,
1601 			    struct scm_cookie *scm)
1602 {
1603 	const struct unix_skb_parms *u = &UNIXCB(skb);
1604 
1605 	return u->pid == scm->pid &&
1606 	       uid_eq(u->uid, scm->creds.uid) &&
1607 	       gid_eq(u->gid, scm->creds.gid) &&
1608 	       unix_secdata_eq(scm, skb);
1609 }
1610 
1611 /*
1612  *	Send AF_UNIX data.
1613  */
1614 
1615 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1616 			      size_t len)
1617 {
1618 	struct sock *sk = sock->sk;
1619 	struct net *net = sock_net(sk);
1620 	struct unix_sock *u = unix_sk(sk);
1621 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1622 	struct sock *other = NULL;
1623 	int namelen = 0; /* fake GCC */
1624 	int err;
1625 	unsigned int hash;
1626 	struct sk_buff *skb;
1627 	long timeo;
1628 	struct scm_cookie scm;
1629 	int data_len = 0;
1630 	int sk_locked;
1631 
1632 	wait_for_unix_gc();
1633 	err = scm_send(sock, msg, &scm, false);
1634 	if (err < 0)
1635 		return err;
1636 
1637 	err = -EOPNOTSUPP;
1638 	if (msg->msg_flags&MSG_OOB)
1639 		goto out;
1640 
1641 	if (msg->msg_namelen) {
1642 		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1643 		if (err < 0)
1644 			goto out;
1645 		namelen = err;
1646 	} else {
1647 		sunaddr = NULL;
1648 		err = -ENOTCONN;
1649 		other = unix_peer_get(sk);
1650 		if (!other)
1651 			goto out;
1652 	}
1653 
1654 	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1655 	    && (err = unix_autobind(sock)) != 0)
1656 		goto out;
1657 
1658 	err = -EMSGSIZE;
1659 	if (len > sk->sk_sndbuf - 32)
1660 		goto out;
1661 
1662 	if (len > SKB_MAX_ALLOC) {
1663 		data_len = min_t(size_t,
1664 				 len - SKB_MAX_ALLOC,
1665 				 MAX_SKB_FRAGS * PAGE_SIZE);
1666 		data_len = PAGE_ALIGN(data_len);
1667 
1668 		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1669 	}
1670 
1671 	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1672 				   msg->msg_flags & MSG_DONTWAIT, &err,
1673 				   PAGE_ALLOC_COSTLY_ORDER);
1674 	if (skb == NULL)
1675 		goto out;
1676 
1677 	err = unix_scm_to_skb(&scm, skb, true);
1678 	if (err < 0)
1679 		goto out_free;
1680 
1681 	skb_put(skb, len - data_len);
1682 	skb->data_len = data_len;
1683 	skb->len = len;
1684 	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1685 	if (err)
1686 		goto out_free;
1687 
1688 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1689 
1690 restart:
1691 	if (!other) {
1692 		err = -ECONNRESET;
1693 		if (sunaddr == NULL)
1694 			goto out_free;
1695 
1696 		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1697 					hash, &err);
1698 		if (other == NULL)
1699 			goto out_free;
1700 	}
1701 
1702 	if (sk_filter(other, skb) < 0) {
1703 		/* Toss the packet but do not return any error to the sender */
1704 		err = len;
1705 		goto out_free;
1706 	}
1707 
1708 	sk_locked = 0;
1709 	unix_state_lock(other);
1710 restart_locked:
1711 	err = -EPERM;
1712 	if (!unix_may_send(sk, other))
1713 		goto out_unlock;
1714 
1715 	if (unlikely(sock_flag(other, SOCK_DEAD))) {
1716 		/*
1717 		 *	Check with 1003.1g - what should a
1718 		 *	datagram error be?
1719 		 */
1720 		unix_state_unlock(other);
1721 		sock_put(other);
1722 
1723 		if (!sk_locked)
1724 			unix_state_lock(sk);
1725 
1726 		err = 0;
1727 		if (unix_peer(sk) == other) {
1728 			unix_peer(sk) = NULL;
1729 			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1730 
1731 			unix_state_unlock(sk);
1732 
1733 			unix_dgram_disconnected(sk, other);
1734 			sock_put(other);
1735 			err = -ECONNREFUSED;
1736 		} else {
1737 			unix_state_unlock(sk);
1738 		}
1739 
1740 		other = NULL;
1741 		if (err)
1742 			goto out_free;
1743 		goto restart;
1744 	}
1745 
1746 	err = -EPIPE;
1747 	if (other->sk_shutdown & RCV_SHUTDOWN)
1748 		goto out_unlock;
1749 
1750 	if (sk->sk_type != SOCK_SEQPACKET) {
1751 		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1752 		if (err)
1753 			goto out_unlock;
1754 	}
1755 
1756 	/* other == sk && unix_peer(other) != sk if
1757 	 * - unix_peer(sk) == NULL, destination address bound to sk
1758 	 * - unix_peer(sk) == sk by time of get but disconnected before lock
1759 	 */
1760 	if (other != sk &&
1761 	    unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1762 		if (timeo) {
1763 			timeo = unix_wait_for_peer(other, timeo);
1764 
1765 			err = sock_intr_errno(timeo);
1766 			if (signal_pending(current))
1767 				goto out_free;
1768 
1769 			goto restart;
1770 		}
1771 
1772 		if (!sk_locked) {
1773 			unix_state_unlock(other);
1774 			unix_state_double_lock(sk, other);
1775 		}
1776 
1777 		if (unix_peer(sk) != other ||
1778 		    unix_dgram_peer_wake_me(sk, other)) {
1779 			err = -EAGAIN;
1780 			sk_locked = 1;
1781 			goto out_unlock;
1782 		}
1783 
1784 		if (!sk_locked) {
1785 			sk_locked = 1;
1786 			goto restart_locked;
1787 		}
1788 	}
1789 
1790 	if (unlikely(sk_locked))
1791 		unix_state_unlock(sk);
1792 
1793 	if (sock_flag(other, SOCK_RCVTSTAMP))
1794 		__net_timestamp(skb);
1795 	maybe_add_creds(skb, sock, other);
1796 	skb_queue_tail(&other->sk_receive_queue, skb);
1797 	unix_state_unlock(other);
1798 	other->sk_data_ready(other);
1799 	sock_put(other);
1800 	scm_destroy(&scm);
1801 	return len;
1802 
1803 out_unlock:
1804 	if (sk_locked)
1805 		unix_state_unlock(sk);
1806 	unix_state_unlock(other);
1807 out_free:
1808 	kfree_skb(skb);
1809 out:
1810 	if (other)
1811 		sock_put(other);
1812 	scm_destroy(&scm);
1813 	return err;
1814 }
1815 
1816 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1817  * bytes, and a minimum of a full page.
1818  */
1819 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
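/* With 4KiB pages that is 4096 << get_order(32768), i.e. 4096 << 3 =
 * 32768 bytes; with pages of 32KiB or more it is a single page.
 */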
1820 
1821 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1822 			       size_t len)
1823 {
1824 	struct sock *sk = sock->sk;
1825 	struct sock *other = NULL;
1826 	int err, size;
1827 	struct sk_buff *skb;
1828 	int sent = 0;
1829 	struct scm_cookie scm;
1830 	bool fds_sent = false;
1831 	int data_len;
1832 
1833 	wait_for_unix_gc();
1834 	err = scm_send(sock, msg, &scm, false);
1835 	if (err < 0)
1836 		return err;
1837 
1838 	err = -EOPNOTSUPP;
1839 	if (msg->msg_flags&MSG_OOB)
1840 		goto out_err;
1841 
1842 	if (msg->msg_namelen) {
1843 		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1844 		goto out_err;
1845 	} else {
1846 		err = -ENOTCONN;
1847 		other = unix_peer(sk);
1848 		if (!other)
1849 			goto out_err;
1850 	}
1851 
1852 	if (sk->sk_shutdown & SEND_SHUTDOWN)
1853 		goto pipe_err;
1854 
1855 	while (sent < len) {
1856 		size = len - sent;
1857 
1858 		/* Keep two messages in the pipe so it schedules better */
1859 		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1860 
1861 		/* allow fallback to order-0 allocations */
1862 		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1863 
1864 		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1865 
1866 		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1867 
1868 		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1869 					   msg->msg_flags & MSG_DONTWAIT, &err,
1870 					   get_order(UNIX_SKB_FRAGS_SZ));
1871 		if (!skb)
1872 			goto out_err;
1873 
1874 		/* Only send the fds in the first buffer */
1875 		err = unix_scm_to_skb(&scm, skb, !fds_sent);
1876 		if (err < 0) {
1877 			kfree_skb(skb);
1878 			goto out_err;
1879 		}
1880 		fds_sent = true;
1881 
1882 		skb_put(skb, size - data_len);
1883 		skb->data_len = data_len;
1884 		skb->len = size;
1885 		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1886 		if (err) {
1887 			kfree_skb(skb);
1888 			goto out_err;
1889 		}
1890 
1891 		unix_state_lock(other);
1892 
1893 		if (sock_flag(other, SOCK_DEAD) ||
1894 		    (other->sk_shutdown & RCV_SHUTDOWN))
1895 			goto pipe_err_free;
1896 
1897 		maybe_add_creds(skb, sock, other);
1898 		skb_queue_tail(&other->sk_receive_queue, skb);
1899 		unix_state_unlock(other);
1900 		other->sk_data_ready(other);
1901 		sent += size;
1902 	}
1903 
1904 	scm_destroy(&scm);
1905 
1906 	return sent;
1907 
1908 pipe_err_free:
1909 	unix_state_unlock(other);
1910 	kfree_skb(skb);
1911 pipe_err:
1912 	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1913 		send_sig(SIGPIPE, current, 0);
1914 	err = -EPIPE;
1915 out_err:
1916 	scm_destroy(&scm);
1917 	return sent ? : err;
1918 }
1919 
1920 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1921 				    int offset, size_t size, int flags)
1922 {
1923 	int err;
1924 	bool send_sigpipe = false;
1925 	bool init_scm = true;
1926 	struct scm_cookie scm;
1927 	struct sock *other, *sk = socket->sk;
1928 	struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1929 
1930 	if (flags & MSG_OOB)
1931 		return -EOPNOTSUPP;
1932 
1933 	other = unix_peer(sk);
1934 	if (!other || sk->sk_state != TCP_ESTABLISHED)
1935 		return -ENOTCONN;
1936 
1937 	if (false) {
1938 alloc_skb:
1939 		unix_state_unlock(other);
1940 		mutex_unlock(&unix_sk(other)->iolock);
1941 		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1942 					      &err, 0);
1943 		if (!newskb)
1944 			goto err;
1945 	}
1946 
1947 	/* we must acquire iolock as we modify already present
1948 	 * skbs in the sk_receive_queue and mess with skb->len
1949 	 */
1950 	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1951 	if (err) {
1952 		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1953 		goto err;
1954 	}
1955 
1956 	if (sk->sk_shutdown & SEND_SHUTDOWN) {
1957 		err = -EPIPE;
1958 		send_sigpipe = true;
1959 		goto err_unlock;
1960 	}
1961 
1962 	unix_state_lock(other);
1963 
1964 	if (sock_flag(other, SOCK_DEAD) ||
1965 	    other->sk_shutdown & RCV_SHUTDOWN) {
1966 		err = -EPIPE;
1967 		send_sigpipe = true;
1968 		goto err_state_unlock;
1969 	}
1970 
1971 	if (init_scm) {
1972 		err = maybe_init_creds(&scm, socket, other);
1973 		if (err)
1974 			goto err_state_unlock;
1975 		init_scm = false;
1976 	}
1977 
1978 	skb = skb_peek_tail(&other->sk_receive_queue);
1979 	if (tail && tail == skb) {
1980 		skb = newskb;
1981 	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
1982 		if (newskb) {
1983 			skb = newskb;
1984 		} else {
1985 			tail = skb;
1986 			goto alloc_skb;
1987 		}
1988 	} else if (newskb) {
1989 		/* This is the fast path. The newskb check is not strictly
1990 		 * necessary: consume_skb() tolerates a NULL argument, so
1991 		 * calling it with newskb == NULL would do no harm.
1992 		 */
1993 		consume_skb(newskb);
1994 		newskb = NULL;
1995 	}
1996 
1997 	if (skb_append_pagefrags(skb, page, offset, size)) {
1998 		tail = skb;
1999 		goto alloc_skb;
2000 	}
2001 
2002 	skb->len += size;
2003 	skb->data_len += size;
2004 	skb->truesize += size;
2005 	refcount_add(size, &sk->sk_wmem_alloc);
2006 
2007 	if (newskb) {
2008 		err = unix_scm_to_skb(&scm, skb, false);
2009 		if (err)
2010 			goto err_state_unlock;
2011 		spin_lock(&other->sk_receive_queue.lock);
2012 		__skb_queue_tail(&other->sk_receive_queue, newskb);
2013 		spin_unlock(&other->sk_receive_queue.lock);
2014 	}
2015 
2016 	unix_state_unlock(other);
2017 	mutex_unlock(&unix_sk(other)->iolock);
2018 
2019 	other->sk_data_ready(other);
2020 	scm_destroy(&scm);
2021 	return size;
2022 
2023 err_state_unlock:
2024 	unix_state_unlock(other);
2025 err_unlock:
2026 	mutex_unlock(&unix_sk(other)->iolock);
2027 err:
2028 	kfree_skb(newskb);
2029 	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2030 		send_sig(SIGPIPE, current, 0);
2031 	if (!init_scm)
2032 		scm_destroy(&scm);
2033 	return err;
2034 }
2035 
2036 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2037 				  size_t len)
2038 {
2039 	int err;
2040 	struct sock *sk = sock->sk;
2041 
2042 	err = sock_error(sk);
2043 	if (err)
2044 		return err;
2045 
2046 	if (sk->sk_state != TCP_ESTABLISHED)
2047 		return -ENOTCONN;
2048 
2049 	if (msg->msg_namelen)
2050 		msg->msg_namelen = 0;
2051 
2052 	return unix_dgram_sendmsg(sock, msg, len);
2053 }
2054 
2055 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2056 				  size_t size, int flags)
2057 {
2058 	struct sock *sk = sock->sk;
2059 
2060 	if (sk->sk_state != TCP_ESTABLISHED)
2061 		return -ENOTCONN;
2062 
2063 	return unix_dgram_recvmsg(sock, msg, size, flags);
2064 }
2065 
2066 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2067 {
2068 	struct unix_sock *u = unix_sk(sk);
2069 
2070 	if (u->addr) {
2071 		msg->msg_namelen = u->addr->len;
2072 		memcpy(msg->msg_name, u->addr->name, u->addr->len);
2073 	}
2074 }
2075 
2076 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2077 			      size_t size, int flags)
2078 {
2079 	struct scm_cookie scm;
2080 	struct sock *sk = sock->sk;
2081 	struct unix_sock *u = unix_sk(sk);
2082 	struct sk_buff *skb, *last;
2083 	long timeo;
2084 	int err;
2085 	int peeked, skip;
2086 
2087 	err = -EOPNOTSUPP;
2088 	if (flags&MSG_OOB)
2089 	if (flags & MSG_OOB)
2090 
2091 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2092 
2093 	do {
2094 		mutex_lock(&u->iolock);
2095 
2096 		skip = sk_peek_offset(sk, flags);
2097 		skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip,
2098 					      &err, &last);
2099 		if (skb)
2100 			break;
2101 
2102 		mutex_unlock(&u->iolock);
2103 
2104 		if (err != -EAGAIN)
2105 			break;
2106 	} while (timeo &&
2107 		 !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2108 
2109 	if (!skb) { /* implies iolock unlocked */
2110 		unix_state_lock(sk);
2111 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2112 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2113 		    (sk->sk_shutdown & RCV_SHUTDOWN))
2114 			err = 0;
2115 		unix_state_unlock(sk);
2116 		goto out;
2117 	}
2118 
2119 	if (wq_has_sleeper(&u->peer_wait))
2120 		wake_up_interruptible_sync_poll(&u->peer_wait,
2121 						EPOLLOUT | EPOLLWRNORM |
2122 						EPOLLWRBAND);
2123 
2124 	if (msg->msg_name)
2125 		unix_copy_addr(msg, skb->sk);
2126 
2127 	if (size > skb->len - skip)
2128 		size = skb->len - skip;
2129 	else if (size < skb->len - skip)
2130 		msg->msg_flags |= MSG_TRUNC;
2131 
2132 	err = skb_copy_datagram_msg(skb, skip, msg, size);
2133 	if (err)
2134 		goto out_free;
2135 
2136 	if (sock_flag(sk, SOCK_RCVTSTAMP))
2137 		__sock_recv_timestamp(msg, sk, skb);
2138 
2139 	memset(&scm, 0, sizeof(scm));
2140 
2141 	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2142 	unix_set_secdata(&scm, skb);
2143 
2144 	if (!(flags & MSG_PEEK)) {
2145 		if (UNIXCB(skb).fp)
2146 			unix_detach_fds(&scm, skb);
2147 
2148 		sk_peek_offset_bwd(sk, skb->len);
2149 	} else {
2150 		/* It is questionable what to do with fds on PEEK; we could:
2151 		   - not return fds at all (simple, but loses information)
2152 		   - return fds, but not return them again on the later
2153 		     read (the old strategy, apparently wrong)
2154 		   - clone the fds (chosen here, as the most universal
2155 		     solution)
2156 
2157 		   POSIX 1003.1g does not actually define this behaviour
2158 		   clearly at all; then again, POSIX 1003.1g leaves a lot
2159 		   of things unclear!
2160 
2161 		*/
2162 
2163 		sk_peek_offset_fwd(sk, size);
2164 
2165 		if (UNIXCB(skb).fp)
2166 			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2167 	}
2168 	err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2169 
2170 	scm_recv(sock, msg, &scm, flags);
2171 
2172 out_free:
2173 	skb_free_datagram(sk, skb);
2174 	mutex_unlock(&u->iolock);
2175 out:
2176 	return err;
2177 }
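
/* Usage sketch (userspace, illustrative only): with the cloning
 * strategy above, every MSG_PEEK of a datagram carrying SCM_RIGHTS
 * installs a fresh duplicate of the passed fds:
 *
 *	char buf[256], cbuf[CMSG_SPACE(sizeof(int))];
 *	struct iovec iov = { buf, sizeof(buf) };
 *	struct msghdr mh = { .msg_iov = &iov, .msg_iovlen = 1,
 *			     .msg_control = cbuf,
 *			     .msg_controllen = sizeof(cbuf) };
 *	recvmsg(fd, &mh, MSG_PEEK);	- fds duplicated, datagram kept
 *	recvmsg(fd, &mh, 0);		- fds detached, datagram consumed
 */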
2178 
2179 /*
2180  *	Sleep until more data has arrived, but check for races.
2181  */
2182 static long unix_stream_data_wait(struct sock *sk, long timeo,
2183 				  struct sk_buff *last, unsigned int last_len,
2184 				  bool freezable)
2185 {
2186 	struct sk_buff *tail;
2187 	DEFINE_WAIT(wait);
2188 
2189 	unix_state_lock(sk);
2190 
2191 	for (;;) {
2192 		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2193 
2194 		tail = skb_peek_tail(&sk->sk_receive_queue);
2195 		if (tail != last ||
2196 		    (tail && tail->len != last_len) ||
2197 		    sk->sk_err ||
2198 		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
2199 		    signal_pending(current) ||
2200 		    !timeo)
2201 			break;
2202 
2203 		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2204 		unix_state_unlock(sk);
2205 		if (freezable)
2206 			timeo = freezable_schedule_timeout(timeo);
2207 		else
2208 			timeo = schedule_timeout(timeo);
2209 		unix_state_lock(sk);
2210 
2211 		if (sock_flag(sk, SOCK_DEAD))
2212 			break;
2213 
2214 		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2215 	}
2216 
2217 	finish_wait(sk_sleep(sk), &wait);
2218 	unix_state_unlock(sk);
2219 	return timeo;
2220 }
2221 
2222 static unsigned int unix_skb_len(const struct sk_buff *skb)
2223 {
2224 	return skb->len - UNIXCB(skb).consumed;
2225 }
2226 
2227 struct unix_stream_read_state {
2228 	int (*recv_actor)(struct sk_buff *, int, int,
2229 			  struct unix_stream_read_state *);
2230 	struct socket *socket;
2231 	struct msghdr *msg;
2232 	struct pipe_inode_info *pipe;
2233 	size_t size;
2234 	int flags;
2235 	unsigned int splice_flags;
2236 };
2237 
2238 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2239 				    bool freezable)
2240 {
2241 	struct scm_cookie scm;
2242 	struct socket *sock = state->socket;
2243 	struct sock *sk = sock->sk;
2244 	struct unix_sock *u = unix_sk(sk);
2245 	int copied = 0;
2246 	int flags = state->flags;
2247 	int noblock = flags & MSG_DONTWAIT;
2248 	bool check_creds = false;
2249 	int target;
2250 	int err = 0;
2251 	long timeo;
2252 	int skip;
2253 	size_t size = state->size;
2254 	unsigned int last_len;
2255 
2256 	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2257 		err = -EINVAL;
2258 		goto out;
2259 	}
2260 
2261 	if (unlikely(flags & MSG_OOB)) {
2262 		err = -EOPNOTSUPP;
2263 		goto out;
2264 	}
2265 
2266 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2267 	timeo = sock_rcvtimeo(sk, noblock);
2268 
2269 	memset(&scm, 0, sizeof(scm));
2270 
2271 	/* Lock the socket to prevent the receive queue from being
2272 	 * reordered while we sleep in memcpy_to_msg()
2273 	 */
2274 	mutex_lock(&u->iolock);
2275 
2276 	skip = max(sk_peek_offset(sk, flags), 0);
2277 
2278 	do {
2279 		int chunk;
2280 		bool drop_skb;
2281 		struct sk_buff *skb, *last;
2282 
2283 redo:
2284 		unix_state_lock(sk);
2285 		if (sock_flag(sk, SOCK_DEAD)) {
2286 			err = -ECONNRESET;
2287 			goto unlock;
2288 		}
2289 		last = skb = skb_peek(&sk->sk_receive_queue);
2290 		last_len = last ? last->len : 0;
2291 again:
2292 		if (skb == NULL) {
2293 			if (copied >= target)
2294 				goto unlock;
2295 
2296 			/*
2297 			 *	POSIX 1003.1g mandates this order.
2298 			 */
2299 
2300 			err = sock_error(sk);
2301 			if (err)
2302 				goto unlock;
2303 			if (sk->sk_shutdown & RCV_SHUTDOWN)
2304 				goto unlock;
2305 
2306 			unix_state_unlock(sk);
2307 			if (!timeo) {
2308 				err = -EAGAIN;
2309 				break;
2310 			}
2311 
2312 			mutex_unlock(&u->iolock);
2313 
2314 			timeo = unix_stream_data_wait(sk, timeo, last,
2315 						      last_len, freezable);
2316 
2317 			if (signal_pending(current)) {
2318 				err = sock_intr_errno(timeo);
2319 				scm_destroy(&scm);
2320 				goto out;
2321 			}
2322 
2323 			mutex_lock(&u->iolock);
2324 			goto redo;
2325 unlock:
2326 			unix_state_unlock(sk);
2327 			break;
2328 		}
2329 
2330 		while (skip >= unix_skb_len(skb)) {
2331 			skip -= unix_skb_len(skb);
2332 			last = skb;
2333 			last_len = skb->len;
2334 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2335 			if (!skb)
2336 				goto again;
2337 		}
2338 
2339 		unix_state_unlock(sk);
2340 
2341 		if (check_creds) {
2342 			/* Never glue messages from different writers */
2343 			if (!unix_skb_scm_eq(skb, &scm))
2344 				break;
2345 		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2346 			/* Copy credentials */
2347 			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2348 			unix_set_secdata(&scm, skb);
2349 			check_creds = true;
2350 		}
2351 
2352 		/* Copy address just once */
2353 		if (state->msg && state->msg->msg_name) {
2354 			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2355 					 state->msg->msg_name);
2356 			unix_copy_addr(state->msg, skb->sk);
2357 			sunaddr = NULL;
2358 		}
2359 
2360 		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2361 		skb_get(skb);
2362 		chunk = state->recv_actor(skb, skip, chunk, state);
2363 		drop_skb = !unix_skb_len(skb);
2364 		/* skb is only safe to use if !drop_skb */
2365 		consume_skb(skb);
2366 		if (chunk < 0) {
2367 			if (copied == 0)
2368 				copied = -EFAULT;
2369 			break;
2370 		}
2371 		copied += chunk;
2372 		size -= chunk;
2373 
2374 		if (drop_skb) {
2375 			/* The skb was consumed in full by a concurrent
2376 			 * reader; nothing more can be expected from it
2377 			 * and it must be treated as invalid, since it
2378 			 * has certainly been dropped from the queue.
2379 			 *
2380 			 * Report a short read.
2381 			 */
2382 			err = 0;
2383 			break;
2384 		}
2385 
2386 		/* Mark read part of skb as used */
2387 		if (!(flags & MSG_PEEK)) {
2388 			UNIXCB(skb).consumed += chunk;
2389 
2390 			sk_peek_offset_bwd(sk, chunk);
2391 
2392 			if (UNIXCB(skb).fp)
2393 				unix_detach_fds(&scm, skb);
2394 
2395 			if (unix_skb_len(skb))
2396 				break;
2397 
2398 			skb_unlink(skb, &sk->sk_receive_queue);
2399 			consume_skb(skb);
2400 
2401 			if (scm.fp)
2402 				break;
2403 		} else {
2404 			/* It is questionable, see note in unix_dgram_recvmsg.
2405 			/* It is questionable; see the note in unix_dgram_recvmsg.
2406 			 */
2407 				scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2408 
2409 			sk_peek_offset_fwd(sk, chunk);
2410 
2411 			if (UNIXCB(skb).fp)
2412 				break;
2413 
2414 			skip = 0;
2415 			last = skb;
2416 			last_len = skb->len;
2417 			unix_state_lock(sk);
2418 			skb = skb_peek_next(skb, &sk->sk_receive_queue);
2419 			if (skb)
2420 				goto again;
2421 			unix_state_unlock(sk);
2422 			break;
2423 		}
2424 	} while (size);
2425 
2426 	mutex_unlock(&u->iolock);
2427 	if (state->msg)
2428 		scm_recv(sock, state->msg, &scm, flags);
2429 	else
2430 		scm_destroy(&scm);
2431 out:
2432 	return copied ? : err;
2433 }
2434 
2435 static int unix_stream_read_actor(struct sk_buff *skb,
2436 				  int skip, int chunk,
2437 				  struct unix_stream_read_state *state)
2438 {
2439 	int ret;
2440 
2441 	ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2442 				    state->msg, chunk);
2443 	return ret ?: chunk;
2444 }
2445 
2446 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2447 			       size_t size, int flags)
2448 {
2449 	struct unix_stream_read_state state = {
2450 		.recv_actor = unix_stream_read_actor,
2451 		.socket = sock,
2452 		.msg = msg,
2453 		.size = size,
2454 		.flags = flags
2455 	};
2456 
2457 	return unix_stream_read_generic(&state, true);
2458 }
2459 
2460 static int unix_stream_splice_actor(struct sk_buff *skb,
2461 				    int skip, int chunk,
2462 				    struct unix_stream_read_state *state)
2463 {
2464 	return skb_splice_bits(skb, state->socket->sk,
2465 			       UNIXCB(skb).consumed + skip,
2466 			       state->pipe, chunk, state->splice_flags);
2467 }
2468 
2469 static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
2470 				       struct pipe_inode_info *pipe,
2471 				       size_t size, unsigned int flags)
2472 {
2473 	struct unix_stream_read_state state = {
2474 		.recv_actor = unix_stream_splice_actor,
2475 		.socket = sock,
2476 		.pipe = pipe,
2477 		.size = size,
2478 		.splice_flags = flags,
2479 	};
2480 
2481 	if (unlikely(*ppos))
2482 		return -ESPIPE;
2483 
2484 	if (sock->file->f_flags & O_NONBLOCK ||
2485 	    flags & SPLICE_F_NONBLOCK)
2486 		state.flags = MSG_DONTWAIT;
2487 
2488 	return unix_stream_read_generic(&state, false);
2489 }
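
/* Usage sketch (userspace, illustrative only): stream data can be
 * spliced into a pipe without a round trip through userspace buffers:
 *
 *	int p[2];
 *	pipe(p);
 *	splice(sockfd, NULL, p[1], NULL, 4096, SPLICE_F_NONBLOCK);
 */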
2490 
2491 static int unix_shutdown(struct socket *sock, int mode)
2492 {
2493 	struct sock *sk = sock->sk;
2494 	struct sock *other;
2495 
2496 	if (mode < SHUT_RD || mode > SHUT_RDWR)
2497 		return -EINVAL;
2498 	/* This maps:
2499 	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2500 	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2501 	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2502 	 */
2503 	++mode;
2504 
2505 	unix_state_lock(sk);
2506 	sk->sk_shutdown |= mode;
2507 	other = unix_peer(sk);
2508 	if (other)
2509 		sock_hold(other);
2510 	unix_state_unlock(sk);
2511 	sk->sk_state_change(sk);
2512 
2513 	if (other &&
2514 	    (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2515 
2516 		int peer_mode = 0;
2517 
2518 		if (mode & RCV_SHUTDOWN)
2519 			peer_mode |= SEND_SHUTDOWN;
2520 		if (mode & SEND_SHUTDOWN)
2521 			peer_mode |= RCV_SHUTDOWN;
2522 		unix_state_lock(other);
2523 		other->sk_shutdown |= peer_mode;
2524 		unix_state_unlock(other);
2525 		other->sk_state_change(other);
2526 		if (peer_mode == SHUTDOWN_MASK)
2527 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2528 		else if (peer_mode & RCV_SHUTDOWN)
2529 			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2530 	}
2531 	if (other)
2532 		sock_put(other);
2533 
2534 	return 0;
2535 }
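
/* Usage sketch (userspace, illustrative only): because SEND_SHUTDOWN
 * on one end is mirrored as RCV_SHUTDOWN on its peer, after
 *
 *	shutdown(sv[0], SHUT_WR);
 *
 * read() on sv[1] returns 0 (EOF) once the queue drains, while sv[1]
 * can still write back to sv[0].
 */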
2536 
2537 long unix_inq_len(struct sock *sk)
2538 {
2539 	struct sk_buff *skb;
2540 	long amount = 0;
2541 
2542 	if (sk->sk_state == TCP_LISTEN)
2543 		return -EINVAL;
2544 
2545 	spin_lock(&sk->sk_receive_queue.lock);
2546 	if (sk->sk_type == SOCK_STREAM ||
2547 	    sk->sk_type == SOCK_SEQPACKET) {
2548 		skb_queue_walk(&sk->sk_receive_queue, skb)
2549 			amount += unix_skb_len(skb);
2550 	} else {
2551 		skb = skb_peek(&sk->sk_receive_queue);
2552 		if (skb)
2553 			amount = skb->len;
2554 	}
2555 	spin_unlock(&sk->sk_receive_queue.lock);
2556 
2557 	return amount;
2558 }
2559 EXPORT_SYMBOL_GPL(unix_inq_len);
2560 
2561 long unix_outq_len(struct sock *sk)
2562 {
2563 	return sk_wmem_alloc_get(sk);
2564 }
2565 EXPORT_SYMBOL_GPL(unix_outq_len);
2566 
2567 static int unix_open_file(struct sock *sk)
2568 {
2569 	struct path path;
2570 	struct file *f;
2571 	int fd;
2572 
2573 	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2574 		return -EPERM;
2575 
2576 	unix_state_lock(sk);
2577 	path = unix_sk(sk)->path;
2578 	if (!path.dentry) {
2579 		unix_state_unlock(sk);
2580 		return -ENOENT;
2581 	}
2582 
2583 	path_get(&path);
2584 	unix_state_unlock(sk);
2585 
2586 	fd = get_unused_fd_flags(O_CLOEXEC);
2587 	if (fd < 0)
2588 		goto out;
2589 
2590 	f = dentry_open(&path, O_PATH, current_cred());
2591 	if (IS_ERR(f)) {
2592 		put_unused_fd(fd);
2593 		fd = PTR_ERR(f);
2594 		goto out;
2595 	}
2596 
2597 	fd_install(fd, f);
2598 out:
2599 	path_put(&path);
2600 
2601 	return fd;
2602 }
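
/* Usage sketch (userspace, illustrative only): SIOCUNIXFILE (which
 * requires CAP_NET_ADMIN in the socket's network namespace, per the
 * check above) yields an O_PATH fd for the filesystem object a bound
 * socket sits on:
 *
 *	int pfd = ioctl(sockfd, SIOCUNIXFILE);
 *	struct stat st;
 *	fstatat(pfd, "", &st, AT_EMPTY_PATH);
 */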
2603 
2604 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2605 {
2606 	struct sock *sk = sock->sk;
2607 	long amount = 0;
2608 	int err;
2609 
2610 	switch (cmd) {
2611 	case SIOCOUTQ:
2612 		amount = unix_outq_len(sk);
2613 		err = put_user(amount, (int __user *)arg);
2614 		break;
2615 	case SIOCINQ:
2616 		amount = unix_inq_len(sk);
2617 		if (amount < 0)
2618 			err = amount;
2619 		else
2620 			err = put_user(amount, (int __user *)arg);
2621 		break;
2622 	case SIOCUNIXFILE:
2623 		err = unix_open_file(sk);
2624 		break;
2625 	default:
2626 		err = -ENOIOCTLCMD;
2627 		break;
2628 	}
2629 	return err;
2630 }
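
/* Usage sketch (userspace, illustrative only):
 *
 *	int inq, outq;
 *	ioctl(fd, SIOCINQ, &inq);	- unread bytes in the receive queue
 *	ioctl(fd, SIOCOUTQ, &outq);	- bytes written but not yet consumed
 */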
2631 
2632 static __poll_t unix_poll_mask(struct socket *sock, __poll_t events)
2633 {
2634 	struct sock *sk = sock->sk;
2635 	__poll_t mask = 0;
2636 
2637 	/* exceptional events? */
2638 	if (sk->sk_err)
2639 		mask |= EPOLLERR;
2640 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2641 		mask |= EPOLLHUP;
2642 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2643 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2644 
2645 	/* readable? */
2646 	if (!skb_queue_empty(&sk->sk_receive_queue))
2647 		mask |= EPOLLIN | EPOLLRDNORM;
2648 
2649 	/* Connection-based need to check for termination and startup */
2650 	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2651 	    sk->sk_state == TCP_CLOSE)
2652 		mask |= EPOLLHUP;
2653 
2654 	/*
2655 	 * We also mark the socket writable when the other side has shut
2656 	 * down the connection; this prevents sockets from getting stuck.
2657 	 */
2658 	if (unix_writable(sk))
2659 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2660 
2661 	return mask;
2662 }
2663 
2664 static __poll_t unix_dgram_poll_mask(struct socket *sock, __poll_t events)
2665 {
2666 	struct sock *sk = sock->sk, *other;
2667 	int writable;
2668 	__poll_t mask = 0;
2669 
2670 	/* exceptional events? */
2671 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2672 		mask |= EPOLLERR |
2673 			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
2674 
2675 	if (sk->sk_shutdown & RCV_SHUTDOWN)
2676 		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2677 	if (sk->sk_shutdown == SHUTDOWN_MASK)
2678 		mask |= EPOLLHUP;
2679 
2680 	/* readable? */
2681 	if (!skb_queue_empty(&sk->sk_receive_queue))
2682 		mask |= EPOLLIN | EPOLLRDNORM;
2683 
2684 	/* Connection-based need to check for termination and startup */
2685 	if (sk->sk_type == SOCK_SEQPACKET) {
2686 		if (sk->sk_state == TCP_CLOSE)
2687 			mask |= EPOLLHUP;
2688 		/* connection hasn't started yet? */
2689 		if (sk->sk_state == TCP_SYN_SENT)
2690 			return mask;
2691 	}
2692 
2693 	/* No write status requested, avoid expensive OUT tests. */
2694 	if (!(events & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
2695 		return mask;
2696 
2697 	writable = unix_writable(sk);
2698 	if (writable) {
2699 		unix_state_lock(sk);
2700 
2701 		other = unix_peer(sk);
2702 		if (other && unix_peer(other) != sk &&
2703 		    unix_recvq_full(other) &&
2704 		    unix_dgram_peer_wake_me(sk, other))
2705 			writable = 0;
2706 
2707 		unix_state_unlock(sk);
2708 	}
2709 
2710 	if (writable)
2711 		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2712 	else
2713 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2714 
2715 	return mask;
2716 }
2717 
2718 #ifdef CONFIG_PROC_FS
2719 
2720 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2721 
2722 #define get_bucket(x) ((x) >> BUCKET_SPACE)
2723 #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2724 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
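
/* Illustrative decoding (assuming a 64-bit long and UNIX_HASH_BITS of
 * 8, giving BUCKET_SPACE == 54): set_bucket_offset(1, 1) yields
 * 0x0040000000000001, which get_bucket()/get_offset() split back into
 * bucket 1, offset 1. The "+ 1" exists because unix_socket_table has
 * 2 * UNIX_HASH_SIZE buckets.
 */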
2725 
2726 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2727 {
2728 	unsigned long offset = get_offset(*pos);
2729 	unsigned long bucket = get_bucket(*pos);
2730 	struct sock *sk;
2731 	unsigned long count = 0;
2732 
2733 	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2734 		if (sock_net(sk) != seq_file_net(seq))
2735 			continue;
2736 		if (++count == offset)
2737 			break;
2738 	}
2739 
2740 	return sk;
2741 }
2742 
2743 static struct sock *unix_next_socket(struct seq_file *seq,
2744 				     struct sock *sk,
2745 				     loff_t *pos)
2746 {
2747 	unsigned long bucket;
2748 
2749 	while (sk > (struct sock *)SEQ_START_TOKEN) {
2750 		sk = sk_next(sk);
2751 		if (!sk)
2752 			goto next_bucket;
2753 		if (sock_net(sk) == seq_file_net(seq))
2754 			return sk;
2755 	}
2756 
2757 	do {
2758 		sk = unix_from_bucket(seq, pos);
2759 		if (sk)
2760 			return sk;
2761 
2762 next_bucket:
2763 		bucket = get_bucket(*pos) + 1;
2764 		*pos = set_bucket_offset(bucket, 1);
2765 	} while (bucket < ARRAY_SIZE(unix_socket_table));
2766 
2767 	return NULL;
2768 }
2769 
2770 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2771 	__acquires(unix_table_lock)
2772 {
2773 	spin_lock(&unix_table_lock);
2774 
2775 	if (!*pos)
2776 		return SEQ_START_TOKEN;
2777 
2778 	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2779 		return NULL;
2780 
2781 	return unix_next_socket(seq, NULL, pos);
2782 }
2783 
2784 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2785 {
2786 	++*pos;
2787 	return unix_next_socket(seq, v, pos);
2788 }
2789 
2790 static void unix_seq_stop(struct seq_file *seq, void *v)
2791 	__releases(unix_table_lock)
2792 {
2793 	spin_unlock(&unix_table_lock);
2794 }
2795 
2796 static int unix_seq_show(struct seq_file *seq, void *v)
2797 {
2798 
2799 	if (v == SEQ_START_TOKEN)
2800 		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2801 			 "Inode Path\n");
2802 	else {
2803 		struct sock *s = v;
2804 		struct unix_sock *u = unix_sk(s);
2805 		unix_state_lock(s);
2806 
2807 		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2808 			s,
2809 			refcount_read(&s->sk_refcnt),
2810 			0,
2811 			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2812 			s->sk_type,
2813 			s->sk_socket ?
2814 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2815 			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2816 			sock_i_ino(s));
2817 
2818 		if (u->addr) {
2819 			int i, len;
2820 			seq_putc(seq, ' ');
2821 
2822 			i = 0;
2823 			len = u->addr->len - sizeof(short);
2824 			if (!UNIX_ABSTRACT(s))
2825 				len--;
2826 			else {
2827 				seq_putc(seq, '@');
2828 				i++;
2829 			}
2830 			for ( ; i < len; i++)
2831 				seq_putc(seq, u->addr->name->sun_path[i] ?:
2832 					 '@');
2833 		}
2834 		unix_state_unlock(s);
2835 		seq_putc(seq, '\n');
2836 	}
2837 
2838 	return 0;
2839 }
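
/* Example output line (illustrative, field values are made up):
 *
 *	0000000000000000: 00000002 00000000 00010000 0001 01 20572 @/tmp/.X11-unix/X0
 *
 * Abstract names are printed with a leading '@', and any embedded NUL
 * bytes in the name are rendered as '@' by the loop above.
 */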
2840 
2841 static const struct seq_operations unix_seq_ops = {
2842 	.start  = unix_seq_start,
2843 	.next   = unix_seq_next,
2844 	.stop   = unix_seq_stop,
2845 	.show   = unix_seq_show,
2846 };
2847 #endif
2848 
2849 static const struct net_proto_family unix_family_ops = {
2850 	.family = PF_UNIX,
2851 	.create = unix_create,
2852 	.owner	= THIS_MODULE,
2853 };
2854 
2855 
2856 static int __net_init unix_net_init(struct net *net)
2857 {
2858 	int error = -ENOMEM;
2859 
2860 	net->unx.sysctl_max_dgram_qlen = 10;
2861 	if (unix_sysctl_register(net))
2862 		goto out;
2863 
2864 #ifdef CONFIG_PROC_FS
2865 	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2866 			sizeof(struct seq_net_private))) {
2867 		unix_sysctl_unregister(net);
2868 		goto out;
2869 	}
2870 #endif
2871 	error = 0;
2872 out:
2873 	return error;
2874 }
2875 
2876 static void __net_exit unix_net_exit(struct net *net)
2877 {
2878 	unix_sysctl_unregister(net);
2879 	remove_proc_entry("unix", net->proc_net);
2880 }
2881 
2882 static struct pernet_operations unix_net_ops = {
2883 	.init = unix_net_init,
2884 	.exit = unix_net_exit,
2885 };
2886 
2887 static int __init af_unix_init(void)
2888 {
2889 	int rc = -1;
2890 
2891 	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2892 
2893 	rc = proto_register(&unix_proto, 1);
2894 	if (rc != 0) {
2895 		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2896 		goto out;
2897 	}
2898 
2899 	sock_register(&unix_family_ops);
2900 	register_pernet_subsys(&unix_net_ops);
2901 out:
2902 	return rc;
2903 }
2904 
2905 static void __exit af_unix_exit(void)
2906 {
2907 	sock_unregister(PF_UNIX);
2908 	proto_unregister(&unix_proto);
2909 	unregister_pernet_subsys(&unix_net_ops);
2910 }
2911 
2912 /* Earlier than device_initcall() so that other drivers invoking
2913    request_module() don't end up in a loop when modprobe tries
2914    to use a UNIX socket. But later than subsys_initcall() because
2915    we depend on infrastructure initialised there. */
2916 fs_initcall(af_unix_init);
2917 module_exit(af_unix_exit);
2918 
2919 MODULE_LICENSE("GPL");
2920 MODULE_ALIAS_NETPROTO(PF_UNIX);
2921