xref: /openbmc/linux/net/socket.c (revision 29c49648)
12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds  * NET		An implementation of the SOCKET network access protocol.
41da177e4SLinus Torvalds  *
51da177e4SLinus Torvalds  * Version:	@(#)socket.c	1.1.93	18/02/95
61da177e4SLinus Torvalds  *
71da177e4SLinus Torvalds  * Authors:	Orest Zborowski, <obz@Kodak.COM>
802c30a84SJesper Juhl  *		Ross Biro
91da177e4SLinus Torvalds  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
101da177e4SLinus Torvalds  *
111da177e4SLinus Torvalds  * Fixes:
121da177e4SLinus Torvalds  *		Anonymous	:	NOTSOCK/BADF cleanup. Error fix in
131da177e4SLinus Torvalds  *					shutdown()
141da177e4SLinus Torvalds  *		Alan Cox	:	verify_area() fixes
151da177e4SLinus Torvalds  *		Alan Cox	:	Removed DDI
161da177e4SLinus Torvalds  *		Jonathan Kamens	:	SOCK_DGRAM reconnect bug
171da177e4SLinus Torvalds  *		Alan Cox	:	Moved a load of checks to the very
181da177e4SLinus Torvalds  *					top level.
191da177e4SLinus Torvalds  *		Alan Cox	:	Move address structures to/from user
201da177e4SLinus Torvalds  *					mode above the protocol layers.
211da177e4SLinus Torvalds  *		Rob Janssen	:	Allow 0 length sends.
221da177e4SLinus Torvalds  *		Alan Cox	:	Asynchronous I/O support (cribbed from the
231da177e4SLinus Torvalds  *					tty drivers).
241da177e4SLinus Torvalds  *		Niibe Yutaka	:	Asynchronous I/O for writes (4.4BSD style)
251da177e4SLinus Torvalds  *		Jeff Uphoff	:	Made max number of sockets command-line
261da177e4SLinus Torvalds  *					configurable.
271da177e4SLinus Torvalds  *		Matti Aarnio	:	Made the number of sockets dynamic,
281da177e4SLinus Torvalds  *					to be allocated when needed, and mr.
291da177e4SLinus Torvalds  *					Uphoff's max is used as max to be
301da177e4SLinus Torvalds  *					allowed to allocate.
311da177e4SLinus Torvalds  *		Linus		:	Argh. removed all the socket allocation
321da177e4SLinus Torvalds  *					altogether: it's in the inode now.
331da177e4SLinus Torvalds  *		Alan Cox	:	Made sock_alloc()/sock_release() public
341da177e4SLinus Torvalds  *					for NetROM and future kernel nfsd type
351da177e4SLinus Torvalds  *					stuff.
361da177e4SLinus Torvalds  *		Alan Cox	:	sendmsg/recvmsg basics.
371da177e4SLinus Torvalds  *		Tom Dyas	:	Export net symbols.
381da177e4SLinus Torvalds  *		Marcin Dalecki	:	Fixed problems with CONFIG_NET="n".
391da177e4SLinus Torvalds  *		Alan Cox	:	Added thread locking to sys_* calls
401da177e4SLinus Torvalds  *					for sockets. May have errors at the
411da177e4SLinus Torvalds  *					moment.
421da177e4SLinus Torvalds  *		Kevin Buhr	:	Fixed the dumb errors in the above.
431da177e4SLinus Torvalds  *		Andi Kleen	:	Some small cleanups, optimizations,
441da177e4SLinus Torvalds  *					and fixed a copy_from_user() bug.
451da177e4SLinus Torvalds  *		Tigran Aivazian	:	sys_send(args) calls sys_sendto(args, NULL, 0)
461da177e4SLinus Torvalds  *		Tigran Aivazian	:	Made listen(2) backlog sanity checks
471da177e4SLinus Torvalds  *					protocol-independent
481da177e4SLinus Torvalds  *
491da177e4SLinus Torvalds  *	This module is effectively the top level interface to the BSD socket
501da177e4SLinus Torvalds  *	paradigm.
511da177e4SLinus Torvalds  *
521da177e4SLinus Torvalds  *	Based upon Swansea University Computer Society NET3.039
531da177e4SLinus Torvalds  */
541da177e4SLinus Torvalds 
55cc69837fSJakub Kicinski #include <linux/ethtool.h>
561da177e4SLinus Torvalds #include <linux/mm.h>
571da177e4SLinus Torvalds #include <linux/socket.h>
581da177e4SLinus Torvalds #include <linux/file.h>
591da177e4SLinus Torvalds #include <linux/net.h>
601da177e4SLinus Torvalds #include <linux/interrupt.h>
61aaca0bdcSUlrich Drepper #include <linux/thread_info.h>
6255737fdaSStephen Hemminger #include <linux/rcupdate.h>
631da177e4SLinus Torvalds #include <linux/netdevice.h>
641da177e4SLinus Torvalds #include <linux/proc_fs.h>
651da177e4SLinus Torvalds #include <linux/seq_file.h>
664a3e2f71SArjan van de Ven #include <linux/mutex.h>
671da177e4SLinus Torvalds #include <linux/if_bridge.h>
6820380731SArnaldo Carvalho de Melo #include <linux/if_vlan.h>
69408eccceSDaniel Borkmann #include <linux/ptp_classify.h>
701da177e4SLinus Torvalds #include <linux/init.h>
711da177e4SLinus Torvalds #include <linux/poll.h>
721da177e4SLinus Torvalds #include <linux/cache.h>
731da177e4SLinus Torvalds #include <linux/module.h>
741da177e4SLinus Torvalds #include <linux/highmem.h>
751da177e4SLinus Torvalds #include <linux/mount.h>
76fba9be49SDavid Howells #include <linux/pseudo_fs.h>
771da177e4SLinus Torvalds #include <linux/security.h>
781da177e4SLinus Torvalds #include <linux/syscalls.h>
791da177e4SLinus Torvalds #include <linux/compat.h>
801da177e4SLinus Torvalds #include <linux/kmod.h>
813ec3b2fbSDavid Woodhouse #include <linux/audit.h>
82d86b5e0eSAdrian Bunk #include <linux/wireless.h>
831b8d7ae4SEric W. Biederman #include <linux/nsproxy.h>
841fd7317dSNick Black #include <linux/magic.h>
855a0e3ad6STejun Heo #include <linux/slab.h>
86600e1779SMasatake YAMATO #include <linux/xattr.h>
87c8e8cd57SJeremy Cline #include <linux/nospec.h>
888c3c447bSPaolo Abeni #include <linux/indirect_call_wrapper.h>
891da177e4SLinus Torvalds 
907c0f6ba6SLinus Torvalds #include <linux/uaccess.h>
911da177e4SLinus Torvalds #include <asm/unistd.h>
921da177e4SLinus Torvalds 
931da177e4SLinus Torvalds #include <net/compat.h>
9487de87d5SDavid S. Miller #include <net/wext.h>
95f8451725SHerbert Xu #include <net/cls_cgroup.h>
961da177e4SLinus Torvalds 
971da177e4SLinus Torvalds #include <net/sock.h>
981da177e4SLinus Torvalds #include <linux/netfilter.h>
991da177e4SLinus Torvalds 
1006b96018bSArnd Bergmann #include <linux/if_tun.h>
1016b96018bSArnd Bergmann #include <linux/ipv6_route.h>
1026b96018bSArnd Bergmann #include <linux/route.h>
103c7dc504eSArnd Bergmann #include <linux/termios.h>
1046b96018bSArnd Bergmann #include <linux/sockios.h>
105076bb0c8SEliezer Tamir #include <net/busy_poll.h>
106f24b9be5SWillem de Bruijn #include <linux/errqueue.h>
107d7c08826SYangbo Lu #include <linux/ptp_clock_kernel.h>
10806021292SEliezer Tamir 
#ifdef CONFIG_NET_RX_BUSY_POLL
/*
 * Busy-poll tunables; only built when CONFIG_NET_RX_BUSY_POLL is set.
 * NOTE(review): the sysctl plumbing and the readers of these values live
 * outside this part of the file - confirm their units there.
 */
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
#endif
1136b96018bSArnd Bergmann 
1148ae5e030SAl Viro static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
1158ae5e030SAl Viro static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
1161da177e4SLinus Torvalds static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1171da177e4SLinus Torvalds 
1181da177e4SLinus Torvalds static int sock_close(struct inode *inode, struct file *file);
119a11e1d43SLinus Torvalds static __poll_t sock_poll(struct file *file,
120a11e1d43SLinus Torvalds 			      struct poll_table_struct *wait);
12189bddce5SStephen Hemminger static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
12289bbfc95SShaun Pereira #ifdef CONFIG_COMPAT
12389bbfc95SShaun Pereira static long compat_sock_ioctl(struct file *file,
12489bbfc95SShaun Pereira 			      unsigned int cmd, unsigned long arg);
12589bbfc95SShaun Pereira #endif
1261da177e4SLinus Torvalds static int sock_fasync(int fd, struct file *filp, int on);
1271da177e4SLinus Torvalds static ssize_t sock_sendpage(struct file *file, struct page *page,
1281da177e4SLinus Torvalds 			     int offset, size_t size, loff_t *ppos, int more);
1299c55e01cSJens Axboe static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
1309c55e01cSJens Axboe 				struct pipe_inode_info *pipe, size_t len,
1319c55e01cSJens Axboe 				unsigned int flags);
132542d3065SArnd Bergmann 
#ifdef CONFIG_PROC_FS
/*
 * fdinfo hook for socket files: hand the request to the socket's
 * proto_ops->show_fdinfo callback when one is provided, otherwise emit
 * nothing. Without CONFIG_PROC_FS the hook is compiled out as NULL.
 */
static void sock_show_fdinfo(struct seq_file *m, struct file *f)
{
	struct socket *sock = f->private_data;

	if (!sock->ops->show_fdinfo)
		return;

	sock->ops->show_fdinfo(m, sock);
}
#else
#define sock_show_fdinfo NULL
#endif
1441da177e4SLinus Torvalds 
1451da177e4SLinus Torvalds /*
1461da177e4SLinus Torvalds  *	Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
1471da177e4SLinus Torvalds  *	in the operation structures but are done directly via the socketcall() multiplexor.
1481da177e4SLinus Torvalds  */
1491da177e4SLinus Torvalds 
150da7071d7SArjan van de Ven static const struct file_operations socket_file_ops = {
1511da177e4SLinus Torvalds 	.owner =	THIS_MODULE,
1521da177e4SLinus Torvalds 	.llseek =	no_llseek,
1538ae5e030SAl Viro 	.read_iter =	sock_read_iter,
1548ae5e030SAl Viro 	.write_iter =	sock_write_iter,
1551da177e4SLinus Torvalds 	.poll =		sock_poll,
1561da177e4SLinus Torvalds 	.unlocked_ioctl = sock_ioctl,
15789bbfc95SShaun Pereira #ifdef CONFIG_COMPAT
15889bbfc95SShaun Pereira 	.compat_ioctl = compat_sock_ioctl,
15989bbfc95SShaun Pereira #endif
1601da177e4SLinus Torvalds 	.mmap =		sock_mmap,
1611da177e4SLinus Torvalds 	.release =	sock_close,
1621da177e4SLinus Torvalds 	.fasync =	sock_fasync,
1635274f052SJens Axboe 	.sendpage =	sock_sendpage,
1645274f052SJens Axboe 	.splice_write = generic_splice_sendpage,
1659c55e01cSJens Axboe 	.splice_read =	sock_splice_read,
166b4653342SKirill Tkhai 	.show_fdinfo =	sock_show_fdinfo,
1671da177e4SLinus Torvalds };
1681da177e4SLinus Torvalds 
169fe0bdbdeSYejune Deng static const char * const pf_family_names[] = {
170fe0bdbdeSYejune Deng 	[PF_UNSPEC]	= "PF_UNSPEC",
171fe0bdbdeSYejune Deng 	[PF_UNIX]	= "PF_UNIX/PF_LOCAL",
172fe0bdbdeSYejune Deng 	[PF_INET]	= "PF_INET",
173fe0bdbdeSYejune Deng 	[PF_AX25]	= "PF_AX25",
174fe0bdbdeSYejune Deng 	[PF_IPX]	= "PF_IPX",
175fe0bdbdeSYejune Deng 	[PF_APPLETALK]	= "PF_APPLETALK",
176fe0bdbdeSYejune Deng 	[PF_NETROM]	= "PF_NETROM",
177fe0bdbdeSYejune Deng 	[PF_BRIDGE]	= "PF_BRIDGE",
178fe0bdbdeSYejune Deng 	[PF_ATMPVC]	= "PF_ATMPVC",
179fe0bdbdeSYejune Deng 	[PF_X25]	= "PF_X25",
180fe0bdbdeSYejune Deng 	[PF_INET6]	= "PF_INET6",
181fe0bdbdeSYejune Deng 	[PF_ROSE]	= "PF_ROSE",
182fe0bdbdeSYejune Deng 	[PF_DECnet]	= "PF_DECnet",
183fe0bdbdeSYejune Deng 	[PF_NETBEUI]	= "PF_NETBEUI",
184fe0bdbdeSYejune Deng 	[PF_SECURITY]	= "PF_SECURITY",
185fe0bdbdeSYejune Deng 	[PF_KEY]	= "PF_KEY",
186fe0bdbdeSYejune Deng 	[PF_NETLINK]	= "PF_NETLINK/PF_ROUTE",
187fe0bdbdeSYejune Deng 	[PF_PACKET]	= "PF_PACKET",
188fe0bdbdeSYejune Deng 	[PF_ASH]	= "PF_ASH",
189fe0bdbdeSYejune Deng 	[PF_ECONET]	= "PF_ECONET",
190fe0bdbdeSYejune Deng 	[PF_ATMSVC]	= "PF_ATMSVC",
191fe0bdbdeSYejune Deng 	[PF_RDS]	= "PF_RDS",
192fe0bdbdeSYejune Deng 	[PF_SNA]	= "PF_SNA",
193fe0bdbdeSYejune Deng 	[PF_IRDA]	= "PF_IRDA",
194fe0bdbdeSYejune Deng 	[PF_PPPOX]	= "PF_PPPOX",
195fe0bdbdeSYejune Deng 	[PF_WANPIPE]	= "PF_WANPIPE",
196fe0bdbdeSYejune Deng 	[PF_LLC]	= "PF_LLC",
197fe0bdbdeSYejune Deng 	[PF_IB]		= "PF_IB",
198fe0bdbdeSYejune Deng 	[PF_MPLS]	= "PF_MPLS",
199fe0bdbdeSYejune Deng 	[PF_CAN]	= "PF_CAN",
200fe0bdbdeSYejune Deng 	[PF_TIPC]	= "PF_TIPC",
201fe0bdbdeSYejune Deng 	[PF_BLUETOOTH]	= "PF_BLUETOOTH",
202fe0bdbdeSYejune Deng 	[PF_IUCV]	= "PF_IUCV",
203fe0bdbdeSYejune Deng 	[PF_RXRPC]	= "PF_RXRPC",
204fe0bdbdeSYejune Deng 	[PF_ISDN]	= "PF_ISDN",
205fe0bdbdeSYejune Deng 	[PF_PHONET]	= "PF_PHONET",
206fe0bdbdeSYejune Deng 	[PF_IEEE802154]	= "PF_IEEE802154",
207fe0bdbdeSYejune Deng 	[PF_CAIF]	= "PF_CAIF",
208fe0bdbdeSYejune Deng 	[PF_ALG]	= "PF_ALG",
209fe0bdbdeSYejune Deng 	[PF_NFC]	= "PF_NFC",
210fe0bdbdeSYejune Deng 	[PF_VSOCK]	= "PF_VSOCK",
211fe0bdbdeSYejune Deng 	[PF_KCM]	= "PF_KCM",
212fe0bdbdeSYejune Deng 	[PF_QIPCRTR]	= "PF_QIPCRTR",
213fe0bdbdeSYejune Deng 	[PF_SMC]	= "PF_SMC",
214fe0bdbdeSYejune Deng 	[PF_XDP]	= "PF_XDP",
215fe0bdbdeSYejune Deng };
216fe0bdbdeSYejune Deng 
2171da177e4SLinus Torvalds /*
2181da177e4SLinus Torvalds  *	The protocol list. Each protocol is registered in here.
2191da177e4SLinus Torvalds  */
2201da177e4SLinus Torvalds 
2211da177e4SLinus Torvalds static DEFINE_SPINLOCK(net_family_lock);
222190683a9SEric Dumazet static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
2231da177e4SLinus Torvalds 
2241da177e4SLinus Torvalds /*
22589bddce5SStephen Hemminger  * Support routines.
22689bddce5SStephen Hemminger  * Move socket addresses back and forth across the kernel/user
2271da177e4SLinus Torvalds  * divide and look after the messy bits.
2281da177e4SLinus Torvalds  */
2291da177e4SLinus Torvalds 
2301da177e4SLinus Torvalds /**
2311da177e4SLinus Torvalds  *	move_addr_to_kernel	-	copy a socket address into kernel space
2321da177e4SLinus Torvalds  *	@uaddr: Address in user space
2331da177e4SLinus Torvalds  *	@kaddr: Address in kernel space
2341da177e4SLinus Torvalds  *	@ulen: Length in user space
2351da177e4SLinus Torvalds  *
2361da177e4SLinus Torvalds  *	The address is copied into kernel space. If the provided address is
2371da177e4SLinus Torvalds  *	too long an error code of -EINVAL is returned. If the copy gives
2381da177e4SLinus Torvalds  *	invalid addresses -EFAULT is returned. On a success 0 is returned.
2391da177e4SLinus Torvalds  */
2401da177e4SLinus Torvalds 
24143db362dSMaciej Żenczykowski int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
2421da177e4SLinus Torvalds {
243230b1839SYOSHIFUJI Hideaki 	if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
2441da177e4SLinus Torvalds 		return -EINVAL;
2451da177e4SLinus Torvalds 	if (ulen == 0)
2461da177e4SLinus Torvalds 		return 0;
2471da177e4SLinus Torvalds 	if (copy_from_user(kaddr, uaddr, ulen))
2481da177e4SLinus Torvalds 		return -EFAULT;
2493ec3b2fbSDavid Woodhouse 	return audit_sockaddr(ulen, kaddr);
2501da177e4SLinus Torvalds }
2511da177e4SLinus Torvalds 
2521da177e4SLinus Torvalds /**
2531da177e4SLinus Torvalds  *	move_addr_to_user	-	copy an address to user space
2541da177e4SLinus Torvalds  *	@kaddr: kernel space address
2551da177e4SLinus Torvalds  *	@klen: length of address in kernel
2561da177e4SLinus Torvalds  *	@uaddr: user space address
2571da177e4SLinus Torvalds  *	@ulen: pointer to user length field
2581da177e4SLinus Torvalds  *
2591da177e4SLinus Torvalds  *	The value pointed to by ulen on entry is the buffer length available.
2601da177e4SLinus Torvalds  *	This is overwritten with the buffer space used. -EINVAL is returned
2611da177e4SLinus Torvalds  *	if an overlong buffer is specified or a negative buffer size. -EFAULT
2621da177e4SLinus Torvalds  *	is returned if either the buffer or the length field are not
2631da177e4SLinus Torvalds  *	accessible.
2641da177e4SLinus Torvalds  *	After copying the data up to the limit the user specifies, the true
2651da177e4SLinus Torvalds  *	length of the data is written over the length limit the user
2661da177e4SLinus Torvalds  *	specified. Zero is returned for a success.
2671da177e4SLinus Torvalds  */
2681da177e4SLinus Torvalds 
26943db362dSMaciej Żenczykowski static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
27011165f14Sstephen hemminger 			     void __user *uaddr, int __user *ulen)
2711da177e4SLinus Torvalds {
2721da177e4SLinus Torvalds 	int err;
2731da177e4SLinus Torvalds 	int len;
2741da177e4SLinus Torvalds 
27568c6beb3SHannes Frederic Sowa 	BUG_ON(klen > sizeof(struct sockaddr_storage));
27689bddce5SStephen Hemminger 	err = get_user(len, ulen);
27789bddce5SStephen Hemminger 	if (err)
2781da177e4SLinus Torvalds 		return err;
2791da177e4SLinus Torvalds 	if (len > klen)
2801da177e4SLinus Torvalds 		len = klen;
28168c6beb3SHannes Frederic Sowa 	if (len < 0)
2821da177e4SLinus Torvalds 		return -EINVAL;
28389bddce5SStephen Hemminger 	if (len) {
284d6fe3945SSteve Grubb 		if (audit_sockaddr(klen, kaddr))
285d6fe3945SSteve Grubb 			return -ENOMEM;
2861da177e4SLinus Torvalds 		if (copy_to_user(uaddr, kaddr, len))
2871da177e4SLinus Torvalds 			return -EFAULT;
2881da177e4SLinus Torvalds 	}
2891da177e4SLinus Torvalds 	/*
2901da177e4SLinus Torvalds 	 *      "fromlen shall refer to the value before truncation.."
2911da177e4SLinus Torvalds 	 *                      1003.1g
2921da177e4SLinus Torvalds 	 */
2931da177e4SLinus Torvalds 	return __put_user(klen, ulen);
2941da177e4SLinus Torvalds }
2951da177e4SLinus Torvalds 
29608009a76SAlexey Dobriyan static struct kmem_cache *sock_inode_cachep __ro_after_init;
2971da177e4SLinus Torvalds 
2981da177e4SLinus Torvalds static struct inode *sock_alloc_inode(struct super_block *sb)
2991da177e4SLinus Torvalds {
3001da177e4SLinus Torvalds 	struct socket_alloc *ei;
30189bddce5SStephen Hemminger 
302e94b1766SChristoph Lameter 	ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
3031da177e4SLinus Torvalds 	if (!ei)
3041da177e4SLinus Torvalds 		return NULL;
305333f7909SAl Viro 	init_waitqueue_head(&ei->socket.wq.wait);
306333f7909SAl Viro 	ei->socket.wq.fasync_list = NULL;
307333f7909SAl Viro 	ei->socket.wq.flags = 0;
3081da177e4SLinus Torvalds 
3091da177e4SLinus Torvalds 	ei->socket.state = SS_UNCONNECTED;
3101da177e4SLinus Torvalds 	ei->socket.flags = 0;
3111da177e4SLinus Torvalds 	ei->socket.ops = NULL;
3121da177e4SLinus Torvalds 	ei->socket.sk = NULL;
3131da177e4SLinus Torvalds 	ei->socket.file = NULL;
3141da177e4SLinus Torvalds 
3151da177e4SLinus Torvalds 	return &ei->vfs_inode;
3161da177e4SLinus Torvalds }
3171da177e4SLinus Torvalds 
3186d7855c5SAl Viro static void sock_free_inode(struct inode *inode)
3191da177e4SLinus Torvalds {
32043815482SEric Dumazet 	struct socket_alloc *ei;
32143815482SEric Dumazet 
32243815482SEric Dumazet 	ei = container_of(inode, struct socket_alloc, vfs_inode);
32343815482SEric Dumazet 	kmem_cache_free(sock_inode_cachep, ei);
3241da177e4SLinus Torvalds }
3251da177e4SLinus Torvalds 
32651cc5068SAlexey Dobriyan static void init_once(void *foo)
3271da177e4SLinus Torvalds {
3281da177e4SLinus Torvalds 	struct socket_alloc *ei = (struct socket_alloc *)foo;
3291da177e4SLinus Torvalds 
3301da177e4SLinus Torvalds 	inode_init_once(&ei->vfs_inode);
3311da177e4SLinus Torvalds }
3321da177e4SLinus Torvalds 
3331e911632Syuan linyu static void init_inodecache(void)
3341da177e4SLinus Torvalds {
3351da177e4SLinus Torvalds 	sock_inode_cachep = kmem_cache_create("sock_inode_cache",
3361da177e4SLinus Torvalds 					      sizeof(struct socket_alloc),
33789bddce5SStephen Hemminger 					      0,
33889bddce5SStephen Hemminger 					      (SLAB_HWCACHE_ALIGN |
33989bddce5SStephen Hemminger 					       SLAB_RECLAIM_ACCOUNT |
3405d097056SVladimir Davydov 					       SLAB_MEM_SPREAD | SLAB_ACCOUNT),
34120c2df83SPaul Mundt 					      init_once);
3421e911632Syuan linyu 	BUG_ON(sock_inode_cachep == NULL);
3431da177e4SLinus Torvalds }
3441da177e4SLinus Torvalds 
345b87221deSAlexey Dobriyan static const struct super_operations sockfs_ops = {
3461da177e4SLinus Torvalds 	.alloc_inode	= sock_alloc_inode,
3476d7855c5SAl Viro 	.free_inode	= sock_free_inode,
3481da177e4SLinus Torvalds 	.statfs		= simple_statfs,
3491da177e4SLinus Torvalds };
3501da177e4SLinus Torvalds 
351c23fbb6bSEric Dumazet /*
352c23fbb6bSEric Dumazet  * sockfs_dname() is called from d_path().
353c23fbb6bSEric Dumazet  */
354c23fbb6bSEric Dumazet static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
355c23fbb6bSEric Dumazet {
356c23fbb6bSEric Dumazet 	return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
357c5ef6035SDavid Howells 				d_inode(dentry)->i_ino);
358c23fbb6bSEric Dumazet }
359c23fbb6bSEric Dumazet 
3603ba13d17SAl Viro static const struct dentry_operations sockfs_dentry_operations = {
361c23fbb6bSEric Dumazet 	.d_dname  = sockfs_dname,
3621da177e4SLinus Torvalds };
3631da177e4SLinus Torvalds 
364bba0bd31SAndreas Gruenbacher static int sockfs_xattr_get(const struct xattr_handler *handler,
365bba0bd31SAndreas Gruenbacher 			    struct dentry *dentry, struct inode *inode,
366bba0bd31SAndreas Gruenbacher 			    const char *suffix, void *value, size_t size)
367bba0bd31SAndreas Gruenbacher {
368bba0bd31SAndreas Gruenbacher 	if (value) {
369bba0bd31SAndreas Gruenbacher 		if (dentry->d_name.len + 1 > size)
370bba0bd31SAndreas Gruenbacher 			return -ERANGE;
371bba0bd31SAndreas Gruenbacher 		memcpy(value, dentry->d_name.name, dentry->d_name.len + 1);
372bba0bd31SAndreas Gruenbacher 	}
373bba0bd31SAndreas Gruenbacher 	return dentry->d_name.len + 1;
374bba0bd31SAndreas Gruenbacher }
375bba0bd31SAndreas Gruenbacher 
376bba0bd31SAndreas Gruenbacher #define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
377bba0bd31SAndreas Gruenbacher #define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
378bba0bd31SAndreas Gruenbacher #define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
379bba0bd31SAndreas Gruenbacher 
380bba0bd31SAndreas Gruenbacher static const struct xattr_handler sockfs_xattr_handler = {
381bba0bd31SAndreas Gruenbacher 	.name = XATTR_NAME_SOCKPROTONAME,
382bba0bd31SAndreas Gruenbacher 	.get = sockfs_xattr_get,
383bba0bd31SAndreas Gruenbacher };
384bba0bd31SAndreas Gruenbacher 
3854a590153SAndreas Gruenbacher static int sockfs_security_xattr_set(const struct xattr_handler *handler,
386e65ce2a5SChristian Brauner 				     struct user_namespace *mnt_userns,
3874a590153SAndreas Gruenbacher 				     struct dentry *dentry, struct inode *inode,
3884a590153SAndreas Gruenbacher 				     const char *suffix, const void *value,
3894a590153SAndreas Gruenbacher 				     size_t size, int flags)
3904a590153SAndreas Gruenbacher {
3914a590153SAndreas Gruenbacher 	/* Handled by LSM. */
3924a590153SAndreas Gruenbacher 	return -EAGAIN;
3934a590153SAndreas Gruenbacher }
3944a590153SAndreas Gruenbacher 
3954a590153SAndreas Gruenbacher static const struct xattr_handler sockfs_security_xattr_handler = {
3964a590153SAndreas Gruenbacher 	.prefix = XATTR_SECURITY_PREFIX,
3974a590153SAndreas Gruenbacher 	.set = sockfs_security_xattr_set,
3984a590153SAndreas Gruenbacher };
3994a590153SAndreas Gruenbacher 
400bba0bd31SAndreas Gruenbacher static const struct xattr_handler *sockfs_xattr_handlers[] = {
401bba0bd31SAndreas Gruenbacher 	&sockfs_xattr_handler,
4024a590153SAndreas Gruenbacher 	&sockfs_security_xattr_handler,
403bba0bd31SAndreas Gruenbacher 	NULL
404bba0bd31SAndreas Gruenbacher };
405bba0bd31SAndreas Gruenbacher 
406fba9be49SDavid Howells static int sockfs_init_fs_context(struct fs_context *fc)
407c74a1cbbSAl Viro {
408fba9be49SDavid Howells 	struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC);
409fba9be49SDavid Howells 	if (!ctx)
410fba9be49SDavid Howells 		return -ENOMEM;
411fba9be49SDavid Howells 	ctx->ops = &sockfs_ops;
412fba9be49SDavid Howells 	ctx->dops = &sockfs_dentry_operations;
413fba9be49SDavid Howells 	ctx->xattr = sockfs_xattr_handlers;
414fba9be49SDavid Howells 	return 0;
415c74a1cbbSAl Viro }
416c74a1cbbSAl Viro 
417c74a1cbbSAl Viro static struct vfsmount *sock_mnt __read_mostly;
418c74a1cbbSAl Viro 
419c74a1cbbSAl Viro static struct file_system_type sock_fs_type = {
420c74a1cbbSAl Viro 	.name =		"sockfs",
421fba9be49SDavid Howells 	.init_fs_context = sockfs_init_fs_context,
422c74a1cbbSAl Viro 	.kill_sb =	kill_anon_super,
423c74a1cbbSAl Viro };
424c74a1cbbSAl Viro 
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them into the fd space
 *	of the current process. On success the file descriptor is returned
 *	and the file struct is implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we
 *	return from this function. We rely on the fact that we do not refer
 *	to the socket after mapping. If one day we need to, this function
 *	will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY NOT be valid!
 *	This race condition is unavoidable with shared fd spaces; we cannot
 *	solve it inside the kernel, but we still take care of internal
 *	coherence.
 */
4411da177e4SLinus Torvalds 
4428a3c245cSPedro Tammela /**
4438a3c245cSPedro Tammela  *	sock_alloc_file - Bind a &socket to a &file
4448a3c245cSPedro Tammela  *	@sock: socket
4458a3c245cSPedro Tammela  *	@flags: file status flags
4468a3c245cSPedro Tammela  *	@dname: protocol name
4478a3c245cSPedro Tammela  *
4488a3c245cSPedro Tammela  *	Returns the &file bound with @sock, implicitly storing it
4498a3c245cSPedro Tammela  *	in sock->file. If dname is %NULL, sets to "".
4508a3c245cSPedro Tammela  *	On failure the return is a ERR pointer (see linux/err.h).
4518a3c245cSPedro Tammela  *	This function uses GFP_KERNEL internally.
4528a3c245cSPedro Tammela  */
4538a3c245cSPedro Tammela 
454aab174f0SLinus Torvalds struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
4551da177e4SLinus Torvalds {
4567cbe66b6SAl Viro 	struct file *file;
4571da177e4SLinus Torvalds 
458d93aa9d8SAl Viro 	if (!dname)
459d93aa9d8SAl Viro 		dname = sock->sk ? sock->sk->sk_prot_creator->name : "";
46039d8c1b6SDavid S. Miller 
461d93aa9d8SAl Viro 	file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname,
462d93aa9d8SAl Viro 				O_RDWR | (flags & O_NONBLOCK),
463cc3808f8SAl Viro 				&socket_file_ops);
464b5ffe634SViresh Kumar 	if (IS_ERR(file)) {
4658e1611e2SAl Viro 		sock_release(sock);
46639b65252SAnatol Pomozov 		return file;
467cc3808f8SAl Viro 	}
4681da177e4SLinus Torvalds 
4691da177e4SLinus Torvalds 	sock->file = file;
47007dc3f07SBenjamin LaHaise 	file->private_data = sock;
471d8e464ecSLinus Torvalds 	stream_open(SOCK_INODE(sock), file);
47228407630SAl Viro 	return file;
4731da177e4SLinus Torvalds }
47456b31d1cSAl Viro EXPORT_SYMBOL(sock_alloc_file);
4751da177e4SLinus Torvalds 
47656b31d1cSAl Viro static int sock_map_fd(struct socket *sock, int flags)
47739d8c1b6SDavid S. Miller {
47839d8c1b6SDavid S. Miller 	struct file *newfile;
47928407630SAl Viro 	int fd = get_unused_fd_flags(flags);
480ce4bb04cSAl Viro 	if (unlikely(fd < 0)) {
481ce4bb04cSAl Viro 		sock_release(sock);
4821da177e4SLinus Torvalds 		return fd;
483ce4bb04cSAl Viro 	}
4841da177e4SLinus Torvalds 
485aab174f0SLinus Torvalds 	newfile = sock_alloc_file(sock, flags, NULL);
4864546e44cSEnrico Weigelt 	if (!IS_ERR(newfile)) {
4871da177e4SLinus Torvalds 		fd_install(fd, newfile);
4881da177e4SLinus Torvalds 		return fd;
4891da177e4SLinus Torvalds 	}
49028407630SAl Viro 
49128407630SAl Viro 	put_unused_fd(fd);
49228407630SAl Viro 	return PTR_ERR(newfile);
4931da177e4SLinus Torvalds }
4941da177e4SLinus Torvalds 
4958a3c245cSPedro Tammela /**
4968a3c245cSPedro Tammela  *	sock_from_file - Return the &socket bounded to @file.
4978a3c245cSPedro Tammela  *	@file: file
4988a3c245cSPedro Tammela  *
499dba4a925SFlorent Revest  *	On failure returns %NULL.
5008a3c245cSPedro Tammela  */
5018a3c245cSPedro Tammela 
502dba4a925SFlorent Revest struct socket *sock_from_file(struct file *file)
5036cb153caSBenjamin LaHaise {
5046cb153caSBenjamin LaHaise 	if (file->f_op == &socket_file_ops)
5056cb153caSBenjamin LaHaise 		return file->private_data;	/* set in sock_map_fd */
5066cb153caSBenjamin LaHaise 
5076cb153caSBenjamin LaHaise 	return NULL;
5086cb153caSBenjamin LaHaise }
509406a3c63SJohn Fastabend EXPORT_SYMBOL(sock_from_file);
5106cb153caSBenjamin LaHaise 
5111da177e4SLinus Torvalds /**
5121da177e4SLinus Torvalds  *	sockfd_lookup - Go from a file number to its socket slot
5131da177e4SLinus Torvalds  *	@fd: file handle
5141da177e4SLinus Torvalds  *	@err: pointer to an error code return
5151da177e4SLinus Torvalds  *
5161da177e4SLinus Torvalds  *	The file handle passed in is locked and the socket it is bound
517241c4667SRosen, Rami  *	to is returned. If an error occurs the err pointer is overwritten
5181da177e4SLinus Torvalds  *	with a negative errno code and NULL is returned. The function checks
5191da177e4SLinus Torvalds  *	for both invalid handles and passing a handle which is not a socket.
5201da177e4SLinus Torvalds  *
5211da177e4SLinus Torvalds  *	On a success the socket object pointer is returned.
5221da177e4SLinus Torvalds  */
5231da177e4SLinus Torvalds 
5241da177e4SLinus Torvalds struct socket *sockfd_lookup(int fd, int *err)
5251da177e4SLinus Torvalds {
5261da177e4SLinus Torvalds 	struct file *file;
5271da177e4SLinus Torvalds 	struct socket *sock;
5281da177e4SLinus Torvalds 
52989bddce5SStephen Hemminger 	file = fget(fd);
53089bddce5SStephen Hemminger 	if (!file) {
5311da177e4SLinus Torvalds 		*err = -EBADF;
5321da177e4SLinus Torvalds 		return NULL;
5331da177e4SLinus Torvalds 	}
53489bddce5SStephen Hemminger 
535dba4a925SFlorent Revest 	sock = sock_from_file(file);
536dba4a925SFlorent Revest 	if (!sock) {
537dba4a925SFlorent Revest 		*err = -ENOTSOCK;
5381da177e4SLinus Torvalds 		fput(file);
539dba4a925SFlorent Revest 	}
5406cb153caSBenjamin LaHaise 	return sock;
5411da177e4SLinus Torvalds }
542c6d409cfSEric Dumazet EXPORT_SYMBOL(sockfd_lookup);
5431da177e4SLinus Torvalds 
5446cb153caSBenjamin LaHaise static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
5456cb153caSBenjamin LaHaise {
54600e188efSAl Viro 	struct fd f = fdget(fd);
5476cb153caSBenjamin LaHaise 	struct socket *sock;
5486cb153caSBenjamin LaHaise 
5493672558cSHua Zhong 	*err = -EBADF;
55000e188efSAl Viro 	if (f.file) {
551dba4a925SFlorent Revest 		sock = sock_from_file(f.file);
55200e188efSAl Viro 		if (likely(sock)) {
553ce787a5aSMiaohe Lin 			*fput_needed = f.flags & FDPUT_FPUT;
5541da177e4SLinus Torvalds 			return sock;
55500e188efSAl Viro 		}
556dba4a925SFlorent Revest 		*err = -ENOTSOCK;
55700e188efSAl Viro 		fdput(f);
5586cb153caSBenjamin LaHaise 	}
5596cb153caSBenjamin LaHaise 	return NULL;
5601da177e4SLinus Torvalds }
5611da177e4SLinus Torvalds 
562600e1779SMasatake YAMATO static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
563600e1779SMasatake YAMATO 				size_t size)
564600e1779SMasatake YAMATO {
565600e1779SMasatake YAMATO 	ssize_t len;
566600e1779SMasatake YAMATO 	ssize_t used = 0;
567600e1779SMasatake YAMATO 
568c5ef6035SDavid Howells 	len = security_inode_listsecurity(d_inode(dentry), buffer, size);
569600e1779SMasatake YAMATO 	if (len < 0)
570600e1779SMasatake YAMATO 		return len;
571600e1779SMasatake YAMATO 	used += len;
572600e1779SMasatake YAMATO 	if (buffer) {
573600e1779SMasatake YAMATO 		if (size < used)
574600e1779SMasatake YAMATO 			return -ERANGE;
575600e1779SMasatake YAMATO 		buffer += len;
576600e1779SMasatake YAMATO 	}
577600e1779SMasatake YAMATO 
578600e1779SMasatake YAMATO 	len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
579600e1779SMasatake YAMATO 	used += len;
580600e1779SMasatake YAMATO 	if (buffer) {
581600e1779SMasatake YAMATO 		if (size < used)
582600e1779SMasatake YAMATO 			return -ERANGE;
583600e1779SMasatake YAMATO 		memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
584600e1779SMasatake YAMATO 		buffer += len;
585600e1779SMasatake YAMATO 	}
586600e1779SMasatake YAMATO 
587600e1779SMasatake YAMATO 	return used;
588600e1779SMasatake YAMATO }
589600e1779SMasatake YAMATO 
590549c7297SChristian Brauner static int sockfs_setattr(struct user_namespace *mnt_userns,
591549c7297SChristian Brauner 			  struct dentry *dentry, struct iattr *iattr)
59286741ec2SLorenzo Colitti {
593549c7297SChristian Brauner 	int err = simple_setattr(&init_user_ns, dentry, iattr);
59486741ec2SLorenzo Colitti 
595e1a3a60aSEric Biggers 	if (!err && (iattr->ia_valid & ATTR_UID)) {
59686741ec2SLorenzo Colitti 		struct socket *sock = SOCKET_I(d_inode(dentry));
59786741ec2SLorenzo Colitti 
5986d8c50dcSCong Wang 		if (sock->sk)
59986741ec2SLorenzo Colitti 			sock->sk->sk_uid = iattr->ia_uid;
6006d8c50dcSCong Wang 		else
6016d8c50dcSCong Wang 			err = -ENOENT;
60286741ec2SLorenzo Colitti 	}
60386741ec2SLorenzo Colitti 
60486741ec2SLorenzo Colitti 	return err;
60586741ec2SLorenzo Colitti }
60686741ec2SLorenzo Colitti 
607600e1779SMasatake YAMATO static const struct inode_operations sockfs_inode_ops = {
608600e1779SMasatake YAMATO 	.listxattr = sockfs_listxattr,
60986741ec2SLorenzo Colitti 	.setattr = sockfs_setattr,
610600e1779SMasatake YAMATO };
611600e1779SMasatake YAMATO 
6121da177e4SLinus Torvalds /**
6131da177e4SLinus Torvalds  *	sock_alloc - allocate a socket
6141da177e4SLinus Torvalds  *
6151da177e4SLinus Torvalds  *	Allocate a new inode and socket object. The two are bound together
6161da177e4SLinus Torvalds  *	and initialised. The socket is then returned. If we are out of inodes
6178a3c245cSPedro Tammela  *	NULL is returned. This functions uses GFP_KERNEL internally.
6181da177e4SLinus Torvalds  */
6191da177e4SLinus Torvalds 
620f4a00aacSTom Herbert struct socket *sock_alloc(void)
6211da177e4SLinus Torvalds {
6221da177e4SLinus Torvalds 	struct inode *inode;
6231da177e4SLinus Torvalds 	struct socket *sock;
6241da177e4SLinus Torvalds 
625a209dfc7SEric Dumazet 	inode = new_inode_pseudo(sock_mnt->mnt_sb);
6261da177e4SLinus Torvalds 	if (!inode)
6271da177e4SLinus Torvalds 		return NULL;
6281da177e4SLinus Torvalds 
6291da177e4SLinus Torvalds 	sock = SOCKET_I(inode);
6301da177e4SLinus Torvalds 
63185fe4025SChristoph Hellwig 	inode->i_ino = get_next_ino();
6321da177e4SLinus Torvalds 	inode->i_mode = S_IFSOCK | S_IRWXUGO;
6338192b0c4SDavid Howells 	inode->i_uid = current_fsuid();
6348192b0c4SDavid Howells 	inode->i_gid = current_fsgid();
635600e1779SMasatake YAMATO 	inode->i_op = &sockfs_inode_ops;
6361da177e4SLinus Torvalds 
6371da177e4SLinus Torvalds 	return sock;
6381da177e4SLinus Torvalds }
639f4a00aacSTom Herbert EXPORT_SYMBOL(sock_alloc);
6401da177e4SLinus Torvalds 
6416d8c50dcSCong Wang static void __sock_release(struct socket *sock, struct inode *inode)
6421da177e4SLinus Torvalds {
6431da177e4SLinus Torvalds 	if (sock->ops) {
6441da177e4SLinus Torvalds 		struct module *owner = sock->ops->owner;
6451da177e4SLinus Torvalds 
6466d8c50dcSCong Wang 		if (inode)
6476d8c50dcSCong Wang 			inode_lock(inode);
6481da177e4SLinus Torvalds 		sock->ops->release(sock);
649ff7b11aaSEric Biggers 		sock->sk = NULL;
6506d8c50dcSCong Wang 		if (inode)
6516d8c50dcSCong Wang 			inode_unlock(inode);
6521da177e4SLinus Torvalds 		sock->ops = NULL;
6531da177e4SLinus Torvalds 		module_put(owner);
6541da177e4SLinus Torvalds 	}
6551da177e4SLinus Torvalds 
656333f7909SAl Viro 	if (sock->wq.fasync_list)
6573410f22eSYang Yingliang 		pr_err("%s: fasync list not empty!\n", __func__);
6581da177e4SLinus Torvalds 
6591da177e4SLinus Torvalds 	if (!sock->file) {
6601da177e4SLinus Torvalds 		iput(SOCK_INODE(sock));
6611da177e4SLinus Torvalds 		return;
6621da177e4SLinus Torvalds 	}
6631da177e4SLinus Torvalds 	sock->file = NULL;
6641da177e4SLinus Torvalds }
6656d8c50dcSCong Wang 
6669a8ad9acSAndrew Lunn /**
6679a8ad9acSAndrew Lunn  *	sock_release - close a socket
6689a8ad9acSAndrew Lunn  *	@sock: socket to close
6699a8ad9acSAndrew Lunn  *
6709a8ad9acSAndrew Lunn  *	The socket is released from the protocol stack if it has a release
6719a8ad9acSAndrew Lunn  *	callback, and the inode is then released if the socket is bound to
6729a8ad9acSAndrew Lunn  *	an inode not a file.
6739a8ad9acSAndrew Lunn  */
6746d8c50dcSCong Wang void sock_release(struct socket *sock)
6756d8c50dcSCong Wang {
6766d8c50dcSCong Wang 	__sock_release(sock, NULL);
6776d8c50dcSCong Wang }
678c6d409cfSEric Dumazet EXPORT_SYMBOL(sock_release);
6791da177e4SLinus Torvalds 
680c14ac945SSoheil Hassas Yeganeh void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
68120d49473SPatrick Ohly {
682140c55d4SEric Dumazet 	u8 flags = *tx_flags;
683140c55d4SEric Dumazet 
684c14ac945SSoheil Hassas Yeganeh 	if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
685140c55d4SEric Dumazet 		flags |= SKBTX_HW_TSTAMP;
686140c55d4SEric Dumazet 
687c14ac945SSoheil Hassas Yeganeh 	if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
688140c55d4SEric Dumazet 		flags |= SKBTX_SW_TSTAMP;
689140c55d4SEric Dumazet 
690c14ac945SSoheil Hassas Yeganeh 	if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
691140c55d4SEric Dumazet 		flags |= SKBTX_SCHED_TSTAMP;
692140c55d4SEric Dumazet 
693140c55d4SEric Dumazet 	*tx_flags = flags;
69420d49473SPatrick Ohly }
69567cc0d40SWillem de Bruijn EXPORT_SYMBOL(__sock_tx_timestamp);
69620d49473SPatrick Ohly 
6978c3c447bSPaolo Abeni INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
6988c3c447bSPaolo Abeni 					   size_t));
699a648a592SPaolo Abeni INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
700a648a592SPaolo Abeni 					    size_t));
701d8725c86SAl Viro static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
7021da177e4SLinus Torvalds {
703a648a592SPaolo Abeni 	int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
704a648a592SPaolo Abeni 				     inet_sendmsg, sock, msg,
705a648a592SPaolo Abeni 				     msg_data_left(msg));
706d8725c86SAl Viro 	BUG_ON(ret == -EIOCBQUEUED);
707d8725c86SAl Viro 	return ret;
7081da177e4SLinus Torvalds }
7090cf00c6fSGu Zheng 
71085806af0SRandy Dunlap /**
71185806af0SRandy Dunlap  *	sock_sendmsg - send a message through @sock
71285806af0SRandy Dunlap  *	@sock: socket
71385806af0SRandy Dunlap  *	@msg: message to send
71485806af0SRandy Dunlap  *
71585806af0SRandy Dunlap  *	Sends @msg through @sock, passing through LSM.
71685806af0SRandy Dunlap  *	Returns the number of bytes sent, or an error code.
71785806af0SRandy Dunlap  */
718d8725c86SAl Viro int sock_sendmsg(struct socket *sock, struct msghdr *msg)
7190cf00c6fSGu Zheng {
720d8725c86SAl Viro 	int err = security_socket_sendmsg(sock, msg,
72101e97e65SAl Viro 					  msg_data_left(msg));
7221b784140SYing Xue 
723d8725c86SAl Viro 	return err ?: sock_sendmsg_nosec(sock, msg);
7240cf00c6fSGu Zheng }
725c6d409cfSEric Dumazet EXPORT_SYMBOL(sock_sendmsg);
7261da177e4SLinus Torvalds 
7278a3c245cSPedro Tammela /**
7288a3c245cSPedro Tammela  *	kernel_sendmsg - send a message through @sock (kernel-space)
7298a3c245cSPedro Tammela  *	@sock: socket
7308a3c245cSPedro Tammela  *	@msg: message header
7318a3c245cSPedro Tammela  *	@vec: kernel vec
7328a3c245cSPedro Tammela  *	@num: vec array length
7338a3c245cSPedro Tammela  *	@size: total message data size
7348a3c245cSPedro Tammela  *
7358a3c245cSPedro Tammela  *	Builds the message data with @vec and sends it through @sock.
7368a3c245cSPedro Tammela  *	Returns the number of bytes sent, or an error code.
7378a3c245cSPedro Tammela  */
7388a3c245cSPedro Tammela 
7391da177e4SLinus Torvalds int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
7401da177e4SLinus Torvalds 		   struct kvec *vec, size_t num, size_t size)
7411da177e4SLinus Torvalds {
742aa563d7bSDavid Howells 	iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
743d8725c86SAl Viro 	return sock_sendmsg(sock, msg);
7441da177e4SLinus Torvalds }
745c6d409cfSEric Dumazet EXPORT_SYMBOL(kernel_sendmsg);
7461da177e4SLinus Torvalds 
7478a3c245cSPedro Tammela /**
7488a3c245cSPedro Tammela  *	kernel_sendmsg_locked - send a message through @sock (kernel-space)
7498a3c245cSPedro Tammela  *	@sk: sock
7508a3c245cSPedro Tammela  *	@msg: message header
7518a3c245cSPedro Tammela  *	@vec: output s/g array
7528a3c245cSPedro Tammela  *	@num: output s/g array length
7538a3c245cSPedro Tammela  *	@size: total message data size
7548a3c245cSPedro Tammela  *
7558a3c245cSPedro Tammela  *	Builds the message data with @vec and sends it through @sock.
7568a3c245cSPedro Tammela  *	Returns the number of bytes sent, or an error code.
7578a3c245cSPedro Tammela  *	Caller must hold @sk.
7588a3c245cSPedro Tammela  */
7598a3c245cSPedro Tammela 
760306b13ebSTom Herbert int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
761306b13ebSTom Herbert 			  struct kvec *vec, size_t num, size_t size)
762306b13ebSTom Herbert {
763306b13ebSTom Herbert 	struct socket *sock = sk->sk_socket;
764306b13ebSTom Herbert 
765306b13ebSTom Herbert 	if (!sock->ops->sendmsg_locked)
766db5980d8SJohn Fastabend 		return sock_no_sendmsg_locked(sk, msg, size);
767306b13ebSTom Herbert 
768aa563d7bSDavid Howells 	iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
769306b13ebSTom Herbert 
770306b13ebSTom Herbert 	return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
771306b13ebSTom Herbert }
772306b13ebSTom Herbert EXPORT_SYMBOL(kernel_sendmsg_locked);
773306b13ebSTom Herbert 
7748605330aSSoheil Hassas Yeganeh static bool skb_is_err_queue(const struct sk_buff *skb)
7758605330aSSoheil Hassas Yeganeh {
7768605330aSSoheil Hassas Yeganeh 	/* pkt_type of skbs enqueued on the error queue are set to
7778605330aSSoheil Hassas Yeganeh 	 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do
7788605330aSSoheil Hassas Yeganeh 	 * in recvmsg, since skbs received on a local socket will never
7798605330aSSoheil Hassas Yeganeh 	 * have a pkt_type of PACKET_OUTGOING.
7808605330aSSoheil Hassas Yeganeh 	 */
7818605330aSSoheil Hassas Yeganeh 	return skb->pkt_type == PACKET_OUTGOING;
7828605330aSSoheil Hassas Yeganeh }
7838605330aSSoheil Hassas Yeganeh 
784b50a5c70SMiroslav Lichvar /* On transmit, software and hardware timestamps are returned independently.
785b50a5c70SMiroslav Lichvar  * As the two skb clones share the hardware timestamp, which may be updated
786b50a5c70SMiroslav Lichvar  * before the software timestamp is received, a hardware TX timestamp may be
787b50a5c70SMiroslav Lichvar  * returned only if there is no software TX timestamp. Ignore false software
788b50a5c70SMiroslav Lichvar  * timestamps, which may be made in the __sock_recv_timestamp() call when the
7897f1bc6e9SDeepa Dinamani  * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
790b50a5c70SMiroslav Lichvar  * hardware timestamp.
791b50a5c70SMiroslav Lichvar  */
792b50a5c70SMiroslav Lichvar static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
793b50a5c70SMiroslav Lichvar {
794b50a5c70SMiroslav Lichvar 	return skb->tstamp && !false_tstamp && skb_is_err_queue(skb);
795b50a5c70SMiroslav Lichvar }
796b50a5c70SMiroslav Lichvar 
797aad9c8c4SMiroslav Lichvar static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb)
798aad9c8c4SMiroslav Lichvar {
799aad9c8c4SMiroslav Lichvar 	struct scm_ts_pktinfo ts_pktinfo;
800aad9c8c4SMiroslav Lichvar 	struct net_device *orig_dev;
801aad9c8c4SMiroslav Lichvar 
802aad9c8c4SMiroslav Lichvar 	if (!skb_mac_header_was_set(skb))
803aad9c8c4SMiroslav Lichvar 		return;
804aad9c8c4SMiroslav Lichvar 
805aad9c8c4SMiroslav Lichvar 	memset(&ts_pktinfo, 0, sizeof(ts_pktinfo));
806aad9c8c4SMiroslav Lichvar 
807aad9c8c4SMiroslav Lichvar 	rcu_read_lock();
808aad9c8c4SMiroslav Lichvar 	orig_dev = dev_get_by_napi_id(skb_napi_id(skb));
809aad9c8c4SMiroslav Lichvar 	if (orig_dev)
810aad9c8c4SMiroslav Lichvar 		ts_pktinfo.if_index = orig_dev->ifindex;
811aad9c8c4SMiroslav Lichvar 	rcu_read_unlock();
812aad9c8c4SMiroslav Lichvar 
813aad9c8c4SMiroslav Lichvar 	ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb);
814aad9c8c4SMiroslav Lichvar 	put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO,
815aad9c8c4SMiroslav Lichvar 		 sizeof(ts_pktinfo), &ts_pktinfo);
816aad9c8c4SMiroslav Lichvar }
817aad9c8c4SMiroslav Lichvar 
81892f37fd2SEric Dumazet /*
81992f37fd2SEric Dumazet  * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
82092f37fd2SEric Dumazet  */
82192f37fd2SEric Dumazet void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
82292f37fd2SEric Dumazet 	struct sk_buff *skb)
82392f37fd2SEric Dumazet {
82420d49473SPatrick Ohly 	int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
825887feae3SDeepa Dinamani 	int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
8269718475eSDeepa Dinamani 	struct scm_timestamping_internal tss;
8279718475eSDeepa Dinamani 
828b50a5c70SMiroslav Lichvar 	int empty = 1, false_tstamp = 0;
82920d49473SPatrick Ohly 	struct skb_shared_hwtstamps *shhwtstamps =
83020d49473SPatrick Ohly 		skb_hwtstamps(skb);
83192f37fd2SEric Dumazet 
83220d49473SPatrick Ohly 	/* Race occurred between timestamp enabling and packet
83320d49473SPatrick Ohly 	   receiving.  Fill in the current time for now. */
834b50a5c70SMiroslav Lichvar 	if (need_software_tstamp && skb->tstamp == 0) {
83520d49473SPatrick Ohly 		__net_timestamp(skb);
836b50a5c70SMiroslav Lichvar 		false_tstamp = 1;
837b50a5c70SMiroslav Lichvar 	}
83820d49473SPatrick Ohly 
83920d49473SPatrick Ohly 	if (need_software_tstamp) {
84092f37fd2SEric Dumazet 		if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
841887feae3SDeepa Dinamani 			if (new_tstamp) {
842887feae3SDeepa Dinamani 				struct __kernel_sock_timeval tv;
843887feae3SDeepa Dinamani 
844887feae3SDeepa Dinamani 				skb_get_new_timestamp(skb, &tv);
845887feae3SDeepa Dinamani 				put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
846887feae3SDeepa Dinamani 					 sizeof(tv), &tv);
847887feae3SDeepa Dinamani 			} else {
84813c6ee2aSDeepa Dinamani 				struct __kernel_old_timeval tv;
849887feae3SDeepa Dinamani 
85020d49473SPatrick Ohly 				skb_get_timestamp(skb, &tv);
8517f1bc6e9SDeepa Dinamani 				put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
85220d49473SPatrick Ohly 					 sizeof(tv), &tv);
853887feae3SDeepa Dinamani 			}
854887feae3SDeepa Dinamani 		} else {
855887feae3SDeepa Dinamani 			if (new_tstamp) {
856887feae3SDeepa Dinamani 				struct __kernel_timespec ts;
857887feae3SDeepa Dinamani 
858887feae3SDeepa Dinamani 				skb_get_new_timestampns(skb, &ts);
859887feae3SDeepa Dinamani 				put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
860887feae3SDeepa Dinamani 					 sizeof(ts), &ts);
86192f37fd2SEric Dumazet 			} else {
862df1b4ba9SArnd Bergmann 				struct __kernel_old_timespec ts;
863887feae3SDeepa Dinamani 
864f24b9be5SWillem de Bruijn 				skb_get_timestampns(skb, &ts);
8657f1bc6e9SDeepa Dinamani 				put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
866f24b9be5SWillem de Bruijn 					 sizeof(ts), &ts);
86792f37fd2SEric Dumazet 			}
86892f37fd2SEric Dumazet 		}
869887feae3SDeepa Dinamani 	}
87092f37fd2SEric Dumazet 
871f24b9be5SWillem de Bruijn 	memset(&tss, 0, sizeof(tss));
872c199105dSWillem de Bruijn 	if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
8739718475eSDeepa Dinamani 	    ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
87420d49473SPatrick Ohly 		empty = 0;
8754d276eb6SWillem de Bruijn 	if (shhwtstamps &&
876b9f40e21SWillem de Bruijn 	    (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
877d7c08826SYangbo Lu 	    !skb_is_swtx_tstamp(skb, false_tstamp)) {
878d7c08826SYangbo Lu 		if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
879d7c08826SYangbo Lu 			ptp_convert_timestamp(shhwtstamps, sk->sk_bind_phc);
880d7c08826SYangbo Lu 
881d7c08826SYangbo Lu 		if (ktime_to_timespec64_cond(shhwtstamps->hwtstamp,
882d7c08826SYangbo Lu 					     tss.ts + 2)) {
88320d49473SPatrick Ohly 			empty = 0;
884d7c08826SYangbo Lu 
885aad9c8c4SMiroslav Lichvar 			if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
886aad9c8c4SMiroslav Lichvar 			    !skb_is_err_queue(skb))
887aad9c8c4SMiroslav Lichvar 				put_ts_pktinfo(msg, skb);
888aad9c8c4SMiroslav Lichvar 		}
889d7c08826SYangbo Lu 	}
8901c885808SFrancis Yan 	if (!empty) {
8919718475eSDeepa Dinamani 		if (sock_flag(sk, SOCK_TSTAMP_NEW))
8929718475eSDeepa Dinamani 			put_cmsg_scm_timestamping64(msg, &tss);
8939718475eSDeepa Dinamani 		else
8949718475eSDeepa Dinamani 			put_cmsg_scm_timestamping(msg, &tss);
8951c885808SFrancis Yan 
8968605330aSSoheil Hassas Yeganeh 		if (skb_is_err_queue(skb) && skb->len &&
8974ef1b286SSoheil Hassas Yeganeh 		    SKB_EXT_ERR(skb)->opt_stats)
8981c885808SFrancis Yan 			put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS,
8991c885808SFrancis Yan 				 skb->len, skb->data);
9001c885808SFrancis Yan 	}
90120d49473SPatrick Ohly }
9027c81fd8bSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
9037c81fd8bSArnaldo Carvalho de Melo 
9046e3e939fSJohannes Berg void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
9056e3e939fSJohannes Berg 	struct sk_buff *skb)
9066e3e939fSJohannes Berg {
9076e3e939fSJohannes Berg 	int ack;
9086e3e939fSJohannes Berg 
9096e3e939fSJohannes Berg 	if (!sock_flag(sk, SOCK_WIFI_STATUS))
9106e3e939fSJohannes Berg 		return;
9116e3e939fSJohannes Berg 	if (!skb->wifi_acked_valid)
9126e3e939fSJohannes Berg 		return;
9136e3e939fSJohannes Berg 
9146e3e939fSJohannes Berg 	ack = skb->wifi_acked;
9156e3e939fSJohannes Berg 
9166e3e939fSJohannes Berg 	put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
9176e3e939fSJohannes Berg }
9186e3e939fSJohannes Berg EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
9196e3e939fSJohannes Berg 
92011165f14Sstephen hemminger static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
92111165f14Sstephen hemminger 				   struct sk_buff *skb)
9223b885787SNeil Horman {
923744d5a3eSEyal Birger 	if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
9243b885787SNeil Horman 		put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
925744d5a3eSEyal Birger 			sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
9263b885787SNeil Horman }
9273b885787SNeil Horman 
928767dd033SEric Dumazet void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
9293b885787SNeil Horman 	struct sk_buff *skb)
9303b885787SNeil Horman {
9313b885787SNeil Horman 	sock_recv_timestamp(msg, sk, skb);
9323b885787SNeil Horman 	sock_recv_drops(msg, sk, skb);
9333b885787SNeil Horman }
934767dd033SEric Dumazet EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
9353b885787SNeil Horman 
9368c3c447bSPaolo Abeni INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
9378c3c447bSPaolo Abeni 					   size_t, int));
938a648a592SPaolo Abeni INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
939a648a592SPaolo Abeni 					    size_t, int));
9401b784140SYing Xue static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
9411b784140SYing Xue 				     int flags)
942a2e27255SArnaldo Carvalho de Melo {
943a648a592SPaolo Abeni 	return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
944a648a592SPaolo Abeni 				  inet_recvmsg, sock, msg, msg_data_left(msg),
945a648a592SPaolo Abeni 				  flags);
9462da62906SAl Viro }
947a2e27255SArnaldo Carvalho de Melo 
94885806af0SRandy Dunlap /**
94985806af0SRandy Dunlap  *	sock_recvmsg - receive a message from @sock
95085806af0SRandy Dunlap  *	@sock: socket
95185806af0SRandy Dunlap  *	@msg: message to receive
95285806af0SRandy Dunlap  *	@flags: message flags
95385806af0SRandy Dunlap  *
95485806af0SRandy Dunlap  *	Receives @msg from @sock, passing through LSM. Returns the total number
95585806af0SRandy Dunlap  *	of bytes received, or an error.
95685806af0SRandy Dunlap  */
9572da62906SAl Viro int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
9582da62906SAl Viro {
9592da62906SAl Viro 	int err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
9602da62906SAl Viro 
9612da62906SAl Viro 	return err ?: sock_recvmsg_nosec(sock, msg, flags);
9621da177e4SLinus Torvalds }
963c6d409cfSEric Dumazet EXPORT_SYMBOL(sock_recvmsg);
9641da177e4SLinus Torvalds 
965c1249c0aSMartin Lucina /**
966c1249c0aSMartin Lucina  *	kernel_recvmsg - Receive a message from a socket (kernel space)
967c1249c0aSMartin Lucina  *	@sock: The socket to receive the message from
968c1249c0aSMartin Lucina  *	@msg: Received message
969c1249c0aSMartin Lucina  *	@vec: Input s/g array for message data
970c1249c0aSMartin Lucina  *	@num: Size of input s/g array
971c1249c0aSMartin Lucina  *	@size: Number of bytes to read
972c1249c0aSMartin Lucina  *	@flags: Message flags (MSG_DONTWAIT, etc...)
973c1249c0aSMartin Lucina  *
974c1249c0aSMartin Lucina  *	On return the msg structure contains the scatter/gather array passed in the
975c1249c0aSMartin Lucina  *	vec argument. The array is modified so that it consists of the unfilled
976c1249c0aSMartin Lucina  *	portion of the original array.
977c1249c0aSMartin Lucina  *
978c1249c0aSMartin Lucina  *	The returned value is the total number of bytes received, or an error.
979c1249c0aSMartin Lucina  */
9808a3c245cSPedro Tammela 
9811da177e4SLinus Torvalds int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
98289bddce5SStephen Hemminger 		   struct kvec *vec, size_t num, size_t size, int flags)
9831da177e4SLinus Torvalds {
9841f466e1fSChristoph Hellwig 	msg->msg_control_is_user = false;
985aa563d7bSDavid Howells 	iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
9861f466e1fSChristoph Hellwig 	return sock_recvmsg(sock, msg, flags);
9871da177e4SLinus Torvalds }
988c6d409cfSEric Dumazet EXPORT_SYMBOL(kernel_recvmsg);
9891da177e4SLinus Torvalds 
99020380731SArnaldo Carvalho de Melo static ssize_t sock_sendpage(struct file *file, struct page *page,
9911da177e4SLinus Torvalds 			     int offset, size_t size, loff_t *ppos, int more)
9921da177e4SLinus Torvalds {
9931da177e4SLinus Torvalds 	struct socket *sock;
9941da177e4SLinus Torvalds 	int flags;
9951da177e4SLinus Torvalds 
996b69aee04SEric Dumazet 	sock = file->private_data;
9971da177e4SLinus Torvalds 
99835f9c09fSEric Dumazet 	flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
99935f9c09fSEric Dumazet 	/* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
100035f9c09fSEric Dumazet 	flags |= more;
10011da177e4SLinus Torvalds 
1002e6949583SLinus Torvalds 	return kernel_sendpage(sock, page, offset, size, flags);
10031da177e4SLinus Torvalds }
10041da177e4SLinus Torvalds 
10059c55e01cSJens Axboe static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
10069c55e01cSJens Axboe 				struct pipe_inode_info *pipe, size_t len,
10079c55e01cSJens Axboe 				unsigned int flags)
10089c55e01cSJens Axboe {
10099c55e01cSJens Axboe 	struct socket *sock = file->private_data;
10109c55e01cSJens Axboe 
1011997b37daSRémi Denis-Courmont 	if (unlikely(!sock->ops->splice_read))
101295506588SSlavomir Kaslev 		return generic_file_splice_read(file, ppos, pipe, len, flags);
1013997b37daSRémi Denis-Courmont 
10149c55e01cSJens Axboe 	return sock->ops->splice_read(sock, ppos, pipe, len, flags);
10159c55e01cSJens Axboe }
10169c55e01cSJens Axboe 
10178ae5e030SAl Viro static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
1018ce1d4d3eSChristoph Hellwig {
10196d652330SAl Viro 	struct file *file = iocb->ki_filp;
10206d652330SAl Viro 	struct socket *sock = file->private_data;
10210345f931Stadeusz.struk@intel.com 	struct msghdr msg = {.msg_iter = *to,
10220345f931Stadeusz.struk@intel.com 			     .msg_iocb = iocb};
10238ae5e030SAl Viro 	ssize_t res;
1024ce1d4d3eSChristoph Hellwig 
1025ebfcd895SJens Axboe 	if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
10268ae5e030SAl Viro 		msg.msg_flags = MSG_DONTWAIT;
10278ae5e030SAl Viro 
10288ae5e030SAl Viro 	if (iocb->ki_pos != 0)
1029ce1d4d3eSChristoph Hellwig 		return -ESPIPE;
1030027445c3SBadari Pulavarty 
103166ee59afSChristoph Hellwig 	if (!iov_iter_count(to))	/* Match SYS5 behaviour */
1032ce1d4d3eSChristoph Hellwig 		return 0;
1033ce1d4d3eSChristoph Hellwig 
10342da62906SAl Viro 	res = sock_recvmsg(sock, &msg, msg.msg_flags);
10358ae5e030SAl Viro 	*to = msg.msg_iter;
10368ae5e030SAl Viro 	return res;
1037ce1d4d3eSChristoph Hellwig }
1038ce1d4d3eSChristoph Hellwig 
10398ae5e030SAl Viro static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
10401da177e4SLinus Torvalds {
10416d652330SAl Viro 	struct file *file = iocb->ki_filp;
10426d652330SAl Viro 	struct socket *sock = file->private_data;
10430345f931Stadeusz.struk@intel.com 	struct msghdr msg = {.msg_iter = *from,
10440345f931Stadeusz.struk@intel.com 			     .msg_iocb = iocb};
10458ae5e030SAl Viro 	ssize_t res;
10461da177e4SLinus Torvalds 
10478ae5e030SAl Viro 	if (iocb->ki_pos != 0)
1048ce1d4d3eSChristoph Hellwig 		return -ESPIPE;
1049027445c3SBadari Pulavarty 
1050ebfcd895SJens Axboe 	if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
10518ae5e030SAl Viro 		msg.msg_flags = MSG_DONTWAIT;
10528ae5e030SAl Viro 
10536d652330SAl Viro 	if (sock->type == SOCK_SEQPACKET)
10546d652330SAl Viro 		msg.msg_flags |= MSG_EOR;
10556d652330SAl Viro 
1056d8725c86SAl Viro 	res = sock_sendmsg(sock, &msg);
10578ae5e030SAl Viro 	*from = msg.msg_iter;
10588ae5e030SAl Viro 	return res;
10591da177e4SLinus Torvalds }
10601da177e4SLinus Torvalds 
10611da177e4SLinus Torvalds /*
10621da177e4SLinus Torvalds  * Atomic setting of ioctl hooks to avoid race
10631da177e4SLinus Torvalds  * with module unload.
10641da177e4SLinus Torvalds  */
10651da177e4SLinus Torvalds 
10664a3e2f71SArjan van de Ven static DEFINE_MUTEX(br_ioctl_mutex);
1067c6d409cfSEric Dumazet static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
10681da177e4SLinus Torvalds 
1069881d966bSEric W. Biederman void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
10701da177e4SLinus Torvalds {
10714a3e2f71SArjan van de Ven 	mutex_lock(&br_ioctl_mutex);
10721da177e4SLinus Torvalds 	br_ioctl_hook = hook;
10734a3e2f71SArjan van de Ven 	mutex_unlock(&br_ioctl_mutex);
10741da177e4SLinus Torvalds }
10751da177e4SLinus Torvalds EXPORT_SYMBOL(brioctl_set);
10761da177e4SLinus Torvalds 
10774a3e2f71SArjan van de Ven static DEFINE_MUTEX(vlan_ioctl_mutex);
1078881d966bSEric W. Biederman static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
10791da177e4SLinus Torvalds 
1080881d966bSEric W. Biederman void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
10811da177e4SLinus Torvalds {
10824a3e2f71SArjan van de Ven 	mutex_lock(&vlan_ioctl_mutex);
10831da177e4SLinus Torvalds 	vlan_ioctl_hook = hook;
10844a3e2f71SArjan van de Ven 	mutex_unlock(&vlan_ioctl_mutex);
10851da177e4SLinus Torvalds }
10861da177e4SLinus Torvalds EXPORT_SYMBOL(vlan_ioctl_set);
10871da177e4SLinus Torvalds 
10886b96018bSArnd Bergmann static long sock_do_ioctl(struct net *net, struct socket *sock,
108963ff03abSJohannes Berg 			  unsigned int cmd, unsigned long arg)
10906b96018bSArnd Bergmann {
1091876f0bf9SArnd Bergmann 	struct ifreq ifr;
1092876f0bf9SArnd Bergmann 	bool need_copyout;
10936b96018bSArnd Bergmann 	int err;
10946b96018bSArnd Bergmann 	void __user *argp = (void __user *)arg;
10956b96018bSArnd Bergmann 
10966b96018bSArnd Bergmann 	err = sock->ops->ioctl(sock, cmd, arg);
10976b96018bSArnd Bergmann 
10986b96018bSArnd Bergmann 	/*
10996b96018bSArnd Bergmann 	 * If this ioctl is unknown try to hand it down
11006b96018bSArnd Bergmann 	 * to the NIC driver.
11016b96018bSArnd Bergmann 	 */
110236fd633eSAl Viro 	if (err != -ENOIOCTLCMD)
11036b96018bSArnd Bergmann 		return err;
11046b96018bSArnd Bergmann 
110563ff03abSJohannes Berg 	if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
110644c02a2cSAl Viro 		return -EFAULT;
110744c02a2cSAl Viro 	err = dev_ioctl(net, cmd, &ifr, &need_copyout);
110844c02a2cSAl Viro 	if (!err && need_copyout)
110963ff03abSJohannes Berg 		if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
111044c02a2cSAl Viro 			return -EFAULT;
1111876f0bf9SArnd Bergmann 
11126b96018bSArnd Bergmann 	return err;
11136b96018bSArnd Bergmann }
11146b96018bSArnd Bergmann 
11151da177e4SLinus Torvalds /*
11161da177e4SLinus Torvalds  *	With an ioctl, arg may well be a user mode pointer, but we don't know
11171da177e4SLinus Torvalds  *	what to do with it - that's up to the protocol still.
11181da177e4SLinus Torvalds  */
11191da177e4SLinus Torvalds 
11201da177e4SLinus Torvalds static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
11211da177e4SLinus Torvalds {
11221da177e4SLinus Torvalds 	struct socket *sock;
1123881d966bSEric W. Biederman 	struct sock *sk;
11241da177e4SLinus Torvalds 	void __user *argp = (void __user *)arg;
11251da177e4SLinus Torvalds 	int pid, err;
1126881d966bSEric W. Biederman 	struct net *net;
11271da177e4SLinus Torvalds 
1128b69aee04SEric Dumazet 	sock = file->private_data;
1129881d966bSEric W. Biederman 	sk = sock->sk;
11303b1e0a65SYOSHIFUJI Hideaki 	net = sock_net(sk);
113144c02a2cSAl Viro 	if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
113244c02a2cSAl Viro 		struct ifreq ifr;
113344c02a2cSAl Viro 		bool need_copyout;
113444c02a2cSAl Viro 		if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
113544c02a2cSAl Viro 			return -EFAULT;
113644c02a2cSAl Viro 		err = dev_ioctl(net, cmd, &ifr, &need_copyout);
113744c02a2cSAl Viro 		if (!err && need_copyout)
113844c02a2cSAl Viro 			if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
113944c02a2cSAl Viro 				return -EFAULT;
11401da177e4SLinus Torvalds 	} else
11413d23e349SJohannes Berg #ifdef CONFIG_WEXT_CORE
11421da177e4SLinus Torvalds 	if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
1143b1b0c245SAl Viro 		err = wext_handle_ioctl(net, cmd, argp);
11441da177e4SLinus Torvalds 	} else
11453d23e349SJohannes Berg #endif
11461da177e4SLinus Torvalds 		switch (cmd) {
11471da177e4SLinus Torvalds 		case FIOSETOWN:
11481da177e4SLinus Torvalds 		case SIOCSPGRP:
11491da177e4SLinus Torvalds 			err = -EFAULT;
11501da177e4SLinus Torvalds 			if (get_user(pid, (int __user *)argp))
11511da177e4SLinus Torvalds 				break;
1152393cc3f5SJiri Slaby 			err = f_setown(sock->file, pid, 1);
11531da177e4SLinus Torvalds 			break;
11541da177e4SLinus Torvalds 		case FIOGETOWN:
11551da177e4SLinus Torvalds 		case SIOCGPGRP:
1156609d7fa9SEric W. Biederman 			err = put_user(f_getown(sock->file),
115789bddce5SStephen Hemminger 				       (int __user *)argp);
11581da177e4SLinus Torvalds 			break;
11591da177e4SLinus Torvalds 		case SIOCGIFBR:
11601da177e4SLinus Torvalds 		case SIOCSIFBR:
11611da177e4SLinus Torvalds 		case SIOCBRADDBR:
11621da177e4SLinus Torvalds 		case SIOCBRDELBR:
11631da177e4SLinus Torvalds 			err = -ENOPKG;
11641da177e4SLinus Torvalds 			if (!br_ioctl_hook)
11651da177e4SLinus Torvalds 				request_module("bridge");
11661da177e4SLinus Torvalds 
11674a3e2f71SArjan van de Ven 			mutex_lock(&br_ioctl_mutex);
11681da177e4SLinus Torvalds 			if (br_ioctl_hook)
1169881d966bSEric W. Biederman 				err = br_ioctl_hook(net, cmd, argp);
11704a3e2f71SArjan van de Ven 			mutex_unlock(&br_ioctl_mutex);
11711da177e4SLinus Torvalds 			break;
11721da177e4SLinus Torvalds 		case SIOCGIFVLAN:
11731da177e4SLinus Torvalds 		case SIOCSIFVLAN:
11741da177e4SLinus Torvalds 			err = -ENOPKG;
11751da177e4SLinus Torvalds 			if (!vlan_ioctl_hook)
11761da177e4SLinus Torvalds 				request_module("8021q");
11771da177e4SLinus Torvalds 
11784a3e2f71SArjan van de Ven 			mutex_lock(&vlan_ioctl_mutex);
11791da177e4SLinus Torvalds 			if (vlan_ioctl_hook)
1180881d966bSEric W. Biederman 				err = vlan_ioctl_hook(net, argp);
11814a3e2f71SArjan van de Ven 			mutex_unlock(&vlan_ioctl_mutex);
11821da177e4SLinus Torvalds 			break;
1183c62cce2cSAndrey Vagin 		case SIOCGSKNS:
1184c62cce2cSAndrey Vagin 			err = -EPERM;
1185c62cce2cSAndrey Vagin 			if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1186c62cce2cSAndrey Vagin 				break;
1187c62cce2cSAndrey Vagin 
1188c62cce2cSAndrey Vagin 			err = open_related_ns(&net->ns, get_net_ns);
1189c62cce2cSAndrey Vagin 			break;
11900768e170SArnd Bergmann 		case SIOCGSTAMP_OLD:
11910768e170SArnd Bergmann 		case SIOCGSTAMPNS_OLD:
1192c7cbdbf2SArnd Bergmann 			if (!sock->ops->gettstamp) {
1193c7cbdbf2SArnd Bergmann 				err = -ENOIOCTLCMD;
1194c7cbdbf2SArnd Bergmann 				break;
1195c7cbdbf2SArnd Bergmann 			}
1196c7cbdbf2SArnd Bergmann 			err = sock->ops->gettstamp(sock, argp,
11970768e170SArnd Bergmann 						   cmd == SIOCGSTAMP_OLD,
11980768e170SArnd Bergmann 						   !IS_ENABLED(CONFIG_64BIT));
119960747828SGustavo A. R. Silva 			break;
12000768e170SArnd Bergmann 		case SIOCGSTAMP_NEW:
12010768e170SArnd Bergmann 		case SIOCGSTAMPNS_NEW:
12020768e170SArnd Bergmann 			if (!sock->ops->gettstamp) {
12030768e170SArnd Bergmann 				err = -ENOIOCTLCMD;
12040768e170SArnd Bergmann 				break;
12050768e170SArnd Bergmann 			}
12060768e170SArnd Bergmann 			err = sock->ops->gettstamp(sock, argp,
12070768e170SArnd Bergmann 						   cmd == SIOCGSTAMP_NEW,
12080768e170SArnd Bergmann 						   false);
1209c7cbdbf2SArnd Bergmann 			break;
1210876f0bf9SArnd Bergmann 
1211876f0bf9SArnd Bergmann 		case SIOCGIFCONF:
1212876f0bf9SArnd Bergmann 			err = dev_ifconf(net, argp);
1213876f0bf9SArnd Bergmann 			break;
1214876f0bf9SArnd Bergmann 
12151da177e4SLinus Torvalds 		default:
121663ff03abSJohannes Berg 			err = sock_do_ioctl(net, sock, cmd, arg);
12171da177e4SLinus Torvalds 			break;
12181da177e4SLinus Torvalds 		}
12191da177e4SLinus Torvalds 	return err;
12201da177e4SLinus Torvalds }
12211da177e4SLinus Torvalds 
12228a3c245cSPedro Tammela /**
12238a3c245cSPedro Tammela  *	sock_create_lite - creates a socket
12248a3c245cSPedro Tammela  *	@family: protocol family (AF_INET, ...)
12258a3c245cSPedro Tammela  *	@type: communication type (SOCK_STREAM, ...)
12268a3c245cSPedro Tammela  *	@protocol: protocol (0, ...)
12278a3c245cSPedro Tammela  *	@res: new socket
12288a3c245cSPedro Tammela  *
12298a3c245cSPedro Tammela  *	Creates a new socket and assigns it to @res, passing through LSM.
12308a3c245cSPedro Tammela  *	The new socket initialization is not complete, see kernel_accept().
12318a3c245cSPedro Tammela  *	Returns 0 or an error. On failure @res is set to %NULL.
12328a3c245cSPedro Tammela  *	This function internally uses GFP_KERNEL.
12338a3c245cSPedro Tammela  */
12348a3c245cSPedro Tammela 
12351da177e4SLinus Torvalds int sock_create_lite(int family, int type, int protocol, struct socket **res)
12361da177e4SLinus Torvalds {
12371da177e4SLinus Torvalds 	int err;
12381da177e4SLinus Torvalds 	struct socket *sock = NULL;
12391da177e4SLinus Torvalds 
12401da177e4SLinus Torvalds 	err = security_socket_create(family, type, protocol, 1);
12411da177e4SLinus Torvalds 	if (err)
12421da177e4SLinus Torvalds 		goto out;
12431da177e4SLinus Torvalds 
12441da177e4SLinus Torvalds 	sock = sock_alloc();
12451da177e4SLinus Torvalds 	if (!sock) {
12461da177e4SLinus Torvalds 		err = -ENOMEM;
12471da177e4SLinus Torvalds 		goto out;
12481da177e4SLinus Torvalds 	}
12491da177e4SLinus Torvalds 
12501da177e4SLinus Torvalds 	sock->type = type;
12517420ed23SVenkat Yekkirala 	err = security_socket_post_create(sock, family, type, protocol, 1);
12527420ed23SVenkat Yekkirala 	if (err)
12537420ed23SVenkat Yekkirala 		goto out_release;
12547420ed23SVenkat Yekkirala 
12551da177e4SLinus Torvalds out:
12561da177e4SLinus Torvalds 	*res = sock;
12571da177e4SLinus Torvalds 	return err;
12587420ed23SVenkat Yekkirala out_release:
12597420ed23SVenkat Yekkirala 	sock_release(sock);
12607420ed23SVenkat Yekkirala 	sock = NULL;
12617420ed23SVenkat Yekkirala 	goto out;
12621da177e4SLinus Torvalds }
1263c6d409cfSEric Dumazet EXPORT_SYMBOL(sock_create_lite);
12641da177e4SLinus Torvalds 
12651da177e4SLinus Torvalds /* No kernel lock held - perfect */
1266ade994f4SAl Viro static __poll_t sock_poll(struct file *file, poll_table *wait)
12671da177e4SLinus Torvalds {
12683cafb376SChristoph Hellwig 	struct socket *sock = file->private_data;
1269a331de3bSChristoph Hellwig 	__poll_t events = poll_requested_events(wait), flag = 0;
12701da177e4SLinus Torvalds 
1271e88958e6SChristoph Hellwig 	if (!sock->ops->poll)
1272e88958e6SChristoph Hellwig 		return 0;
1273f641f13bSChristoph Hellwig 
1274a331de3bSChristoph Hellwig 	if (sk_can_busy_loop(sock->sk)) {
1275f641f13bSChristoph Hellwig 		/* poll once if requested by the syscall */
1276a331de3bSChristoph Hellwig 		if (events & POLL_BUSY_LOOP)
1277f641f13bSChristoph Hellwig 			sk_busy_loop(sock->sk, 1);
1278a331de3bSChristoph Hellwig 
1279a331de3bSChristoph Hellwig 		/* if this socket can poll_ll, tell the system call */
1280a331de3bSChristoph Hellwig 		flag = POLL_BUSY_LOOP;
1281a331de3bSChristoph Hellwig 	}
1282a331de3bSChristoph Hellwig 
1283a331de3bSChristoph Hellwig 	return sock->ops->poll(file, sock, wait) | flag;
12841da177e4SLinus Torvalds }
12851da177e4SLinus Torvalds 
12861da177e4SLinus Torvalds static int sock_mmap(struct file *file, struct vm_area_struct *vma)
12871da177e4SLinus Torvalds {
1288b69aee04SEric Dumazet 	struct socket *sock = file->private_data;
12891da177e4SLinus Torvalds 
12901da177e4SLinus Torvalds 	return sock->ops->mmap(file, sock, vma);
12911da177e4SLinus Torvalds }
12921da177e4SLinus Torvalds 
/* ->release file operation: release the socket embedded in @inode. */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	__sock_release(sock, inode);
	return 0;
}
12981da177e4SLinus Torvalds 
12991da177e4SLinus Torvalds /*
13001da177e4SLinus Torvalds  *	Update the socket async list
13011da177e4SLinus Torvalds  *
13021da177e4SLinus Torvalds  *	Fasync_list locking strategy.
13031da177e4SLinus Torvalds  *
13041da177e4SLinus Torvalds  *	1. fasync_list is modified only under process context socket lock
13051da177e4SLinus Torvalds  *	   i.e. under semaphore.
13061da177e4SLinus Torvalds  *	2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1307989a2979SEric Dumazet  *	   or under socket lock
13081da177e4SLinus Torvalds  */
13091da177e4SLinus Torvalds 
13101da177e4SLinus Torvalds static int sock_fasync(int fd, struct file *filp, int on)
13111da177e4SLinus Torvalds {
1312989a2979SEric Dumazet 	struct socket *sock = filp->private_data;
1313989a2979SEric Dumazet 	struct sock *sk = sock->sk;
1314333f7909SAl Viro 	struct socket_wq *wq = &sock->wq;
13151da177e4SLinus Torvalds 
1316989a2979SEric Dumazet 	if (sk == NULL)
13171da177e4SLinus Torvalds 		return -EINVAL;
13181da177e4SLinus Torvalds 
13191da177e4SLinus Torvalds 	lock_sock(sk);
1320eaefd110SEric Dumazet 	fasync_helper(fd, filp, on, &wq->fasync_list);
13211da177e4SLinus Torvalds 
1322eaefd110SEric Dumazet 	if (!wq->fasync_list)
1323bcdce719SEric Dumazet 		sock_reset_flag(sk, SOCK_FASYNC);
1324989a2979SEric Dumazet 	else
1325989a2979SEric Dumazet 		sock_set_flag(sk, SOCK_FASYNC);
13261da177e4SLinus Torvalds 
1327989a2979SEric Dumazet 	release_sock(sk);
13281da177e4SLinus Torvalds 	return 0;
13291da177e4SLinus Torvalds }
13301da177e4SLinus Torvalds 
1331ceb5d58bSEric Dumazet /* This function may be called only under rcu_lock */
13321da177e4SLinus Torvalds 
1333ceb5d58bSEric Dumazet int sock_wake_async(struct socket_wq *wq, int how, int band)
13341da177e4SLinus Torvalds {
1335ceb5d58bSEric Dumazet 	if (!wq || !wq->fasync_list)
1336ceb5d58bSEric Dumazet 		return -1;
133743815482SEric Dumazet 
133889bddce5SStephen Hemminger 	switch (how) {
13398d8ad9d7SPavel Emelyanov 	case SOCK_WAKE_WAITD:
1340ceb5d58bSEric Dumazet 		if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags))
13411da177e4SLinus Torvalds 			break;
13421da177e4SLinus Torvalds 		goto call_kill;
13438d8ad9d7SPavel Emelyanov 	case SOCK_WAKE_SPACE:
1344ceb5d58bSEric Dumazet 		if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags))
13451da177e4SLinus Torvalds 			break;
13467c7ab580SMiaohe Lin 		fallthrough;
13478d8ad9d7SPavel Emelyanov 	case SOCK_WAKE_IO:
13481da177e4SLinus Torvalds call_kill:
134943815482SEric Dumazet 		kill_fasync(&wq->fasync_list, SIGIO, band);
13501da177e4SLinus Torvalds 		break;
13518d8ad9d7SPavel Emelyanov 	case SOCK_WAKE_URG:
135243815482SEric Dumazet 		kill_fasync(&wq->fasync_list, SIGURG, band);
13531da177e4SLinus Torvalds 	}
1354ceb5d58bSEric Dumazet 
13551da177e4SLinus Torvalds 	return 0;
13561da177e4SLinus Torvalds }
1357c6d409cfSEric Dumazet EXPORT_SYMBOL(sock_wake_async);
13581da177e4SLinus Torvalds 
13598a3c245cSPedro Tammela /**
13608a3c245cSPedro Tammela  *	__sock_create - creates a socket
13618a3c245cSPedro Tammela  *	@net: net namespace
13628a3c245cSPedro Tammela  *	@family: protocol family (AF_INET, ...)
13638a3c245cSPedro Tammela  *	@type: communication type (SOCK_STREAM, ...)
13648a3c245cSPedro Tammela  *	@protocol: protocol (0, ...)
13658a3c245cSPedro Tammela  *	@res: new socket
13668a3c245cSPedro Tammela  *	@kern: boolean for kernel space sockets
13678a3c245cSPedro Tammela  *
13688a3c245cSPedro Tammela  *	Creates a new socket and assigns it to @res, passing through LSM.
13698a3c245cSPedro Tammela  *	Returns 0 or an error. On failure @res is set to %NULL. @kern must
13708a3c245cSPedro Tammela  *	be set to true if the socket resides in kernel space.
13718a3c245cSPedro Tammela  *	This function internally uses GFP_KERNEL.
13728a3c245cSPedro Tammela  */
13738a3c245cSPedro Tammela 
1374721db93aSPavel Emelyanov int __sock_create(struct net *net, int family, int type, int protocol,
137589bddce5SStephen Hemminger 			 struct socket **res, int kern)
13761da177e4SLinus Torvalds {
13771da177e4SLinus Torvalds 	int err;
13781da177e4SLinus Torvalds 	struct socket *sock;
137955737fdaSStephen Hemminger 	const struct net_proto_family *pf;
13801da177e4SLinus Torvalds 
13811da177e4SLinus Torvalds 	/*
13821da177e4SLinus Torvalds 	 *      Check protocol is in range
13831da177e4SLinus Torvalds 	 */
13841da177e4SLinus Torvalds 	if (family < 0 || family >= NPROTO)
13851da177e4SLinus Torvalds 		return -EAFNOSUPPORT;
13861da177e4SLinus Torvalds 	if (type < 0 || type >= SOCK_MAX)
13871da177e4SLinus Torvalds 		return -EINVAL;
13881da177e4SLinus Torvalds 
13891da177e4SLinus Torvalds 	/* Compatibility.
13901da177e4SLinus Torvalds 
13911da177e4SLinus Torvalds 	   This uglymoron is moved from INET layer to here to avoid
13921da177e4SLinus Torvalds 	   deadlock in module load.
13931da177e4SLinus Torvalds 	 */
13941da177e4SLinus Torvalds 	if (family == PF_INET && type == SOCK_PACKET) {
1395f3c98690Sliping.zhang 		pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
139689bddce5SStephen Hemminger 			     current->comm);
13971da177e4SLinus Torvalds 		family = PF_PACKET;
13981da177e4SLinus Torvalds 	}
13991da177e4SLinus Torvalds 
14001da177e4SLinus Torvalds 	err = security_socket_create(family, type, protocol, kern);
14011da177e4SLinus Torvalds 	if (err)
14021da177e4SLinus Torvalds 		return err;
14031da177e4SLinus Torvalds 
140455737fdaSStephen Hemminger 	/*
140555737fdaSStephen Hemminger 	 *	Allocate the socket and allow the family to set things up. if
140655737fdaSStephen Hemminger 	 *	the protocol is 0, the family is instructed to select an appropriate
140755737fdaSStephen Hemminger 	 *	default.
140855737fdaSStephen Hemminger 	 */
140955737fdaSStephen Hemminger 	sock = sock_alloc();
141055737fdaSStephen Hemminger 	if (!sock) {
1411e87cc472SJoe Perches 		net_warn_ratelimited("socket: no more sockets\n");
141255737fdaSStephen Hemminger 		return -ENFILE;	/* Not exactly a match, but its the
141355737fdaSStephen Hemminger 				   closest posix thing */
141455737fdaSStephen Hemminger 	}
141555737fdaSStephen Hemminger 
141655737fdaSStephen Hemminger 	sock->type = type;
141755737fdaSStephen Hemminger 
141895a5afcaSJohannes Berg #ifdef CONFIG_MODULES
14191da177e4SLinus Torvalds 	/* Attempt to load a protocol module if the find failed.
14201da177e4SLinus Torvalds 	 *
14211da177e4SLinus Torvalds 	 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
14221da177e4SLinus Torvalds 	 * requested real, full-featured networking support upon configuration.
14231da177e4SLinus Torvalds 	 * Otherwise module support will break!
14241da177e4SLinus Torvalds 	 */
1425190683a9SEric Dumazet 	if (rcu_access_pointer(net_families[family]) == NULL)
14261da177e4SLinus Torvalds 		request_module("net-pf-%d", family);
14271da177e4SLinus Torvalds #endif
14281da177e4SLinus Torvalds 
142955737fdaSStephen Hemminger 	rcu_read_lock();
143055737fdaSStephen Hemminger 	pf = rcu_dereference(net_families[family]);
14311da177e4SLinus Torvalds 	err = -EAFNOSUPPORT;
143255737fdaSStephen Hemminger 	if (!pf)
143355737fdaSStephen Hemminger 		goto out_release;
14341da177e4SLinus Torvalds 
14351da177e4SLinus Torvalds 	/*
14361da177e4SLinus Torvalds 	 * We will call the ->create function, that possibly is in a loadable
14371da177e4SLinus Torvalds 	 * module, so we have to bump that loadable module refcnt first.
14381da177e4SLinus Torvalds 	 */
143955737fdaSStephen Hemminger 	if (!try_module_get(pf->owner))
14401da177e4SLinus Torvalds 		goto out_release;
14411da177e4SLinus Torvalds 
144255737fdaSStephen Hemminger 	/* Now protected by module ref count */
144355737fdaSStephen Hemminger 	rcu_read_unlock();
144455737fdaSStephen Hemminger 
14453f378b68SEric Paris 	err = pf->create(net, sock, protocol, kern);
144655737fdaSStephen Hemminger 	if (err < 0)
14471da177e4SLinus Torvalds 		goto out_module_put;
1448a79af59eSFrank Filz 
14491da177e4SLinus Torvalds 	/*
14501da177e4SLinus Torvalds 	 * Now to bump the refcnt of the [loadable] module that owns this
14511da177e4SLinus Torvalds 	 * socket at sock_release time we decrement its refcnt.
14521da177e4SLinus Torvalds 	 */
145355737fdaSStephen Hemminger 	if (!try_module_get(sock->ops->owner))
145455737fdaSStephen Hemminger 		goto out_module_busy;
145555737fdaSStephen Hemminger 
14561da177e4SLinus Torvalds 	/*
14571da177e4SLinus Torvalds 	 * Now that we're done with the ->create function, the [loadable]
14581da177e4SLinus Torvalds 	 * module can have its refcnt decremented
14591da177e4SLinus Torvalds 	 */
146055737fdaSStephen Hemminger 	module_put(pf->owner);
14617420ed23SVenkat Yekkirala 	err = security_socket_post_create(sock, family, type, protocol, kern);
14627420ed23SVenkat Yekkirala 	if (err)
14633b185525SHerbert Xu 		goto out_sock_release;
146455737fdaSStephen Hemminger 	*res = sock;
14651da177e4SLinus Torvalds 
146655737fdaSStephen Hemminger 	return 0;
146755737fdaSStephen Hemminger 
146855737fdaSStephen Hemminger out_module_busy:
146955737fdaSStephen Hemminger 	err = -EAFNOSUPPORT;
14701da177e4SLinus Torvalds out_module_put:
147155737fdaSStephen Hemminger 	sock->ops = NULL;
147255737fdaSStephen Hemminger 	module_put(pf->owner);
147355737fdaSStephen Hemminger out_sock_release:
14741da177e4SLinus Torvalds 	sock_release(sock);
147555737fdaSStephen Hemminger 	return err;
147655737fdaSStephen Hemminger 
147755737fdaSStephen Hemminger out_release:
147855737fdaSStephen Hemminger 	rcu_read_unlock();
147955737fdaSStephen Hemminger 	goto out_sock_release;
14801da177e4SLinus Torvalds }
1481721db93aSPavel Emelyanov EXPORT_SYMBOL(__sock_create);
14821da177e4SLinus Torvalds 
14838a3c245cSPedro Tammela /**
14848a3c245cSPedro Tammela  *	sock_create - creates a socket
14858a3c245cSPedro Tammela  *	@family: protocol family (AF_INET, ...)
14868a3c245cSPedro Tammela  *	@type: communication type (SOCK_STREAM, ...)
14878a3c245cSPedro Tammela  *	@protocol: protocol (0, ...)
14888a3c245cSPedro Tammela  *	@res: new socket
14898a3c245cSPedro Tammela  *
14908a3c245cSPedro Tammela  *	A wrapper around __sock_create().
14918a3c245cSPedro Tammela  *	Returns 0 or an error. This function internally uses GFP_KERNEL.
14928a3c245cSPedro Tammela  */
14938a3c245cSPedro Tammela 
14941da177e4SLinus Torvalds int sock_create(int family, int type, int protocol, struct socket **res)
14951da177e4SLinus Torvalds {
14961b8d7ae4SEric W. Biederman 	return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
14971da177e4SLinus Torvalds }
1498c6d409cfSEric Dumazet EXPORT_SYMBOL(sock_create);
14991da177e4SLinus Torvalds 
/**
 *	sock_create_kern - creates a socket (kernel space)
 *	@net: net namespace
 *	@family: protocol family (AF_INET, ...)
 *	@type: communication type (SOCK_STREAM, ...)
 *	@protocol: protocol (0, ...)
 *	@res: new socket
 *
 *	A wrapper around __sock_create() that passes @kern = 1.
 *	Returns 0 or an error. This function internally uses GFP_KERNEL.
 */

int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res)
{
	const int kern = 1;	/* the socket belongs to the kernel */

	return __sock_create(net, family, type, protocol, res, kern);
}
EXPORT_SYMBOL(sock_create_kern);
15171da177e4SLinus Torvalds 
15189d6a15c3SDominik Brodowski int __sys_socket(int family, int type, int protocol)
15191da177e4SLinus Torvalds {
15201da177e4SLinus Torvalds 	int retval;
15211da177e4SLinus Torvalds 	struct socket *sock;
1522a677a039SUlrich Drepper 	int flags;
1523a677a039SUlrich Drepper 
1524e38b36f3SUlrich Drepper 	/* Check the SOCK_* constants for consistency.  */
1525e38b36f3SUlrich Drepper 	BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1526e38b36f3SUlrich Drepper 	BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1527e38b36f3SUlrich Drepper 	BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1528e38b36f3SUlrich Drepper 	BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1529e38b36f3SUlrich Drepper 
1530a677a039SUlrich Drepper 	flags = type & ~SOCK_TYPE_MASK;
153177d27200SUlrich Drepper 	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1532a677a039SUlrich Drepper 		return -EINVAL;
1533a677a039SUlrich Drepper 	type &= SOCK_TYPE_MASK;
15341da177e4SLinus Torvalds 
1535aaca0bdcSUlrich Drepper 	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1536aaca0bdcSUlrich Drepper 		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1537aaca0bdcSUlrich Drepper 
15381da177e4SLinus Torvalds 	retval = sock_create(family, type, protocol, &sock);
15391da177e4SLinus Torvalds 	if (retval < 0)
15401da177e4SLinus Torvalds 		return retval;
15411da177e4SLinus Torvalds 
15428e1611e2SAl Viro 	return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
15431da177e4SLinus Torvalds }
15441da177e4SLinus Torvalds 
15459d6a15c3SDominik Brodowski SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
15469d6a15c3SDominik Brodowski {
15479d6a15c3SDominik Brodowski 	return __sys_socket(family, type, protocol);
15489d6a15c3SDominik Brodowski }
15499d6a15c3SDominik Brodowski 
15501da177e4SLinus Torvalds /*
15511da177e4SLinus Torvalds  *	Create a pair of connected sockets.
15521da177e4SLinus Torvalds  */
15531da177e4SLinus Torvalds 
15546debc8d8SDominik Brodowski int __sys_socketpair(int family, int type, int protocol, int __user *usockvec)
15551da177e4SLinus Torvalds {
15561da177e4SLinus Torvalds 	struct socket *sock1, *sock2;
15571da177e4SLinus Torvalds 	int fd1, fd2, err;
1558db349509SAl Viro 	struct file *newfile1, *newfile2;
1559a677a039SUlrich Drepper 	int flags;
1560a677a039SUlrich Drepper 
1561a677a039SUlrich Drepper 	flags = type & ~SOCK_TYPE_MASK;
156277d27200SUlrich Drepper 	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1563a677a039SUlrich Drepper 		return -EINVAL;
1564a677a039SUlrich Drepper 	type &= SOCK_TYPE_MASK;
15651da177e4SLinus Torvalds 
1566aaca0bdcSUlrich Drepper 	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1567aaca0bdcSUlrich Drepper 		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1568aaca0bdcSUlrich Drepper 
15691da177e4SLinus Torvalds 	/*
1570016a266bSAl Viro 	 * reserve descriptors and make sure we won't fail
1571016a266bSAl Viro 	 * to return them to userland.
1572016a266bSAl Viro 	 */
1573016a266bSAl Viro 	fd1 = get_unused_fd_flags(flags);
1574016a266bSAl Viro 	if (unlikely(fd1 < 0))
1575016a266bSAl Viro 		return fd1;
1576016a266bSAl Viro 
1577016a266bSAl Viro 	fd2 = get_unused_fd_flags(flags);
1578016a266bSAl Viro 	if (unlikely(fd2 < 0)) {
1579016a266bSAl Viro 		put_unused_fd(fd1);
1580016a266bSAl Viro 		return fd2;
1581016a266bSAl Viro 	}
1582016a266bSAl Viro 
1583016a266bSAl Viro 	err = put_user(fd1, &usockvec[0]);
1584016a266bSAl Viro 	if (err)
1585016a266bSAl Viro 		goto out;
1586016a266bSAl Viro 
1587016a266bSAl Viro 	err = put_user(fd2, &usockvec[1]);
1588016a266bSAl Viro 	if (err)
1589016a266bSAl Viro 		goto out;
1590016a266bSAl Viro 
1591016a266bSAl Viro 	/*
15921da177e4SLinus Torvalds 	 * Obtain the first socket and check if the underlying protocol
15931da177e4SLinus Torvalds 	 * supports the socketpair call.
15941da177e4SLinus Torvalds 	 */
15951da177e4SLinus Torvalds 
15961da177e4SLinus Torvalds 	err = sock_create(family, type, protocol, &sock1);
1597016a266bSAl Viro 	if (unlikely(err < 0))
15981da177e4SLinus Torvalds 		goto out;
15991da177e4SLinus Torvalds 
16001da177e4SLinus Torvalds 	err = sock_create(family, type, protocol, &sock2);
1601016a266bSAl Viro 	if (unlikely(err < 0)) {
1602016a266bSAl Viro 		sock_release(sock1);
1603016a266bSAl Viro 		goto out;
1604bf3c23d1SDavid S. Miller 	}
1605d73aa286SYann Droneaud 
1606d47cd945SDavid Herrmann 	err = security_socket_socketpair(sock1, sock2);
1607d47cd945SDavid Herrmann 	if (unlikely(err)) {
1608d47cd945SDavid Herrmann 		sock_release(sock2);
1609d47cd945SDavid Herrmann 		sock_release(sock1);
1610d47cd945SDavid Herrmann 		goto out;
1611d47cd945SDavid Herrmann 	}
1612d47cd945SDavid Herrmann 
1613016a266bSAl Viro 	err = sock1->ops->socketpair(sock1, sock2);
1614016a266bSAl Viro 	if (unlikely(err < 0)) {
1615016a266bSAl Viro 		sock_release(sock2);
1616016a266bSAl Viro 		sock_release(sock1);
1617016a266bSAl Viro 		goto out;
161828407630SAl Viro 	}
161928407630SAl Viro 
1620aab174f0SLinus Torvalds 	newfile1 = sock_alloc_file(sock1, flags, NULL);
1621b5ffe634SViresh Kumar 	if (IS_ERR(newfile1)) {
162228407630SAl Viro 		err = PTR_ERR(newfile1);
1623016a266bSAl Viro 		sock_release(sock2);
1624016a266bSAl Viro 		goto out;
162528407630SAl Viro 	}
162628407630SAl Viro 
1627aab174f0SLinus Torvalds 	newfile2 = sock_alloc_file(sock2, flags, NULL);
162828407630SAl Viro 	if (IS_ERR(newfile2)) {
162928407630SAl Viro 		err = PTR_ERR(newfile2);
1630016a266bSAl Viro 		fput(newfile1);
1631016a266bSAl Viro 		goto out;
1632db349509SAl Viro 	}
1633db349509SAl Viro 
1634157cf649SAl Viro 	audit_fd_pair(fd1, fd2);
1635d73aa286SYann Droneaud 
1636db349509SAl Viro 	fd_install(fd1, newfile1);
1637db349509SAl Viro 	fd_install(fd2, newfile2);
16381da177e4SLinus Torvalds 	return 0;
16391da177e4SLinus Torvalds 
16401da177e4SLinus Torvalds out:
1641016a266bSAl Viro 	put_unused_fd(fd2);
1642016a266bSAl Viro 	put_unused_fd(fd1);
16431da177e4SLinus Torvalds 	return err;
16441da177e4SLinus Torvalds }
16451da177e4SLinus Torvalds 
16466debc8d8SDominik Brodowski SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
16476debc8d8SDominik Brodowski 		int __user *, usockvec)
16486debc8d8SDominik Brodowski {
16496debc8d8SDominik Brodowski 	return __sys_socketpair(family, type, protocol, usockvec);
16506debc8d8SDominik Brodowski }
16516debc8d8SDominik Brodowski 
16521da177e4SLinus Torvalds /*
16531da177e4SLinus Torvalds  *	Bind a name to a socket. Nothing much to do here since it's
16541da177e4SLinus Torvalds  *	the protocol's responsibility to handle the local address.
16551da177e4SLinus Torvalds  *
16561da177e4SLinus Torvalds  *	We move the socket address to kernel space before we call
16571da177e4SLinus Torvalds  *	the protocol layer (having also checked the address is ok).
16581da177e4SLinus Torvalds  */
16591da177e4SLinus Torvalds 
1660a87d35d8SDominik Brodowski int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
16611da177e4SLinus Torvalds {
16621da177e4SLinus Torvalds 	struct socket *sock;
1663230b1839SYOSHIFUJI Hideaki 	struct sockaddr_storage address;
16646cb153caSBenjamin LaHaise 	int err, fput_needed;
16651da177e4SLinus Torvalds 
166689bddce5SStephen Hemminger 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
166789bddce5SStephen Hemminger 	if (sock) {
166843db362dSMaciej Żenczykowski 		err = move_addr_to_kernel(umyaddr, addrlen, &address);
1669068b88ccSJakub Sitnicki 		if (!err) {
167089bddce5SStephen Hemminger 			err = security_socket_bind(sock,
1671230b1839SYOSHIFUJI Hideaki 						   (struct sockaddr *)&address,
167289bddce5SStephen Hemminger 						   addrlen);
16736cb153caSBenjamin LaHaise 			if (!err)
16746cb153caSBenjamin LaHaise 				err = sock->ops->bind(sock,
167589bddce5SStephen Hemminger 						      (struct sockaddr *)
1676230b1839SYOSHIFUJI Hideaki 						      &address, addrlen);
16771da177e4SLinus Torvalds 		}
16786cb153caSBenjamin LaHaise 		fput_light(sock->file, fput_needed);
16791da177e4SLinus Torvalds 	}
16801da177e4SLinus Torvalds 	return err;
16811da177e4SLinus Torvalds }
16821da177e4SLinus Torvalds 
1683a87d35d8SDominik Brodowski SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1684a87d35d8SDominik Brodowski {
1685a87d35d8SDominik Brodowski 	return __sys_bind(fd, umyaddr, addrlen);
1686a87d35d8SDominik Brodowski }
1687a87d35d8SDominik Brodowski 
16881da177e4SLinus Torvalds /*
16891da177e4SLinus Torvalds  *	Perform a listen. Basically, we allow the protocol to do anything
16901da177e4SLinus Torvalds  *	necessary for a listen, and if that works, we mark the socket as
16911da177e4SLinus Torvalds  *	ready for listening.
16921da177e4SLinus Torvalds  */
16931da177e4SLinus Torvalds 
169425e290eeSDominik Brodowski int __sys_listen(int fd, int backlog)
16951da177e4SLinus Torvalds {
16961da177e4SLinus Torvalds 	struct socket *sock;
16976cb153caSBenjamin LaHaise 	int err, fput_needed;
1698b8e1f9b5SPavel Emelyanov 	int somaxconn;
16991da177e4SLinus Torvalds 
170089bddce5SStephen Hemminger 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
170189bddce5SStephen Hemminger 	if (sock) {
17028efa6e93SPavel Emelyanov 		somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
170395c96174SEric Dumazet 		if ((unsigned int)backlog > somaxconn)
1704b8e1f9b5SPavel Emelyanov 			backlog = somaxconn;
17051da177e4SLinus Torvalds 
17061da177e4SLinus Torvalds 		err = security_socket_listen(sock, backlog);
17076cb153caSBenjamin LaHaise 		if (!err)
17081da177e4SLinus Torvalds 			err = sock->ops->listen(sock, backlog);
17096cb153caSBenjamin LaHaise 
17106cb153caSBenjamin LaHaise 		fput_light(sock->file, fput_needed);
17111da177e4SLinus Torvalds 	}
17121da177e4SLinus Torvalds 	return err;
17131da177e4SLinus Torvalds }
17141da177e4SLinus Torvalds 
171525e290eeSDominik Brodowski SYSCALL_DEFINE2(listen, int, fd, int, backlog)
171625e290eeSDominik Brodowski {
171725e290eeSDominik Brodowski 	return __sys_listen(fd, backlog);
171825e290eeSDominik Brodowski }
171925e290eeSDominik Brodowski 
1720de2ea4b6SJens Axboe int __sys_accept4_file(struct file *file, unsigned file_flags,
1721de2ea4b6SJens Axboe 		       struct sockaddr __user *upeer_sockaddr,
172209952e3eSJens Axboe 		       int __user *upeer_addrlen, int flags,
172309952e3eSJens Axboe 		       unsigned long nofile)
17241da177e4SLinus Torvalds {
17251da177e4SLinus Torvalds 	struct socket *sock, *newsock;
172639d8c1b6SDavid S. Miller 	struct file *newfile;
1727de2ea4b6SJens Axboe 	int err, len, newfd;
1728230b1839SYOSHIFUJI Hideaki 	struct sockaddr_storage address;
17291da177e4SLinus Torvalds 
173077d27200SUlrich Drepper 	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1731aaca0bdcSUlrich Drepper 		return -EINVAL;
1732aaca0bdcSUlrich Drepper 
1733aaca0bdcSUlrich Drepper 	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1734aaca0bdcSUlrich Drepper 		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1735aaca0bdcSUlrich Drepper 
1736dba4a925SFlorent Revest 	sock = sock_from_file(file);
1737dba4a925SFlorent Revest 	if (!sock) {
1738dba4a925SFlorent Revest 		err = -ENOTSOCK;
17391da177e4SLinus Torvalds 		goto out;
1740dba4a925SFlorent Revest 	}
17411da177e4SLinus Torvalds 
17421da177e4SLinus Torvalds 	err = -ENFILE;
1743c6d409cfSEric Dumazet 	newsock = sock_alloc();
1744c6d409cfSEric Dumazet 	if (!newsock)
1745de2ea4b6SJens Axboe 		goto out;
17461da177e4SLinus Torvalds 
17471da177e4SLinus Torvalds 	newsock->type = sock->type;
17481da177e4SLinus Torvalds 	newsock->ops = sock->ops;
17491da177e4SLinus Torvalds 
17501da177e4SLinus Torvalds 	/*
17511da177e4SLinus Torvalds 	 * We don't need try_module_get here, as the listening socket (sock)
17521da177e4SLinus Torvalds 	 * has the protocol module (sock->ops->owner) held.
17531da177e4SLinus Torvalds 	 */
17541da177e4SLinus Torvalds 	__module_get(newsock->ops->owner);
17551da177e4SLinus Torvalds 
175609952e3eSJens Axboe 	newfd = __get_unused_fd_flags(flags, nofile);
175739d8c1b6SDavid S. Miller 	if (unlikely(newfd < 0)) {
175839d8c1b6SDavid S. Miller 		err = newfd;
17599a1875e6SDavid S. Miller 		sock_release(newsock);
1760de2ea4b6SJens Axboe 		goto out;
176139d8c1b6SDavid S. Miller 	}
1762aab174f0SLinus Torvalds 	newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
1763b5ffe634SViresh Kumar 	if (IS_ERR(newfile)) {
176428407630SAl Viro 		err = PTR_ERR(newfile);
176528407630SAl Viro 		put_unused_fd(newfd);
1766de2ea4b6SJens Axboe 		goto out;
176728407630SAl Viro 	}
176839d8c1b6SDavid S. Miller 
1769a79af59eSFrank Filz 	err = security_socket_accept(sock, newsock);
1770a79af59eSFrank Filz 	if (err)
177139d8c1b6SDavid S. Miller 		goto out_fd;
1772a79af59eSFrank Filz 
1773de2ea4b6SJens Axboe 	err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags,
1774de2ea4b6SJens Axboe 					false);
17751da177e4SLinus Torvalds 	if (err < 0)
177639d8c1b6SDavid S. Miller 		goto out_fd;
17771da177e4SLinus Torvalds 
17781da177e4SLinus Torvalds 	if (upeer_sockaddr) {
17799b2c45d4SDenys Vlasenko 		len = newsock->ops->getname(newsock,
17809b2c45d4SDenys Vlasenko 					(struct sockaddr *)&address, 2);
17819b2c45d4SDenys Vlasenko 		if (len < 0) {
17821da177e4SLinus Torvalds 			err = -ECONNABORTED;
178339d8c1b6SDavid S. Miller 			goto out_fd;
17841da177e4SLinus Torvalds 		}
178543db362dSMaciej Żenczykowski 		err = move_addr_to_user(&address,
1786230b1839SYOSHIFUJI Hideaki 					len, upeer_sockaddr, upeer_addrlen);
17871da177e4SLinus Torvalds 		if (err < 0)
178839d8c1b6SDavid S. Miller 			goto out_fd;
17891da177e4SLinus Torvalds 	}
17901da177e4SLinus Torvalds 
17911da177e4SLinus Torvalds 	/* File flags are not inherited via accept() unlike another OSes. */
17921da177e4SLinus Torvalds 
179339d8c1b6SDavid S. Miller 	fd_install(newfd, newfile);
179439d8c1b6SDavid S. Miller 	err = newfd;
17951da177e4SLinus Torvalds out:
17961da177e4SLinus Torvalds 	return err;
179739d8c1b6SDavid S. Miller out_fd:
17989606a216SDavid S. Miller 	fput(newfile);
179939d8c1b6SDavid S. Miller 	put_unused_fd(newfd);
1800de2ea4b6SJens Axboe 	goto out;
1801de2ea4b6SJens Axboe 
1802de2ea4b6SJens Axboe }
1803de2ea4b6SJens Axboe 
1804de2ea4b6SJens Axboe /*
1805de2ea4b6SJens Axboe  *	For accept, we attempt to create a new socket, set up the link
1806de2ea4b6SJens Axboe  *	with the client, wake up the client, then return the new
1807de2ea4b6SJens Axboe  *	connected fd. We collect the address of the connector in kernel
1808de2ea4b6SJens Axboe  *	space and move it to user at the very end. This is unclean because
1809de2ea4b6SJens Axboe  *	we open the socket then return an error.
1810de2ea4b6SJens Axboe  *
1811de2ea4b6SJens Axboe  *	1003.1g adds the ability to recvmsg() to query connection pending
1812de2ea4b6SJens Axboe  *	status to recvmsg. We need to add that support in a way thats
1813de2ea4b6SJens Axboe  *	clean when we restructure accept also.
1814de2ea4b6SJens Axboe  */
1815de2ea4b6SJens Axboe 
1816de2ea4b6SJens Axboe int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
1817de2ea4b6SJens Axboe 		  int __user *upeer_addrlen, int flags)
1818de2ea4b6SJens Axboe {
1819de2ea4b6SJens Axboe 	int ret = -EBADF;
1820de2ea4b6SJens Axboe 	struct fd f;
1821de2ea4b6SJens Axboe 
1822de2ea4b6SJens Axboe 	f = fdget(fd);
1823de2ea4b6SJens Axboe 	if (f.file) {
1824de2ea4b6SJens Axboe 		ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
182509952e3eSJens Axboe 						upeer_addrlen, flags,
182609952e3eSJens Axboe 						rlimit(RLIMIT_NOFILE));
18276b07edebSMiaohe Lin 		fdput(f);
1828de2ea4b6SJens Axboe 	}
1829de2ea4b6SJens Axboe 
1830de2ea4b6SJens Axboe 	return ret;
18311da177e4SLinus Torvalds }
18321da177e4SLinus Torvalds 
18334541e805SDominik Brodowski SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
18344541e805SDominik Brodowski 		int __user *, upeer_addrlen, int, flags)
18354541e805SDominik Brodowski {
18364541e805SDominik Brodowski 	return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags);
18374541e805SDominik Brodowski }
18384541e805SDominik Brodowski 
183920f37034SHeiko Carstens SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
184020f37034SHeiko Carstens 		int __user *, upeer_addrlen)
1841aaca0bdcSUlrich Drepper {
18424541e805SDominik Brodowski 	return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
1843aaca0bdcSUlrich Drepper }
1844aaca0bdcSUlrich Drepper 
18451da177e4SLinus Torvalds /*
18461da177e4SLinus Torvalds  *	Attempt to connect to a socket with the server address.  The address
18471da177e4SLinus Torvalds  *	is in user space so we verify it is OK and move it to kernel space.
18481da177e4SLinus Torvalds  *
18491da177e4SLinus Torvalds  *	For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
18501da177e4SLinus Torvalds  *	break bindings
18511da177e4SLinus Torvalds  *
18521da177e4SLinus Torvalds  *	NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
18531da177e4SLinus Torvalds  *	other SEQPACKET protocols that take time to connect() as it doesn't
18541da177e4SLinus Torvalds  *	include the -EINPROGRESS status for such sockets.
18551da177e4SLinus Torvalds  */
18561da177e4SLinus Torvalds 
1857f499a021SJens Axboe int __sys_connect_file(struct file *file, struct sockaddr_storage *address,
1858bd3ded31SJens Axboe 		       int addrlen, int file_flags)
18591da177e4SLinus Torvalds {
18601da177e4SLinus Torvalds 	struct socket *sock;
1861bd3ded31SJens Axboe 	int err;
18621da177e4SLinus Torvalds 
1863dba4a925SFlorent Revest 	sock = sock_from_file(file);
1864dba4a925SFlorent Revest 	if (!sock) {
1865dba4a925SFlorent Revest 		err = -ENOTSOCK;
18661da177e4SLinus Torvalds 		goto out;
1867dba4a925SFlorent Revest 	}
18681da177e4SLinus Torvalds 
186989bddce5SStephen Hemminger 	err =
1870f499a021SJens Axboe 	    security_socket_connect(sock, (struct sockaddr *)address, addrlen);
18711da177e4SLinus Torvalds 	if (err)
1872bd3ded31SJens Axboe 		goto out;
18731da177e4SLinus Torvalds 
1874f499a021SJens Axboe 	err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
1875bd3ded31SJens Axboe 				 sock->file->f_flags | file_flags);
18761da177e4SLinus Torvalds out:
18771da177e4SLinus Torvalds 	return err;
18781da177e4SLinus Torvalds }
18791da177e4SLinus Torvalds 
1880bd3ded31SJens Axboe int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1881bd3ded31SJens Axboe {
1882bd3ded31SJens Axboe 	int ret = -EBADF;
1883bd3ded31SJens Axboe 	struct fd f;
1884bd3ded31SJens Axboe 
1885bd3ded31SJens Axboe 	f = fdget(fd);
1886bd3ded31SJens Axboe 	if (f.file) {
1887f499a021SJens Axboe 		struct sockaddr_storage address;
1888f499a021SJens Axboe 
1889f499a021SJens Axboe 		ret = move_addr_to_kernel(uservaddr, addrlen, &address);
1890f499a021SJens Axboe 		if (!ret)
1891f499a021SJens Axboe 			ret = __sys_connect_file(f.file, &address, addrlen, 0);
18926b07edebSMiaohe Lin 		fdput(f);
1893bd3ded31SJens Axboe 	}
1894bd3ded31SJens Axboe 
1895bd3ded31SJens Axboe 	return ret;
1896bd3ded31SJens Axboe }
1897bd3ded31SJens Axboe 
18981387c2c2SDominik Brodowski SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
18991387c2c2SDominik Brodowski 		int, addrlen)
19001387c2c2SDominik Brodowski {
19011387c2c2SDominik Brodowski 	return __sys_connect(fd, uservaddr, addrlen);
19021387c2c2SDominik Brodowski }
19031387c2c2SDominik Brodowski 
19041da177e4SLinus Torvalds /*
19051da177e4SLinus Torvalds  *	Get the local address ('name') of a socket object. Move the obtained
19061da177e4SLinus Torvalds  *	name to user space.
19071da177e4SLinus Torvalds  */
19081da177e4SLinus Torvalds 
19098882a107SDominik Brodowski int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
19108882a107SDominik Brodowski 		      int __user *usockaddr_len)
19111da177e4SLinus Torvalds {
19121da177e4SLinus Torvalds 	struct socket *sock;
1913230b1839SYOSHIFUJI Hideaki 	struct sockaddr_storage address;
19149b2c45d4SDenys Vlasenko 	int err, fput_needed;
19151da177e4SLinus Torvalds 
19166cb153caSBenjamin LaHaise 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
19171da177e4SLinus Torvalds 	if (!sock)
19181da177e4SLinus Torvalds 		goto out;
19191da177e4SLinus Torvalds 
19201da177e4SLinus Torvalds 	err = security_socket_getsockname(sock);
19211da177e4SLinus Torvalds 	if (err)
19221da177e4SLinus Torvalds 		goto out_put;
19231da177e4SLinus Torvalds 
19249b2c45d4SDenys Vlasenko 	err = sock->ops->getname(sock, (struct sockaddr *)&address, 0);
19259b2c45d4SDenys Vlasenko 	if (err < 0)
19261da177e4SLinus Torvalds 		goto out_put;
19279b2c45d4SDenys Vlasenko         /* "err" is actually length in this case */
19289b2c45d4SDenys Vlasenko 	err = move_addr_to_user(&address, err, usockaddr, usockaddr_len);
19291da177e4SLinus Torvalds 
19301da177e4SLinus Torvalds out_put:
19316cb153caSBenjamin LaHaise 	fput_light(sock->file, fput_needed);
19321da177e4SLinus Torvalds out:
19331da177e4SLinus Torvalds 	return err;
19341da177e4SLinus Torvalds }
19351da177e4SLinus Torvalds 
19368882a107SDominik Brodowski SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
19378882a107SDominik Brodowski 		int __user *, usockaddr_len)
19388882a107SDominik Brodowski {
19398882a107SDominik Brodowski 	return __sys_getsockname(fd, usockaddr, usockaddr_len);
19408882a107SDominik Brodowski }
19418882a107SDominik Brodowski 
19421da177e4SLinus Torvalds /*
19431da177e4SLinus Torvalds  *	Get the remote address ('name') of a socket object. Move the obtained
19441da177e4SLinus Torvalds  *	name to user space.
19451da177e4SLinus Torvalds  */
19461da177e4SLinus Torvalds 
1947b21c8f83SDominik Brodowski int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
1948b21c8f83SDominik Brodowski 		      int __user *usockaddr_len)
19491da177e4SLinus Torvalds {
19501da177e4SLinus Torvalds 	struct socket *sock;
1951230b1839SYOSHIFUJI Hideaki 	struct sockaddr_storage address;
19529b2c45d4SDenys Vlasenko 	int err, fput_needed;
19531da177e4SLinus Torvalds 
195489bddce5SStephen Hemminger 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
195589bddce5SStephen Hemminger 	if (sock != NULL) {
19561da177e4SLinus Torvalds 		err = security_socket_getpeername(sock);
19571da177e4SLinus Torvalds 		if (err) {
19586cb153caSBenjamin LaHaise 			fput_light(sock->file, fput_needed);
19591da177e4SLinus Torvalds 			return err;
19601da177e4SLinus Torvalds 		}
19611da177e4SLinus Torvalds 
19629b2c45d4SDenys Vlasenko 		err = sock->ops->getname(sock, (struct sockaddr *)&address, 1);
19639b2c45d4SDenys Vlasenko 		if (err >= 0)
19649b2c45d4SDenys Vlasenko 			/* "err" is actually length in this case */
19659b2c45d4SDenys Vlasenko 			err = move_addr_to_user(&address, err, usockaddr,
196689bddce5SStephen Hemminger 						usockaddr_len);
19676cb153caSBenjamin LaHaise 		fput_light(sock->file, fput_needed);
19681da177e4SLinus Torvalds 	}
19691da177e4SLinus Torvalds 	return err;
19701da177e4SLinus Torvalds }
19711da177e4SLinus Torvalds 
1972b21c8f83SDominik Brodowski SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1973b21c8f83SDominik Brodowski 		int __user *, usockaddr_len)
1974b21c8f83SDominik Brodowski {
1975b21c8f83SDominik Brodowski 	return __sys_getpeername(fd, usockaddr, usockaddr_len);
1976b21c8f83SDominik Brodowski }
1977b21c8f83SDominik Brodowski 
19781da177e4SLinus Torvalds /*
19791da177e4SLinus Torvalds  *	Send a datagram to a given address. We move the address into kernel
19801da177e4SLinus Torvalds  *	space and check the user space data area is readable before invoking
19811da177e4SLinus Torvalds  *	the protocol.
19821da177e4SLinus Torvalds  */
1983211b634bSDominik Brodowski int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
1984211b634bSDominik Brodowski 		 struct sockaddr __user *addr,  int addr_len)
19851da177e4SLinus Torvalds {
19861da177e4SLinus Torvalds 	struct socket *sock;
1987230b1839SYOSHIFUJI Hideaki 	struct sockaddr_storage address;
19881da177e4SLinus Torvalds 	int err;
19891da177e4SLinus Torvalds 	struct msghdr msg;
19901da177e4SLinus Torvalds 	struct iovec iov;
19916cb153caSBenjamin LaHaise 	int fput_needed;
19921da177e4SLinus Torvalds 
1993602bd0e9SAl Viro 	err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
1994602bd0e9SAl Viro 	if (unlikely(err))
1995602bd0e9SAl Viro 		return err;
1996de0fa95cSPavel Emelyanov 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
1997de0fa95cSPavel Emelyanov 	if (!sock)
19984387ff75SDavid S. Miller 		goto out;
19996cb153caSBenjamin LaHaise 
20001da177e4SLinus Torvalds 	msg.msg_name = NULL;
20011da177e4SLinus Torvalds 	msg.msg_control = NULL;
20021da177e4SLinus Torvalds 	msg.msg_controllen = 0;
20031da177e4SLinus Torvalds 	msg.msg_namelen = 0;
20046cb153caSBenjamin LaHaise 	if (addr) {
200543db362dSMaciej Żenczykowski 		err = move_addr_to_kernel(addr, addr_len, &address);
20061da177e4SLinus Torvalds 		if (err < 0)
20071da177e4SLinus Torvalds 			goto out_put;
2008230b1839SYOSHIFUJI Hideaki 		msg.msg_name = (struct sockaddr *)&address;
20091da177e4SLinus Torvalds 		msg.msg_namelen = addr_len;
20101da177e4SLinus Torvalds 	}
20111da177e4SLinus Torvalds 	if (sock->file->f_flags & O_NONBLOCK)
20121da177e4SLinus Torvalds 		flags |= MSG_DONTWAIT;
20131da177e4SLinus Torvalds 	msg.msg_flags = flags;
2014d8725c86SAl Viro 	err = sock_sendmsg(sock, &msg);
20151da177e4SLinus Torvalds 
20161da177e4SLinus Torvalds out_put:
2017de0fa95cSPavel Emelyanov 	fput_light(sock->file, fput_needed);
20184387ff75SDavid S. Miller out:
20191da177e4SLinus Torvalds 	return err;
20201da177e4SLinus Torvalds }
20211da177e4SLinus Torvalds 
2022211b634bSDominik Brodowski SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
2023211b634bSDominik Brodowski 		unsigned int, flags, struct sockaddr __user *, addr,
2024211b634bSDominik Brodowski 		int, addr_len)
2025211b634bSDominik Brodowski {
2026211b634bSDominik Brodowski 	return __sys_sendto(fd, buff, len, flags, addr, addr_len);
2027211b634bSDominik Brodowski }
2028211b634bSDominik Brodowski 
20291da177e4SLinus Torvalds /*
20301da177e4SLinus Torvalds  *	Send a datagram down a socket.
20311da177e4SLinus Torvalds  */
20321da177e4SLinus Torvalds 
20333e0fa65fSHeiko Carstens SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
203495c96174SEric Dumazet 		unsigned int, flags)
20351da177e4SLinus Torvalds {
2036211b634bSDominik Brodowski 	return __sys_sendto(fd, buff, len, flags, NULL, 0);
20371da177e4SLinus Torvalds }
20381da177e4SLinus Torvalds 
20391da177e4SLinus Torvalds /*
20401da177e4SLinus Torvalds  *	Receive a frame from the socket and optionally record the address of the
20411da177e4SLinus Torvalds  *	sender. We verify the buffers are writable and if needed move the
20421da177e4SLinus Torvalds  *	sender address from kernel to user space.
20431da177e4SLinus Torvalds  */
20447a09e1ebSDominik Brodowski int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
20457a09e1ebSDominik Brodowski 		   struct sockaddr __user *addr, int __user *addr_len)
20461da177e4SLinus Torvalds {
20471da177e4SLinus Torvalds 	struct socket *sock;
20481da177e4SLinus Torvalds 	struct iovec iov;
20491da177e4SLinus Torvalds 	struct msghdr msg;
2050230b1839SYOSHIFUJI Hideaki 	struct sockaddr_storage address;
20511da177e4SLinus Torvalds 	int err, err2;
20526cb153caSBenjamin LaHaise 	int fput_needed;
20531da177e4SLinus Torvalds 
2054602bd0e9SAl Viro 	err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
2055602bd0e9SAl Viro 	if (unlikely(err))
2056602bd0e9SAl Viro 		return err;
2057de0fa95cSPavel Emelyanov 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
20581da177e4SLinus Torvalds 	if (!sock)
2059de0fa95cSPavel Emelyanov 		goto out;
20601da177e4SLinus Torvalds 
20611da177e4SLinus Torvalds 	msg.msg_control = NULL;
20621da177e4SLinus Torvalds 	msg.msg_controllen = 0;
2063f3d33426SHannes Frederic Sowa 	/* Save some cycles and don't copy the address if not needed */
2064f3d33426SHannes Frederic Sowa 	msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
2065f3d33426SHannes Frederic Sowa 	/* We assume all kernel code knows the size of sockaddr_storage */
2066f3d33426SHannes Frederic Sowa 	msg.msg_namelen = 0;
2067130ed5d1Stadeusz.struk@intel.com 	msg.msg_iocb = NULL;
20689f138fa6SAlexander Potapenko 	msg.msg_flags = 0;
20691da177e4SLinus Torvalds 	if (sock->file->f_flags & O_NONBLOCK)
20701da177e4SLinus Torvalds 		flags |= MSG_DONTWAIT;
20712da62906SAl Viro 	err = sock_recvmsg(sock, &msg, flags);
20721da177e4SLinus Torvalds 
207389bddce5SStephen Hemminger 	if (err >= 0 && addr != NULL) {
207443db362dSMaciej Żenczykowski 		err2 = move_addr_to_user(&address,
2075230b1839SYOSHIFUJI Hideaki 					 msg.msg_namelen, addr, addr_len);
20761da177e4SLinus Torvalds 		if (err2 < 0)
20771da177e4SLinus Torvalds 			err = err2;
20781da177e4SLinus Torvalds 	}
2079de0fa95cSPavel Emelyanov 
2080de0fa95cSPavel Emelyanov 	fput_light(sock->file, fput_needed);
20814387ff75SDavid S. Miller out:
20821da177e4SLinus Torvalds 	return err;
20831da177e4SLinus Torvalds }
20841da177e4SLinus Torvalds 
20857a09e1ebSDominik Brodowski SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
20867a09e1ebSDominik Brodowski 		unsigned int, flags, struct sockaddr __user *, addr,
20877a09e1ebSDominik Brodowski 		int __user *, addr_len)
20887a09e1ebSDominik Brodowski {
20897a09e1ebSDominik Brodowski 	return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len);
20907a09e1ebSDominik Brodowski }
20917a09e1ebSDominik Brodowski 
20921da177e4SLinus Torvalds /*
20931da177e4SLinus Torvalds  *	Receive a datagram from a socket.
20941da177e4SLinus Torvalds  */
20951da177e4SLinus Torvalds 
2096b7c0ddf5SJan Glauber SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
2097b7c0ddf5SJan Glauber 		unsigned int, flags)
20981da177e4SLinus Torvalds {
20997a09e1ebSDominik Brodowski 	return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
21001da177e4SLinus Torvalds }
21011da177e4SLinus Torvalds 
210283f0c10bSFlorian Westphal static bool sock_use_custom_sol_socket(const struct socket *sock)
210383f0c10bSFlorian Westphal {
210483f0c10bSFlorian Westphal 	const struct sock *sk = sock->sk;
210583f0c10bSFlorian Westphal 
210683f0c10bSFlorian Westphal 	/* Use sock->ops->setsockopt() for MPTCP */
210783f0c10bSFlorian Westphal 	return IS_ENABLED(CONFIG_MPTCP) &&
210883f0c10bSFlorian Westphal 	       sk->sk_protocol == IPPROTO_MPTCP &&
210983f0c10bSFlorian Westphal 	       sk->sk_type == SOCK_STREAM &&
211083f0c10bSFlorian Westphal 	       (sk->sk_family == AF_INET || sk->sk_family == AF_INET6);
211183f0c10bSFlorian Westphal }
211283f0c10bSFlorian Westphal 
21131da177e4SLinus Torvalds /*
21141da177e4SLinus Torvalds  *	Set a socket option. Because we don't know the option lengths we have
21151da177e4SLinus Torvalds  *	to pass the user mode parameter for the protocols to sort out.
21161da177e4SLinus Torvalds  */
2117a7b75c5aSChristoph Hellwig int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval,
211855db9c0eSChristoph Hellwig 		int optlen)
21191da177e4SLinus Torvalds {
2120519a8a6cSChristoph Hellwig 	sockptr_t optval = USER_SOCKPTR(user_optval);
21210d01da6aSStanislav Fomichev 	char *kernel_optval = NULL;
21226cb153caSBenjamin LaHaise 	int err, fput_needed;
21231da177e4SLinus Torvalds 	struct socket *sock;
21241da177e4SLinus Torvalds 
21251da177e4SLinus Torvalds 	if (optlen < 0)
21261da177e4SLinus Torvalds 		return -EINVAL;
21271da177e4SLinus Torvalds 
212889bddce5SStephen Hemminger 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
21294a367299SChristoph Hellwig 	if (!sock)
21304a367299SChristoph Hellwig 		return err;
21314a367299SChristoph Hellwig 
21321da177e4SLinus Torvalds 	err = security_socket_setsockopt(sock, level, optname);
21336cb153caSBenjamin LaHaise 	if (err)
21346cb153caSBenjamin LaHaise 		goto out_put;
21351da177e4SLinus Torvalds 
213655db9c0eSChristoph Hellwig 	if (!in_compat_syscall())
21374a367299SChristoph Hellwig 		err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname,
2138a7b75c5aSChristoph Hellwig 						     user_optval, &optlen,
213955db9c0eSChristoph Hellwig 						     &kernel_optval);
21404a367299SChristoph Hellwig 	if (err < 0)
21410d01da6aSStanislav Fomichev 		goto out_put;
21424a367299SChristoph Hellwig 	if (err > 0) {
21430d01da6aSStanislav Fomichev 		err = 0;
21440d01da6aSStanislav Fomichev 		goto out_put;
21450d01da6aSStanislav Fomichev 	}
21460d01da6aSStanislav Fomichev 
2147a7b75c5aSChristoph Hellwig 	if (kernel_optval)
2148a7b75c5aSChristoph Hellwig 		optval = KERNEL_SOCKPTR(kernel_optval);
214983f0c10bSFlorian Westphal 	if (level == SOL_SOCKET && !sock_use_custom_sol_socket(sock))
2150a7b75c5aSChristoph Hellwig 		err = sock_setsockopt(sock, level, optname, optval, optlen);
2151a44d9e72SChristoph Hellwig 	else if (unlikely(!sock->ops->setsockopt))
2152a44d9e72SChristoph Hellwig 		err = -EOPNOTSUPP;
21531da177e4SLinus Torvalds 	else
21544a367299SChristoph Hellwig 		err = sock->ops->setsockopt(sock, level, optname, optval,
215589bddce5SStephen Hemminger 					    optlen);
21560d01da6aSStanislav Fomichev 	kfree(kernel_optval);
21576cb153caSBenjamin LaHaise out_put:
21586cb153caSBenjamin LaHaise 	fput_light(sock->file, fput_needed);
21591da177e4SLinus Torvalds 	return err;
21601da177e4SLinus Torvalds }
21611da177e4SLinus Torvalds 
2162cc36dca0SDominik Brodowski SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
2163cc36dca0SDominik Brodowski 		char __user *, optval, int, optlen)
2164cc36dca0SDominik Brodowski {
2165cc36dca0SDominik Brodowski 	return __sys_setsockopt(fd, level, optname, optval, optlen);
2166cc36dca0SDominik Brodowski }
2167cc36dca0SDominik Brodowski 
21689cacf81fSStanislav Fomichev INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level,
21699cacf81fSStanislav Fomichev 							 int optname));
21709cacf81fSStanislav Fomichev 
21711da177e4SLinus Torvalds /*
21721da177e4SLinus Torvalds  *	Get a socket option. Because we don't know the option lengths we have
21731da177e4SLinus Torvalds  *	to pass a user mode parameter for the protocols to sort out.
21741da177e4SLinus Torvalds  */
217555db9c0eSChristoph Hellwig int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
217655db9c0eSChristoph Hellwig 		int __user *optlen)
21771da177e4SLinus Torvalds {
21786cb153caSBenjamin LaHaise 	int err, fput_needed;
21791da177e4SLinus Torvalds 	struct socket *sock;
21800d01da6aSStanislav Fomichev 	int max_optlen;
21811da177e4SLinus Torvalds 
218289bddce5SStephen Hemminger 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
2183d8a9b38fSChristoph Hellwig 	if (!sock)
2184d8a9b38fSChristoph Hellwig 		return err;
2185d8a9b38fSChristoph Hellwig 
21866cb153caSBenjamin LaHaise 	err = security_socket_getsockopt(sock, level, optname);
21876cb153caSBenjamin LaHaise 	if (err)
21886cb153caSBenjamin LaHaise 		goto out_put;
21891da177e4SLinus Torvalds 
219055db9c0eSChristoph Hellwig 	if (!in_compat_syscall())
21910d01da6aSStanislav Fomichev 		max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen);
21920d01da6aSStanislav Fomichev 
21931da177e4SLinus Torvalds 	if (level == SOL_SOCKET)
2194d8a9b38fSChristoph Hellwig 		err = sock_getsockopt(sock, level, optname, optval, optlen);
2195a44d9e72SChristoph Hellwig 	else if (unlikely(!sock->ops->getsockopt))
2196a44d9e72SChristoph Hellwig 		err = -EOPNOTSUPP;
21971da177e4SLinus Torvalds 	else
2198d8a9b38fSChristoph Hellwig 		err = sock->ops->getsockopt(sock, level, optname, optval,
219989bddce5SStephen Hemminger 					    optlen);
22000d01da6aSStanislav Fomichev 
220155db9c0eSChristoph Hellwig 	if (!in_compat_syscall())
220255db9c0eSChristoph Hellwig 		err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname,
220355db9c0eSChristoph Hellwig 						     optval, optlen, max_optlen,
220455db9c0eSChristoph Hellwig 						     err);
22056cb153caSBenjamin LaHaise out_put:
22066cb153caSBenjamin LaHaise 	fput_light(sock->file, fput_needed);
22071da177e4SLinus Torvalds 	return err;
22081da177e4SLinus Torvalds }
22091da177e4SLinus Torvalds 
221013a2d70eSDominik Brodowski SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
221113a2d70eSDominik Brodowski 		char __user *, optval, int __user *, optlen)
221213a2d70eSDominik Brodowski {
221313a2d70eSDominik Brodowski 	return __sys_getsockopt(fd, level, optname, optval, optlen);
221413a2d70eSDominik Brodowski }
221513a2d70eSDominik Brodowski 
22161da177e4SLinus Torvalds /*
22171da177e4SLinus Torvalds  *	Shutdown a socket.
22181da177e4SLinus Torvalds  */
22191da177e4SLinus Torvalds 
2220b713c195SJens Axboe int __sys_shutdown_sock(struct socket *sock, int how)
2221b713c195SJens Axboe {
2222b713c195SJens Axboe 	int err;
2223b713c195SJens Axboe 
2224b713c195SJens Axboe 	err = security_socket_shutdown(sock, how);
2225b713c195SJens Axboe 	if (!err)
2226b713c195SJens Axboe 		err = sock->ops->shutdown(sock, how);
2227b713c195SJens Axboe 
2228b713c195SJens Axboe 	return err;
2229b713c195SJens Axboe }
2230b713c195SJens Axboe 
2231005a1aeaSDominik Brodowski int __sys_shutdown(int fd, int how)
22321da177e4SLinus Torvalds {
22336cb153caSBenjamin LaHaise 	int err, fput_needed;
22341da177e4SLinus Torvalds 	struct socket *sock;
22351da177e4SLinus Torvalds 
223689bddce5SStephen Hemminger 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
223789bddce5SStephen Hemminger 	if (sock != NULL) {
2238b713c195SJens Axboe 		err = __sys_shutdown_sock(sock, how);
22396cb153caSBenjamin LaHaise 		fput_light(sock->file, fput_needed);
22401da177e4SLinus Torvalds 	}
22411da177e4SLinus Torvalds 	return err;
22421da177e4SLinus Torvalds }
22431da177e4SLinus Torvalds 
2244005a1aeaSDominik Brodowski SYSCALL_DEFINE2(shutdown, int, fd, int, how)
2245005a1aeaSDominik Brodowski {
2246005a1aeaSDominik Brodowski 	return __sys_shutdown(fd, how);
2247005a1aeaSDominik Brodowski }
2248005a1aeaSDominik Brodowski 
/*
 * A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * COMPAT_MSG() is not self-contained: the expansion site must have a
 * variable named 'flags' in scope, plus both a 'msg' pointer and a
 * matching 'msg'_compat pointer. MSG_CMSG_COMPAT in 'flags' selects
 * which of the two the member address is taken from.
 */
#define COMPAT_MSG(msg, member)					\
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member	\
				   : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
22551da177e4SLinus Torvalds 
2256c71d8ebeSTetsuo Handa struct used_address {
2257c71d8ebeSTetsuo Handa 	struct sockaddr_storage name;
2258c71d8ebeSTetsuo Handa 	unsigned int name_len;
2259c71d8ebeSTetsuo Handa };
2260c71d8ebeSTetsuo Handa 
22610a384abfSJens Axboe int __copy_msghdr_from_user(struct msghdr *kmsg,
226208adb7daSAl Viro 			    struct user_msghdr __user *umsg,
226308adb7daSAl Viro 			    struct sockaddr __user **save_addr,
22640a384abfSJens Axboe 			    struct iovec __user **uiov, size_t *nsegs)
22651661bf36SDan Carpenter {
2266ffb07550SAl Viro 	struct user_msghdr msg;
226708adb7daSAl Viro 	ssize_t err;
226808adb7daSAl Viro 
2269ffb07550SAl Viro 	if (copy_from_user(&msg, umsg, sizeof(*umsg)))
22701661bf36SDan Carpenter 		return -EFAULT;
2271dbb490b9SMatthew Leach 
22721f466e1fSChristoph Hellwig 	kmsg->msg_control_is_user = true;
22731f466e1fSChristoph Hellwig 	kmsg->msg_control_user = msg.msg_control;
2274ffb07550SAl Viro 	kmsg->msg_controllen = msg.msg_controllen;
2275ffb07550SAl Viro 	kmsg->msg_flags = msg.msg_flags;
2276ffb07550SAl Viro 
2277ffb07550SAl Viro 	kmsg->msg_namelen = msg.msg_namelen;
2278ffb07550SAl Viro 	if (!msg.msg_name)
22796a2a2b3aSAni Sinha 		kmsg->msg_namelen = 0;
22806a2a2b3aSAni Sinha 
2281dbb490b9SMatthew Leach 	if (kmsg->msg_namelen < 0)
2282dbb490b9SMatthew Leach 		return -EINVAL;
2283dbb490b9SMatthew Leach 
22841661bf36SDan Carpenter 	if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
2285db31c55aSDan Carpenter 		kmsg->msg_namelen = sizeof(struct sockaddr_storage);
228608adb7daSAl Viro 
228708adb7daSAl Viro 	if (save_addr)
2288ffb07550SAl Viro 		*save_addr = msg.msg_name;
228908adb7daSAl Viro 
2290ffb07550SAl Viro 	if (msg.msg_name && kmsg->msg_namelen) {
229108adb7daSAl Viro 		if (!save_addr) {
2292864d9664SPaolo Abeni 			err = move_addr_to_kernel(msg.msg_name,
2293864d9664SPaolo Abeni 						  kmsg->msg_namelen,
229408adb7daSAl Viro 						  kmsg->msg_name);
229508adb7daSAl Viro 			if (err < 0)
229608adb7daSAl Viro 				return err;
229708adb7daSAl Viro 		}
229808adb7daSAl Viro 	} else {
229908adb7daSAl Viro 		kmsg->msg_name = NULL;
230008adb7daSAl Viro 		kmsg->msg_namelen = 0;
230108adb7daSAl Viro 	}
230208adb7daSAl Viro 
2303ffb07550SAl Viro 	if (msg.msg_iovlen > UIO_MAXIOV)
230408adb7daSAl Viro 		return -EMSGSIZE;
230508adb7daSAl Viro 
23060345f931Stadeusz.struk@intel.com 	kmsg->msg_iocb = NULL;
23070a384abfSJens Axboe 	*uiov = msg.msg_iov;
23080a384abfSJens Axboe 	*nsegs = msg.msg_iovlen;
23090a384abfSJens Axboe 	return 0;
23100a384abfSJens Axboe }
23110a384abfSJens Axboe 
23120a384abfSJens Axboe static int copy_msghdr_from_user(struct msghdr *kmsg,
23130a384abfSJens Axboe 				 struct user_msghdr __user *umsg,
23140a384abfSJens Axboe 				 struct sockaddr __user **save_addr,
23150a384abfSJens Axboe 				 struct iovec **iov)
23160a384abfSJens Axboe {
23170a384abfSJens Axboe 	struct user_msghdr msg;
23180a384abfSJens Axboe 	ssize_t err;
23190a384abfSJens Axboe 
23200a384abfSJens Axboe 	err = __copy_msghdr_from_user(kmsg, umsg, save_addr, &msg.msg_iov,
23210a384abfSJens Axboe 					&msg.msg_iovlen);
23220a384abfSJens Axboe 	if (err)
23230a384abfSJens Axboe 		return err;
23240345f931Stadeusz.struk@intel.com 
232587e5e6daSJens Axboe 	err = import_iovec(save_addr ? READ : WRITE,
2326ffb07550SAl Viro 			    msg.msg_iov, msg.msg_iovlen,
2327da184284SAl Viro 			    UIO_FASTIOV, iov, &kmsg->msg_iter);
232887e5e6daSJens Axboe 	return err < 0 ? err : 0;
23291661bf36SDan Carpenter }
23301661bf36SDan Carpenter 
23314257c8caSJens Axboe static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
23324257c8caSJens Axboe 			   unsigned int flags, struct used_address *used_address,
233328a94d8fSTom Herbert 			   unsigned int allowed_msghdr_flags)
23341da177e4SLinus Torvalds {
2335b9d717a7SAlex Williamson 	unsigned char ctl[sizeof(struct cmsghdr) + 20]
2336846cc123SAmit Kushwaha 				__aligned(sizeof(__kernel_size_t));
2337b9d717a7SAlex Williamson 	/* 20 is size of ipv6_pktinfo */
23381da177e4SLinus Torvalds 	unsigned char *ctl_buf = ctl;
2339d8725c86SAl Viro 	int ctl_len;
234008adb7daSAl Viro 	ssize_t err;
23411da177e4SLinus Torvalds 
23421da177e4SLinus Torvalds 	err = -ENOBUFS;
23431da177e4SLinus Torvalds 
2344228e548eSAnton Blanchard 	if (msg_sys->msg_controllen > INT_MAX)
23454257c8caSJens Axboe 		goto out;
234628a94d8fSTom Herbert 	flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
2347228e548eSAnton Blanchard 	ctl_len = msg_sys->msg_controllen;
23481da177e4SLinus Torvalds 	if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
234989bddce5SStephen Hemminger 		err =
2350228e548eSAnton Blanchard 		    cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
235189bddce5SStephen Hemminger 						     sizeof(ctl));
23521da177e4SLinus Torvalds 		if (err)
23534257c8caSJens Axboe 			goto out;
2354228e548eSAnton Blanchard 		ctl_buf = msg_sys->msg_control;
2355228e548eSAnton Blanchard 		ctl_len = msg_sys->msg_controllen;
23561da177e4SLinus Torvalds 	} else if (ctl_len) {
2357ac4340fcSDavid S. Miller 		BUILD_BUG_ON(sizeof(struct cmsghdr) !=
2358ac4340fcSDavid S. Miller 			     CMSG_ALIGN(sizeof(struct cmsghdr)));
235989bddce5SStephen Hemminger 		if (ctl_len > sizeof(ctl)) {
23601da177e4SLinus Torvalds 			ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
23611da177e4SLinus Torvalds 			if (ctl_buf == NULL)
23624257c8caSJens Axboe 				goto out;
23631da177e4SLinus Torvalds 		}
23641da177e4SLinus Torvalds 		err = -EFAULT;
23651f466e1fSChristoph Hellwig 		if (copy_from_user(ctl_buf, msg_sys->msg_control_user, ctl_len))
23661da177e4SLinus Torvalds 			goto out_freectl;
2367228e548eSAnton Blanchard 		msg_sys->msg_control = ctl_buf;
23681f466e1fSChristoph Hellwig 		msg_sys->msg_control_is_user = false;
23691da177e4SLinus Torvalds 	}
2370228e548eSAnton Blanchard 	msg_sys->msg_flags = flags;
23711da177e4SLinus Torvalds 
23721da177e4SLinus Torvalds 	if (sock->file->f_flags & O_NONBLOCK)
2373228e548eSAnton Blanchard 		msg_sys->msg_flags |= MSG_DONTWAIT;
2374c71d8ebeSTetsuo Handa 	/*
2375c71d8ebeSTetsuo Handa 	 * If this is sendmmsg() and current destination address is same as
2376c71d8ebeSTetsuo Handa 	 * previously succeeded address, omit asking LSM's decision.
2377c71d8ebeSTetsuo Handa 	 * used_address->name_len is initialized to UINT_MAX so that the first
2378c71d8ebeSTetsuo Handa 	 * destination address never matches.
2379c71d8ebeSTetsuo Handa 	 */
2380bc909d9dSMathieu Desnoyers 	if (used_address && msg_sys->msg_name &&
2381bc909d9dSMathieu Desnoyers 	    used_address->name_len == msg_sys->msg_namelen &&
2382bc909d9dSMathieu Desnoyers 	    !memcmp(&used_address->name, msg_sys->msg_name,
2383c71d8ebeSTetsuo Handa 		    used_address->name_len)) {
2384d8725c86SAl Viro 		err = sock_sendmsg_nosec(sock, msg_sys);
2385c71d8ebeSTetsuo Handa 		goto out_freectl;
2386c71d8ebeSTetsuo Handa 	}
2387d8725c86SAl Viro 	err = sock_sendmsg(sock, msg_sys);
2388c71d8ebeSTetsuo Handa 	/*
2389c71d8ebeSTetsuo Handa 	 * If this is sendmmsg() and sending to current destination address was
2390c71d8ebeSTetsuo Handa 	 * successful, remember it.
2391c71d8ebeSTetsuo Handa 	 */
2392c71d8ebeSTetsuo Handa 	if (used_address && err >= 0) {
2393c71d8ebeSTetsuo Handa 		used_address->name_len = msg_sys->msg_namelen;
2394bc909d9dSMathieu Desnoyers 		if (msg_sys->msg_name)
2395bc909d9dSMathieu Desnoyers 			memcpy(&used_address->name, msg_sys->msg_name,
2396c71d8ebeSTetsuo Handa 			       used_address->name_len);
2397c71d8ebeSTetsuo Handa 	}
23981da177e4SLinus Torvalds 
23991da177e4SLinus Torvalds out_freectl:
24001da177e4SLinus Torvalds 	if (ctl_buf != ctl)
24011da177e4SLinus Torvalds 		sock_kfree_s(sock->sk, ctl_buf, ctl_len);
24024257c8caSJens Axboe out:
24034257c8caSJens Axboe 	return err;
24044257c8caSJens Axboe }
24054257c8caSJens Axboe 
240603b1230cSJens Axboe int sendmsg_copy_msghdr(struct msghdr *msg,
24074257c8caSJens Axboe 			struct user_msghdr __user *umsg, unsigned flags,
24084257c8caSJens Axboe 			struct iovec **iov)
24094257c8caSJens Axboe {
24104257c8caSJens Axboe 	int err;
24114257c8caSJens Axboe 
24124257c8caSJens Axboe 	if (flags & MSG_CMSG_COMPAT) {
24134257c8caSJens Axboe 		struct compat_msghdr __user *msg_compat;
24144257c8caSJens Axboe 
24154257c8caSJens Axboe 		msg_compat = (struct compat_msghdr __user *) umsg;
24164257c8caSJens Axboe 		err = get_compat_msghdr(msg, msg_compat, NULL, iov);
24174257c8caSJens Axboe 	} else {
24184257c8caSJens Axboe 		err = copy_msghdr_from_user(msg, umsg, NULL, iov);
24194257c8caSJens Axboe 	}
24204257c8caSJens Axboe 	if (err < 0)
24214257c8caSJens Axboe 		return err;
24224257c8caSJens Axboe 
24234257c8caSJens Axboe 	return 0;
24244257c8caSJens Axboe }
24254257c8caSJens Axboe 
24264257c8caSJens Axboe static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
24274257c8caSJens Axboe 			 struct msghdr *msg_sys, unsigned int flags,
24284257c8caSJens Axboe 			 struct used_address *used_address,
24294257c8caSJens Axboe 			 unsigned int allowed_msghdr_flags)
24304257c8caSJens Axboe {
24314257c8caSJens Axboe 	struct sockaddr_storage address;
24324257c8caSJens Axboe 	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
24334257c8caSJens Axboe 	ssize_t err;
24344257c8caSJens Axboe 
24354257c8caSJens Axboe 	msg_sys->msg_name = &address;
24364257c8caSJens Axboe 
24374257c8caSJens Axboe 	err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov);
24384257c8caSJens Axboe 	if (err < 0)
24394257c8caSJens Axboe 		return err;
24404257c8caSJens Axboe 
24414257c8caSJens Axboe 	err = ____sys_sendmsg(sock, msg_sys, flags, used_address,
24424257c8caSJens Axboe 				allowed_msghdr_flags);
2443a74e9106SEric Dumazet 	kfree(iov);
2444228e548eSAnton Blanchard 	return err;
2445228e548eSAnton Blanchard }
2446228e548eSAnton Blanchard 
2447228e548eSAnton Blanchard /*
2448228e548eSAnton Blanchard  *	BSD sendmsg interface
2449228e548eSAnton Blanchard  */
245003b1230cSJens Axboe long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
24510fa03c62SJens Axboe 			unsigned int flags)
24520fa03c62SJens Axboe {
245303b1230cSJens Axboe 	return ____sys_sendmsg(sock, msg, flags, NULL, 0);
24540fa03c62SJens Axboe }
2455228e548eSAnton Blanchard 
2456e1834a32SDominik Brodowski long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2457e1834a32SDominik Brodowski 		   bool forbid_cmsg_compat)
2458228e548eSAnton Blanchard {
2459228e548eSAnton Blanchard 	int fput_needed, err;
2460228e548eSAnton Blanchard 	struct msghdr msg_sys;
24611be374a0SAndy Lutomirski 	struct socket *sock;
2462228e548eSAnton Blanchard 
2463e1834a32SDominik Brodowski 	if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2464e1834a32SDominik Brodowski 		return -EINVAL;
2465e1834a32SDominik Brodowski 
24661be374a0SAndy Lutomirski 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
2467228e548eSAnton Blanchard 	if (!sock)
2468228e548eSAnton Blanchard 		goto out;
2469228e548eSAnton Blanchard 
247028a94d8fSTom Herbert 	err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
2471228e548eSAnton Blanchard 
24726cb153caSBenjamin LaHaise 	fput_light(sock->file, fput_needed);
24731da177e4SLinus Torvalds out:
24741da177e4SLinus Torvalds 	return err;
24751da177e4SLinus Torvalds }
24761da177e4SLinus Torvalds 
2477666547ffSAl Viro SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
2478a7526eb5SAndy Lutomirski {
2479e1834a32SDominik Brodowski 	return __sys_sendmsg(fd, msg, flags, true);
2480a7526eb5SAndy Lutomirski }
2481a7526eb5SAndy Lutomirski 
2482228e548eSAnton Blanchard /*
2483228e548eSAnton Blanchard  *	Linux sendmmsg interface
2484228e548eSAnton Blanchard  */
2485228e548eSAnton Blanchard 
2486228e548eSAnton Blanchard int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2487e1834a32SDominik Brodowski 		   unsigned int flags, bool forbid_cmsg_compat)
2488228e548eSAnton Blanchard {
2489228e548eSAnton Blanchard 	int fput_needed, err, datagrams;
2490228e548eSAnton Blanchard 	struct socket *sock;
2491228e548eSAnton Blanchard 	struct mmsghdr __user *entry;
2492228e548eSAnton Blanchard 	struct compat_mmsghdr __user *compat_entry;
2493228e548eSAnton Blanchard 	struct msghdr msg_sys;
2494c71d8ebeSTetsuo Handa 	struct used_address used_address;
2495f092276dSTom Herbert 	unsigned int oflags = flags;
2496228e548eSAnton Blanchard 
2497e1834a32SDominik Brodowski 	if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2498e1834a32SDominik Brodowski 		return -EINVAL;
2499e1834a32SDominik Brodowski 
250098382f41SAnton Blanchard 	if (vlen > UIO_MAXIOV)
250198382f41SAnton Blanchard 		vlen = UIO_MAXIOV;
2502228e548eSAnton Blanchard 
2503228e548eSAnton Blanchard 	datagrams = 0;
2504228e548eSAnton Blanchard 
2505228e548eSAnton Blanchard 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
2506228e548eSAnton Blanchard 	if (!sock)
2507228e548eSAnton Blanchard 		return err;
2508228e548eSAnton Blanchard 
2509c71d8ebeSTetsuo Handa 	used_address.name_len = UINT_MAX;
2510228e548eSAnton Blanchard 	entry = mmsg;
2511228e548eSAnton Blanchard 	compat_entry = (struct compat_mmsghdr __user *)mmsg;
2512728ffb86SAnton Blanchard 	err = 0;
2513f092276dSTom Herbert 	flags |= MSG_BATCH;
2514228e548eSAnton Blanchard 
2515228e548eSAnton Blanchard 	while (datagrams < vlen) {
2516f092276dSTom Herbert 		if (datagrams == vlen - 1)
2517f092276dSTom Herbert 			flags = oflags;
2518f092276dSTom Herbert 
2519228e548eSAnton Blanchard 		if (MSG_CMSG_COMPAT & flags) {
2520666547ffSAl Viro 			err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
252128a94d8fSTom Herbert 					     &msg_sys, flags, &used_address, MSG_EOR);
2522228e548eSAnton Blanchard 			if (err < 0)
2523228e548eSAnton Blanchard 				break;
2524228e548eSAnton Blanchard 			err = __put_user(err, &compat_entry->msg_len);
2525228e548eSAnton Blanchard 			++compat_entry;
2526228e548eSAnton Blanchard 		} else {
2527a7526eb5SAndy Lutomirski 			err = ___sys_sendmsg(sock,
2528666547ffSAl Viro 					     (struct user_msghdr __user *)entry,
252928a94d8fSTom Herbert 					     &msg_sys, flags, &used_address, MSG_EOR);
2530228e548eSAnton Blanchard 			if (err < 0)
2531228e548eSAnton Blanchard 				break;
2532228e548eSAnton Blanchard 			err = put_user(err, &entry->msg_len);
2533228e548eSAnton Blanchard 			++entry;
2534228e548eSAnton Blanchard 		}
2535228e548eSAnton Blanchard 
2536228e548eSAnton Blanchard 		if (err)
2537228e548eSAnton Blanchard 			break;
2538228e548eSAnton Blanchard 		++datagrams;
25393023898bSSoheil Hassas Yeganeh 		if (msg_data_left(&msg_sys))
25403023898bSSoheil Hassas Yeganeh 			break;
2541a78cb84cSEric Dumazet 		cond_resched();
2542228e548eSAnton Blanchard 	}
2543228e548eSAnton Blanchard 
2544228e548eSAnton Blanchard 	fput_light(sock->file, fput_needed);
2545228e548eSAnton Blanchard 
2546728ffb86SAnton Blanchard 	/* We only return an error if no datagrams were able to be sent */
2547728ffb86SAnton Blanchard 	if (datagrams != 0)
2548228e548eSAnton Blanchard 		return datagrams;
2549228e548eSAnton Blanchard 
2550228e548eSAnton Blanchard 	return err;
2551228e548eSAnton Blanchard }
2552228e548eSAnton Blanchard 
2553228e548eSAnton Blanchard SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2554228e548eSAnton Blanchard 		unsigned int, vlen, unsigned int, flags)
2555228e548eSAnton Blanchard {
2556e1834a32SDominik Brodowski 	return __sys_sendmmsg(fd, mmsg, vlen, flags, true);
2557228e548eSAnton Blanchard }
2558228e548eSAnton Blanchard 
255903b1230cSJens Axboe int recvmsg_copy_msghdr(struct msghdr *msg,
25604257c8caSJens Axboe 			struct user_msghdr __user *umsg, unsigned flags,
25614257c8caSJens Axboe 			struct sockaddr __user **uaddr,
25624257c8caSJens Axboe 			struct iovec **iov)
25634257c8caSJens Axboe {
25644257c8caSJens Axboe 	ssize_t err;
25654257c8caSJens Axboe 
25664257c8caSJens Axboe 	if (MSG_CMSG_COMPAT & flags) {
25674257c8caSJens Axboe 		struct compat_msghdr __user *msg_compat;
25684257c8caSJens Axboe 
25694257c8caSJens Axboe 		msg_compat = (struct compat_msghdr __user *) umsg;
25704257c8caSJens Axboe 		err = get_compat_msghdr(msg, msg_compat, uaddr, iov);
25714257c8caSJens Axboe 	} else {
25724257c8caSJens Axboe 		err = copy_msghdr_from_user(msg, umsg, uaddr, iov);
25734257c8caSJens Axboe 	}
25744257c8caSJens Axboe 	if (err < 0)
25754257c8caSJens Axboe 		return err;
25764257c8caSJens Axboe 
25774257c8caSJens Axboe 	return 0;
25784257c8caSJens Axboe }
25794257c8caSJens Axboe 
25804257c8caSJens Axboe static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys,
25814257c8caSJens Axboe 			   struct user_msghdr __user *msg,
25824257c8caSJens Axboe 			   struct sockaddr __user *uaddr,
25834257c8caSJens Axboe 			   unsigned int flags, int nosec)
25841da177e4SLinus Torvalds {
258589bddce5SStephen Hemminger 	struct compat_msghdr __user *msg_compat =
258689bddce5SStephen Hemminger 					(struct compat_msghdr __user *) msg;
25874257c8caSJens Axboe 	int __user *uaddr_len = COMPAT_NAMELEN(msg);
25884257c8caSJens Axboe 	struct sockaddr_storage addr;
25891da177e4SLinus Torvalds 	unsigned long cmsg_ptr;
25902da62906SAl Viro 	int len;
259108adb7daSAl Viro 	ssize_t err;
25921da177e4SLinus Torvalds 
259308adb7daSAl Viro 	msg_sys->msg_name = &addr;
2594a2e27255SArnaldo Carvalho de Melo 	cmsg_ptr = (unsigned long)msg_sys->msg_control;
2595a2e27255SArnaldo Carvalho de Melo 	msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
25961da177e4SLinus Torvalds 
2597f3d33426SHannes Frederic Sowa 	/* We assume all kernel code knows the size of sockaddr_storage */
2598f3d33426SHannes Frederic Sowa 	msg_sys->msg_namelen = 0;
2599f3d33426SHannes Frederic Sowa 
26001da177e4SLinus Torvalds 	if (sock->file->f_flags & O_NONBLOCK)
26011da177e4SLinus Torvalds 		flags |= MSG_DONTWAIT;
26021af66221SEric Dumazet 
26031af66221SEric Dumazet 	if (unlikely(nosec))
26041af66221SEric Dumazet 		err = sock_recvmsg_nosec(sock, msg_sys, flags);
26051af66221SEric Dumazet 	else
26061af66221SEric Dumazet 		err = sock_recvmsg(sock, msg_sys, flags);
26071af66221SEric Dumazet 
26081da177e4SLinus Torvalds 	if (err < 0)
26094257c8caSJens Axboe 		goto out;
26101da177e4SLinus Torvalds 	len = err;
26111da177e4SLinus Torvalds 
26121da177e4SLinus Torvalds 	if (uaddr != NULL) {
261343db362dSMaciej Żenczykowski 		err = move_addr_to_user(&addr,
2614a2e27255SArnaldo Carvalho de Melo 					msg_sys->msg_namelen, uaddr,
261589bddce5SStephen Hemminger 					uaddr_len);
26161da177e4SLinus Torvalds 		if (err < 0)
26174257c8caSJens Axboe 			goto out;
26181da177e4SLinus Torvalds 	}
2619a2e27255SArnaldo Carvalho de Melo 	err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
262037f7f421SDavid S. Miller 			 COMPAT_FLAGS(msg));
26211da177e4SLinus Torvalds 	if (err)
26224257c8caSJens Axboe 		goto out;
26231da177e4SLinus Torvalds 	if (MSG_CMSG_COMPAT & flags)
2624a2e27255SArnaldo Carvalho de Melo 		err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
26251da177e4SLinus Torvalds 				 &msg_compat->msg_controllen);
26261da177e4SLinus Torvalds 	else
2627a2e27255SArnaldo Carvalho de Melo 		err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
26281da177e4SLinus Torvalds 				 &msg->msg_controllen);
26291da177e4SLinus Torvalds 	if (err)
26304257c8caSJens Axboe 		goto out;
26311da177e4SLinus Torvalds 	err = len;
26324257c8caSJens Axboe out:
26334257c8caSJens Axboe 	return err;
26344257c8caSJens Axboe }
26351da177e4SLinus Torvalds 
26364257c8caSJens Axboe static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
26374257c8caSJens Axboe 			 struct msghdr *msg_sys, unsigned int flags, int nosec)
26384257c8caSJens Axboe {
26394257c8caSJens Axboe 	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
26404257c8caSJens Axboe 	/* user mode address pointers */
26414257c8caSJens Axboe 	struct sockaddr __user *uaddr;
26424257c8caSJens Axboe 	ssize_t err;
26434257c8caSJens Axboe 
26444257c8caSJens Axboe 	err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov);
26454257c8caSJens Axboe 	if (err < 0)
26464257c8caSJens Axboe 		return err;
26474257c8caSJens Axboe 
26484257c8caSJens Axboe 	err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec);
2649a74e9106SEric Dumazet 	kfree(iov);
2650a2e27255SArnaldo Carvalho de Melo 	return err;
2651a2e27255SArnaldo Carvalho de Melo }
2652a2e27255SArnaldo Carvalho de Melo 
2653a2e27255SArnaldo Carvalho de Melo /*
2654a2e27255SArnaldo Carvalho de Melo  *	BSD recvmsg interface
2655a2e27255SArnaldo Carvalho de Melo  */
2656a2e27255SArnaldo Carvalho de Melo 
265703b1230cSJens Axboe long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
265803b1230cSJens Axboe 			struct user_msghdr __user *umsg,
265903b1230cSJens Axboe 			struct sockaddr __user *uaddr, unsigned int flags)
2660aa1fa28fSJens Axboe {
266103b1230cSJens Axboe 	return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
2662aa1fa28fSJens Axboe }
2663aa1fa28fSJens Axboe 
2664e1834a32SDominik Brodowski long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
2665e1834a32SDominik Brodowski 		   bool forbid_cmsg_compat)
2666a2e27255SArnaldo Carvalho de Melo {
2667a2e27255SArnaldo Carvalho de Melo 	int fput_needed, err;
2668a2e27255SArnaldo Carvalho de Melo 	struct msghdr msg_sys;
26691be374a0SAndy Lutomirski 	struct socket *sock;
2670a2e27255SArnaldo Carvalho de Melo 
2671e1834a32SDominik Brodowski 	if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT))
2672e1834a32SDominik Brodowski 		return -EINVAL;
2673e1834a32SDominik Brodowski 
26741be374a0SAndy Lutomirski 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
2675a2e27255SArnaldo Carvalho de Melo 	if (!sock)
2676a2e27255SArnaldo Carvalho de Melo 		goto out;
2677a2e27255SArnaldo Carvalho de Melo 
2678a7526eb5SAndy Lutomirski 	err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
2679a2e27255SArnaldo Carvalho de Melo 
26806cb153caSBenjamin LaHaise 	fput_light(sock->file, fput_needed);
26811da177e4SLinus Torvalds out:
26821da177e4SLinus Torvalds 	return err;
26831da177e4SLinus Torvalds }
26841da177e4SLinus Torvalds 
2685666547ffSAl Viro SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
2686a7526eb5SAndy Lutomirski 		unsigned int, flags)
2687a7526eb5SAndy Lutomirski {
2688e1834a32SDominik Brodowski 	return __sys_recvmsg(fd, msg, flags, true);
2689a7526eb5SAndy Lutomirski }
2690a7526eb5SAndy Lutomirski 
2691a2e27255SArnaldo Carvalho de Melo /*
2692a2e27255SArnaldo Carvalho de Melo  *     Linux recvmmsg interface
2693a2e27255SArnaldo Carvalho de Melo  */
26941da177e4SLinus Torvalds 
2695e11d4284SArnd Bergmann static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
2696e11d4284SArnd Bergmann 			  unsigned int vlen, unsigned int flags,
2697e11d4284SArnd Bergmann 			  struct timespec64 *timeout)
2698a2e27255SArnaldo Carvalho de Melo {
2699a2e27255SArnaldo Carvalho de Melo 	int fput_needed, err, datagrams;
2700a2e27255SArnaldo Carvalho de Melo 	struct socket *sock;
2701a2e27255SArnaldo Carvalho de Melo 	struct mmsghdr __user *entry;
2702d7256d0eSJean-Mickael Guerin 	struct compat_mmsghdr __user *compat_entry;
2703a2e27255SArnaldo Carvalho de Melo 	struct msghdr msg_sys;
2704766b9f92SDeepa Dinamani 	struct timespec64 end_time;
2705766b9f92SDeepa Dinamani 	struct timespec64 timeout64;
2706a2e27255SArnaldo Carvalho de Melo 
2707a2e27255SArnaldo Carvalho de Melo 	if (timeout &&
2708a2e27255SArnaldo Carvalho de Melo 	    poll_select_set_timeout(&end_time, timeout->tv_sec,
2709a2e27255SArnaldo Carvalho de Melo 				    timeout->tv_nsec))
2710a2e27255SArnaldo Carvalho de Melo 		return -EINVAL;
2711a2e27255SArnaldo Carvalho de Melo 
2712a2e27255SArnaldo Carvalho de Melo 	datagrams = 0;
2713a2e27255SArnaldo Carvalho de Melo 
2714a2e27255SArnaldo Carvalho de Melo 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
2715a2e27255SArnaldo Carvalho de Melo 	if (!sock)
2716a2e27255SArnaldo Carvalho de Melo 		return err;
2717a2e27255SArnaldo Carvalho de Melo 
27187797dc41SSoheil Hassas Yeganeh 	if (likely(!(flags & MSG_ERRQUEUE))) {
2719a2e27255SArnaldo Carvalho de Melo 		err = sock_error(sock->sk);
2720e623a9e9SMaxime Jayat 		if (err) {
2721e623a9e9SMaxime Jayat 			datagrams = err;
2722a2e27255SArnaldo Carvalho de Melo 			goto out_put;
2723e623a9e9SMaxime Jayat 		}
27247797dc41SSoheil Hassas Yeganeh 	}
2725a2e27255SArnaldo Carvalho de Melo 
2726a2e27255SArnaldo Carvalho de Melo 	entry = mmsg;
2727d7256d0eSJean-Mickael Guerin 	compat_entry = (struct compat_mmsghdr __user *)mmsg;
2728a2e27255SArnaldo Carvalho de Melo 
2729a2e27255SArnaldo Carvalho de Melo 	while (datagrams < vlen) {
2730a2e27255SArnaldo Carvalho de Melo 		/*
2731a2e27255SArnaldo Carvalho de Melo 		 * No need to ask LSM for more than the first datagram.
2732a2e27255SArnaldo Carvalho de Melo 		 */
2733d7256d0eSJean-Mickael Guerin 		if (MSG_CMSG_COMPAT & flags) {
2734666547ffSAl Viro 			err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
2735b9eb8b87SAnton Blanchard 					     &msg_sys, flags & ~MSG_WAITFORONE,
2736b9eb8b87SAnton Blanchard 					     datagrams);
2737d7256d0eSJean-Mickael Guerin 			if (err < 0)
2738d7256d0eSJean-Mickael Guerin 				break;
2739d7256d0eSJean-Mickael Guerin 			err = __put_user(err, &compat_entry->msg_len);
2740d7256d0eSJean-Mickael Guerin 			++compat_entry;
2741d7256d0eSJean-Mickael Guerin 		} else {
2742a7526eb5SAndy Lutomirski 			err = ___sys_recvmsg(sock,
2743666547ffSAl Viro 					     (struct user_msghdr __user *)entry,
2744b9eb8b87SAnton Blanchard 					     &msg_sys, flags & ~MSG_WAITFORONE,
2745b9eb8b87SAnton Blanchard 					     datagrams);
2746a2e27255SArnaldo Carvalho de Melo 			if (err < 0)
2747a2e27255SArnaldo Carvalho de Melo 				break;
2748a2e27255SArnaldo Carvalho de Melo 			err = put_user(err, &entry->msg_len);
2749d7256d0eSJean-Mickael Guerin 			++entry;
2750d7256d0eSJean-Mickael Guerin 		}
2751d7256d0eSJean-Mickael Guerin 
2752a2e27255SArnaldo Carvalho de Melo 		if (err)
2753a2e27255SArnaldo Carvalho de Melo 			break;
2754a2e27255SArnaldo Carvalho de Melo 		++datagrams;
2755a2e27255SArnaldo Carvalho de Melo 
275671c5c159SBrandon L Black 		/* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
275771c5c159SBrandon L Black 		if (flags & MSG_WAITFORONE)
275871c5c159SBrandon L Black 			flags |= MSG_DONTWAIT;
275971c5c159SBrandon L Black 
2760a2e27255SArnaldo Carvalho de Melo 		if (timeout) {
2761766b9f92SDeepa Dinamani 			ktime_get_ts64(&timeout64);
2762c2e6c856SArnd Bergmann 			*timeout = timespec64_sub(end_time, timeout64);
2763a2e27255SArnaldo Carvalho de Melo 			if (timeout->tv_sec < 0) {
2764a2e27255SArnaldo Carvalho de Melo 				timeout->tv_sec = timeout->tv_nsec = 0;
2765a2e27255SArnaldo Carvalho de Melo 				break;
2766a2e27255SArnaldo Carvalho de Melo 			}
2767a2e27255SArnaldo Carvalho de Melo 
2768a2e27255SArnaldo Carvalho de Melo 			/* Timeout, return less than vlen datagrams */
2769a2e27255SArnaldo Carvalho de Melo 			if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2770a2e27255SArnaldo Carvalho de Melo 				break;
2771a2e27255SArnaldo Carvalho de Melo 		}
2772a2e27255SArnaldo Carvalho de Melo 
2773a2e27255SArnaldo Carvalho de Melo 		/* Out of band data, return right away */
2774a2e27255SArnaldo Carvalho de Melo 		if (msg_sys.msg_flags & MSG_OOB)
2775a2e27255SArnaldo Carvalho de Melo 			break;
2776a78cb84cSEric Dumazet 		cond_resched();
2777a2e27255SArnaldo Carvalho de Melo 	}
2778a2e27255SArnaldo Carvalho de Melo 
2779a2e27255SArnaldo Carvalho de Melo 	if (err == 0)
278034b88a68SArnaldo Carvalho de Melo 		goto out_put;
2781a2e27255SArnaldo Carvalho de Melo 
278234b88a68SArnaldo Carvalho de Melo 	if (datagrams == 0) {
278334b88a68SArnaldo Carvalho de Melo 		datagrams = err;
278434b88a68SArnaldo Carvalho de Melo 		goto out_put;
278534b88a68SArnaldo Carvalho de Melo 	}
278634b88a68SArnaldo Carvalho de Melo 
2787a2e27255SArnaldo Carvalho de Melo 	/*
2788a2e27255SArnaldo Carvalho de Melo 	 * We may return less entries than requested (vlen) if the
2789a2e27255SArnaldo Carvalho de Melo 	 * sock is non block and there aren't enough datagrams...
2790a2e27255SArnaldo Carvalho de Melo 	 */
2791a2e27255SArnaldo Carvalho de Melo 	if (err != -EAGAIN) {
2792a2e27255SArnaldo Carvalho de Melo 		/*
2793a2e27255SArnaldo Carvalho de Melo 		 * ... or  if recvmsg returns an error after we
2794a2e27255SArnaldo Carvalho de Melo 		 * received some datagrams, where we record the
2795a2e27255SArnaldo Carvalho de Melo 		 * error to return on the next call or if the
2796a2e27255SArnaldo Carvalho de Melo 		 * app asks about it using getsockopt(SO_ERROR).
2797a2e27255SArnaldo Carvalho de Melo 		 */
2798a2e27255SArnaldo Carvalho de Melo 		sock->sk->sk_err = -err;
2799a2e27255SArnaldo Carvalho de Melo 	}
280034b88a68SArnaldo Carvalho de Melo out_put:
280134b88a68SArnaldo Carvalho de Melo 	fput_light(sock->file, fput_needed);
2802a2e27255SArnaldo Carvalho de Melo 
2803a2e27255SArnaldo Carvalho de Melo 	return datagrams;
2804a2e27255SArnaldo Carvalho de Melo }
2805a2e27255SArnaldo Carvalho de Melo 
2806e11d4284SArnd Bergmann int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
28071255e269SDominik Brodowski 		   unsigned int vlen, unsigned int flags,
2808e11d4284SArnd Bergmann 		   struct __kernel_timespec __user *timeout,
2809e11d4284SArnd Bergmann 		   struct old_timespec32 __user *timeout32)
2810a2e27255SArnaldo Carvalho de Melo {
2811a2e27255SArnaldo Carvalho de Melo 	int datagrams;
2812c2e6c856SArnd Bergmann 	struct timespec64 timeout_sys;
2813a2e27255SArnaldo Carvalho de Melo 
2814e11d4284SArnd Bergmann 	if (timeout && get_timespec64(&timeout_sys, timeout))
2815a2e27255SArnaldo Carvalho de Melo 		return -EFAULT;
2816a2e27255SArnaldo Carvalho de Melo 
2817e11d4284SArnd Bergmann 	if (timeout32 && get_old_timespec32(&timeout_sys, timeout32))
2818e11d4284SArnd Bergmann 		return -EFAULT;
2819a2e27255SArnaldo Carvalho de Melo 
2820e11d4284SArnd Bergmann 	if (!timeout && !timeout32)
2821e11d4284SArnd Bergmann 		return do_recvmmsg(fd, mmsg, vlen, flags, NULL);
2822e11d4284SArnd Bergmann 
2823e11d4284SArnd Bergmann 	datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2824e11d4284SArnd Bergmann 
2825e11d4284SArnd Bergmann 	if (datagrams <= 0)
2826e11d4284SArnd Bergmann 		return datagrams;
2827e11d4284SArnd Bergmann 
2828e11d4284SArnd Bergmann 	if (timeout && put_timespec64(&timeout_sys, timeout))
2829e11d4284SArnd Bergmann 		datagrams = -EFAULT;
2830e11d4284SArnd Bergmann 
2831e11d4284SArnd Bergmann 	if (timeout32 && put_old_timespec32(&timeout_sys, timeout32))
2832a2e27255SArnaldo Carvalho de Melo 		datagrams = -EFAULT;
2833a2e27255SArnaldo Carvalho de Melo 
2834a2e27255SArnaldo Carvalho de Melo 	return datagrams;
2835a2e27255SArnaldo Carvalho de Melo }
2836a2e27255SArnaldo Carvalho de Melo 
28371255e269SDominik Brodowski SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
28381255e269SDominik Brodowski 		unsigned int, vlen, unsigned int, flags,
2839c2e6c856SArnd Bergmann 		struct __kernel_timespec __user *, timeout)
28401255e269SDominik Brodowski {
2841e11d4284SArnd Bergmann 	if (flags & MSG_CMSG_COMPAT)
2842e11d4284SArnd Bergmann 		return -EINVAL;
2843e11d4284SArnd Bergmann 
2844e11d4284SArnd Bergmann 	return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL);
28451255e269SDominik Brodowski }
28461255e269SDominik Brodowski 
#ifdef CONFIG_COMPAT_32BIT_TIME
/*
 * recvmmsg system call with a legacy 32-bit timespec.  MSG_CMSG_COMPAT is
 * refused with -EINVAL; everything else is handled by __sys_recvmmsg().
 */
SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct old_timespec32 __user *, timeout)
{
	return (flags & MSG_CMSG_COMPAT) ?
		-EINVAL :
		__sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout);
}
#endif
2858e11d4284SArnd Bergmann 
2859a2e27255SArnaldo Carvalho de Melo #ifdef __ARCH_WANT_SYS_SOCKETCALL
/*
 * Argument list sizes for sys_socketcall, in bytes.
 * The array index is the socketcall number; slot 0 is unused.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0),	/*  0 */
	AL(3),	/*  1 */
	AL(3),	/*  2 */
	AL(3),	/*  3 */
	AL(2),	/*  4 */
	AL(3),	/*  5 */
	AL(3),	/*  6 */
	AL(3),	/*  7 */
	AL(4),	/*  8 */
	AL(4),	/*  9 */
	AL(4),	/* 10 */
	AL(6),	/* 11 */
	AL(6),	/* 12 */
	AL(2),	/* 13 */
	AL(5),	/* 14 */
	AL(5),	/* 15 */
	AL(3),	/* 16 */
	AL(3),	/* 17 */
	AL(4),	/* 18 */
	AL(5),	/* 19 */
	AL(4),	/* 20 */
};

#undef AL
28701da177e4SLinus Torvalds 
28711da177e4SLinus Torvalds /*
28721da177e4SLinus Torvalds  *	System call vectors.
28731da177e4SLinus Torvalds  *
28741da177e4SLinus Torvalds  *	Argument checking cleaned up. Saved 20% in size.
28751da177e4SLinus Torvalds  *  This function doesn't need to set the kernel lock because
28761da177e4SLinus Torvalds  *  it is set by the callees.
28771da177e4SLinus Torvalds  */
28781da177e4SLinus Torvalds 
28793e0fa65fSHeiko Carstens SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
28801da177e4SLinus Torvalds {
28812950fa9dSChen Gang 	unsigned long a[AUDITSC_ARGS];
28821da177e4SLinus Torvalds 	unsigned long a0, a1;
28831da177e4SLinus Torvalds 	int err;
288447379052SArjan van de Ven 	unsigned int len;
28851da177e4SLinus Torvalds 
2886228e548eSAnton Blanchard 	if (call < 1 || call > SYS_SENDMMSG)
28871da177e4SLinus Torvalds 		return -EINVAL;
2888c8e8cd57SJeremy Cline 	call = array_index_nospec(call, SYS_SENDMMSG + 1);
28891da177e4SLinus Torvalds 
289047379052SArjan van de Ven 	len = nargs[call];
289147379052SArjan van de Ven 	if (len > sizeof(a))
289247379052SArjan van de Ven 		return -EINVAL;
289347379052SArjan van de Ven 
28941da177e4SLinus Torvalds 	/* copy_from_user should be SMP safe. */
289547379052SArjan van de Ven 	if (copy_from_user(a, args, len))
28961da177e4SLinus Torvalds 		return -EFAULT;
28971da177e4SLinus Torvalds 
28982950fa9dSChen Gang 	err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
28992950fa9dSChen Gang 	if (err)
29002950fa9dSChen Gang 		return err;
29013ec3b2fbSDavid Woodhouse 
29021da177e4SLinus Torvalds 	a0 = a[0];
29031da177e4SLinus Torvalds 	a1 = a[1];
29041da177e4SLinus Torvalds 
290589bddce5SStephen Hemminger 	switch (call) {
29061da177e4SLinus Torvalds 	case SYS_SOCKET:
29079d6a15c3SDominik Brodowski 		err = __sys_socket(a0, a1, a[2]);
29081da177e4SLinus Torvalds 		break;
29091da177e4SLinus Torvalds 	case SYS_BIND:
2910a87d35d8SDominik Brodowski 		err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
29111da177e4SLinus Torvalds 		break;
29121da177e4SLinus Torvalds 	case SYS_CONNECT:
29131387c2c2SDominik Brodowski 		err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
29141da177e4SLinus Torvalds 		break;
29151da177e4SLinus Torvalds 	case SYS_LISTEN:
291625e290eeSDominik Brodowski 		err = __sys_listen(a0, a1);
29171da177e4SLinus Torvalds 		break;
29181da177e4SLinus Torvalds 	case SYS_ACCEPT:
29194541e805SDominik Brodowski 		err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2920aaca0bdcSUlrich Drepper 				    (int __user *)a[2], 0);
29211da177e4SLinus Torvalds 		break;
29221da177e4SLinus Torvalds 	case SYS_GETSOCKNAME:
292389bddce5SStephen Hemminger 		err =
29248882a107SDominik Brodowski 		    __sys_getsockname(a0, (struct sockaddr __user *)a1,
292589bddce5SStephen Hemminger 				      (int __user *)a[2]);
29261da177e4SLinus Torvalds 		break;
29271da177e4SLinus Torvalds 	case SYS_GETPEERNAME:
292889bddce5SStephen Hemminger 		err =
2929b21c8f83SDominik Brodowski 		    __sys_getpeername(a0, (struct sockaddr __user *)a1,
293089bddce5SStephen Hemminger 				      (int __user *)a[2]);
29311da177e4SLinus Torvalds 		break;
29321da177e4SLinus Torvalds 	case SYS_SOCKETPAIR:
29336debc8d8SDominik Brodowski 		err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
29341da177e4SLinus Torvalds 		break;
29351da177e4SLinus Torvalds 	case SYS_SEND:
2936f3bf896bSDominik Brodowski 		err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
2937f3bf896bSDominik Brodowski 				   NULL, 0);
29381da177e4SLinus Torvalds 		break;
29391da177e4SLinus Torvalds 	case SYS_SENDTO:
2940211b634bSDominik Brodowski 		err = __sys_sendto(a0, (void __user *)a1, a[2], a[3],
29411da177e4SLinus Torvalds 				   (struct sockaddr __user *)a[4], a[5]);
29421da177e4SLinus Torvalds 		break;
29431da177e4SLinus Torvalds 	case SYS_RECV:
2944d27e9afcSDominik Brodowski 		err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
2945d27e9afcSDominik Brodowski 				     NULL, NULL);
29461da177e4SLinus Torvalds 		break;
29471da177e4SLinus Torvalds 	case SYS_RECVFROM:
29487a09e1ebSDominik Brodowski 		err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
294989bddce5SStephen Hemminger 				     (struct sockaddr __user *)a[4],
295089bddce5SStephen Hemminger 				     (int __user *)a[5]);
29511da177e4SLinus Torvalds 		break;
29521da177e4SLinus Torvalds 	case SYS_SHUTDOWN:
2953005a1aeaSDominik Brodowski 		err = __sys_shutdown(a0, a1);
29541da177e4SLinus Torvalds 		break;
29551da177e4SLinus Torvalds 	case SYS_SETSOCKOPT:
2956cc36dca0SDominik Brodowski 		err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3],
2957cc36dca0SDominik Brodowski 				       a[4]);
29581da177e4SLinus Torvalds 		break;
29591da177e4SLinus Torvalds 	case SYS_GETSOCKOPT:
296089bddce5SStephen Hemminger 		err =
296113a2d70eSDominik Brodowski 		    __sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
296289bddce5SStephen Hemminger 				     (int __user *)a[4]);
29631da177e4SLinus Torvalds 		break;
29641da177e4SLinus Torvalds 	case SYS_SENDMSG:
2965e1834a32SDominik Brodowski 		err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1,
2966e1834a32SDominik Brodowski 				    a[2], true);
29671da177e4SLinus Torvalds 		break;
2968228e548eSAnton Blanchard 	case SYS_SENDMMSG:
2969e1834a32SDominik Brodowski 		err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2],
2970e1834a32SDominik Brodowski 				     a[3], true);
2971228e548eSAnton Blanchard 		break;
29721da177e4SLinus Torvalds 	case SYS_RECVMSG:
2973e1834a32SDominik Brodowski 		err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1,
2974e1834a32SDominik Brodowski 				    a[2], true);
29751da177e4SLinus Torvalds 		break;
2976a2e27255SArnaldo Carvalho de Melo 	case SYS_RECVMMSG:
29773ca47e95SArnd Bergmann 		if (IS_ENABLED(CONFIG_64BIT))
2978e11d4284SArnd Bergmann 			err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2979e11d4284SArnd Bergmann 					     a[2], a[3],
2980e11d4284SArnd Bergmann 					     (struct __kernel_timespec __user *)a[4],
2981e11d4284SArnd Bergmann 					     NULL);
2982e11d4284SArnd Bergmann 		else
2983e11d4284SArnd Bergmann 			err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1,
2984e11d4284SArnd Bergmann 					     a[2], a[3], NULL,
2985e11d4284SArnd Bergmann 					     (struct old_timespec32 __user *)a[4]);
2986a2e27255SArnaldo Carvalho de Melo 		break;
2987de11defeSUlrich Drepper 	case SYS_ACCEPT4:
29884541e805SDominik Brodowski 		err = __sys_accept4(a0, (struct sockaddr __user *)a1,
2989de11defeSUlrich Drepper 				    (int __user *)a[2], a[3]);
2990aaca0bdcSUlrich Drepper 		break;
29911da177e4SLinus Torvalds 	default:
29921da177e4SLinus Torvalds 		err = -EINVAL;
29931da177e4SLinus Torvalds 		break;
29941da177e4SLinus Torvalds 	}
29951da177e4SLinus Torvalds 	return err;
29961da177e4SLinus Torvalds }
29971da177e4SLinus Torvalds 
29981da177e4SLinus Torvalds #endif				/* __ARCH_WANT_SYS_SOCKETCALL */
29991da177e4SLinus Torvalds 
300055737fdaSStephen Hemminger /**
300155737fdaSStephen Hemminger  *	sock_register - add a socket protocol handler
300255737fdaSStephen Hemminger  *	@ops: description of protocol
300355737fdaSStephen Hemminger  *
30041da177e4SLinus Torvalds  *	This function is called by a protocol handler that wants to
30051da177e4SLinus Torvalds  *	advertise its address family, and have it linked into the
3006e793c0f7SMasanari Iida  *	socket interface. The value ops->family corresponds to the
300755737fdaSStephen Hemminger  *	socket system call protocol family.
30081da177e4SLinus Torvalds  */
3009f0fd27d4SStephen Hemminger int sock_register(const struct net_proto_family *ops)
30101da177e4SLinus Torvalds {
30111da177e4SLinus Torvalds 	int err;
30121da177e4SLinus Torvalds 
30131da177e4SLinus Torvalds 	if (ops->family >= NPROTO) {
30143410f22eSYang Yingliang 		pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
30151da177e4SLinus Torvalds 		return -ENOBUFS;
30161da177e4SLinus Torvalds 	}
301755737fdaSStephen Hemminger 
301855737fdaSStephen Hemminger 	spin_lock(&net_family_lock);
3019190683a9SEric Dumazet 	if (rcu_dereference_protected(net_families[ops->family],
3020190683a9SEric Dumazet 				      lockdep_is_held(&net_family_lock)))
30211da177e4SLinus Torvalds 		err = -EEXIST;
302255737fdaSStephen Hemminger 	else {
3023cf778b00SEric Dumazet 		rcu_assign_pointer(net_families[ops->family], ops);
30241da177e4SLinus Torvalds 		err = 0;
30251da177e4SLinus Torvalds 	}
302655737fdaSStephen Hemminger 	spin_unlock(&net_family_lock);
302755737fdaSStephen Hemminger 
3028fe0bdbdeSYejune Deng 	pr_info("NET: Registered %s protocol family\n", pf_family_names[ops->family]);
30291da177e4SLinus Torvalds 	return err;
30301da177e4SLinus Torvalds }
3031c6d409cfSEric Dumazet EXPORT_SYMBOL(sock_register);
30321da177e4SLinus Torvalds 
303355737fdaSStephen Hemminger /**
303455737fdaSStephen Hemminger  *	sock_unregister - remove a protocol handler
303555737fdaSStephen Hemminger  *	@family: protocol family to remove
303655737fdaSStephen Hemminger  *
30371da177e4SLinus Torvalds  *	This function is called by a protocol handler that wants to
30381da177e4SLinus Torvalds  *	remove its address family, and have it unlinked from the
303955737fdaSStephen Hemminger  *	new socket creation.
304055737fdaSStephen Hemminger  *
304155737fdaSStephen Hemminger  *	If protocol handler is a module, then it can use module reference
304255737fdaSStephen Hemminger  *	counts to protect against new references. If protocol handler is not
304355737fdaSStephen Hemminger  *	a module then it needs to provide its own protection in
304455737fdaSStephen Hemminger  *	the ops->create routine.
30451da177e4SLinus Torvalds  */
3046f0fd27d4SStephen Hemminger void sock_unregister(int family)
30471da177e4SLinus Torvalds {
3048f0fd27d4SStephen Hemminger 	BUG_ON(family < 0 || family >= NPROTO);
30491da177e4SLinus Torvalds 
305055737fdaSStephen Hemminger 	spin_lock(&net_family_lock);
3051a9b3cd7fSStephen Hemminger 	RCU_INIT_POINTER(net_families[family], NULL);
305255737fdaSStephen Hemminger 	spin_unlock(&net_family_lock);
305355737fdaSStephen Hemminger 
305455737fdaSStephen Hemminger 	synchronize_rcu();
305555737fdaSStephen Hemminger 
3056fe0bdbdeSYejune Deng 	pr_info("NET: Unregistered %s protocol family\n", pf_family_names[family]);
30571da177e4SLinus Torvalds }
3058c6d409cfSEric Dumazet EXPORT_SYMBOL(sock_unregister);
30591da177e4SLinus Torvalds 
3060bf2ae2e4SXin Long bool sock_is_registered(int family)
3061bf2ae2e4SXin Long {
306266b51b0aSJeremy Cline 	return family < NPROTO && rcu_access_pointer(net_families[family]);
3063bf2ae2e4SXin Long }
3064bf2ae2e4SXin Long 
306577d76ea3SAndi Kleen static int __init sock_init(void)
30661da177e4SLinus Torvalds {
3067b3e19d92SNick Piggin 	int err;
30682ca794e5SEric W. Biederman 	/*
30692ca794e5SEric W. Biederman 	 *      Initialize the network sysctl infrastructure.
30702ca794e5SEric W. Biederman 	 */
30712ca794e5SEric W. Biederman 	err = net_sysctl_init();
30722ca794e5SEric W. Biederman 	if (err)
30732ca794e5SEric W. Biederman 		goto out;
3074b3e19d92SNick Piggin 
30751da177e4SLinus Torvalds 	/*
30761da177e4SLinus Torvalds 	 *      Initialize skbuff SLAB cache
30771da177e4SLinus Torvalds 	 */
30781da177e4SLinus Torvalds 	skb_init();
30791da177e4SLinus Torvalds 
30801da177e4SLinus Torvalds 	/*
30811da177e4SLinus Torvalds 	 *      Initialize the protocols module.
30821da177e4SLinus Torvalds 	 */
30831da177e4SLinus Torvalds 
30841da177e4SLinus Torvalds 	init_inodecache();
3085b3e19d92SNick Piggin 
3086b3e19d92SNick Piggin 	err = register_filesystem(&sock_fs_type);
3087b3e19d92SNick Piggin 	if (err)
308847260ba9SMiaohe Lin 		goto out;
30891da177e4SLinus Torvalds 	sock_mnt = kern_mount(&sock_fs_type);
3090b3e19d92SNick Piggin 	if (IS_ERR(sock_mnt)) {
3091b3e19d92SNick Piggin 		err = PTR_ERR(sock_mnt);
3092b3e19d92SNick Piggin 		goto out_mount;
3093b3e19d92SNick Piggin 	}
309477d76ea3SAndi Kleen 
309577d76ea3SAndi Kleen 	/* The real protocol initialization is performed in later initcalls.
30961da177e4SLinus Torvalds 	 */
30971da177e4SLinus Torvalds 
30981da177e4SLinus Torvalds #ifdef CONFIG_NETFILTER
30996d11cfdbSPablo Neira Ayuso 	err = netfilter_init();
31006d11cfdbSPablo Neira Ayuso 	if (err)
31016d11cfdbSPablo Neira Ayuso 		goto out;
31021da177e4SLinus Torvalds #endif
3103cbeb321aSDavid S. Miller 
3104408eccceSDaniel Borkmann 	ptp_classifier_init();
3105c1f19b51SRichard Cochran 
3106b3e19d92SNick Piggin out:
3107b3e19d92SNick Piggin 	return err;
3108b3e19d92SNick Piggin 
3109b3e19d92SNick Piggin out_mount:
3110b3e19d92SNick Piggin 	unregister_filesystem(&sock_fs_type);
3111b3e19d92SNick Piggin 	goto out;
31121da177e4SLinus Torvalds }
31131da177e4SLinus Torvalds 
311477d76ea3SAndi Kleen core_initcall(sock_init);	/* early initcall */
311577d76ea3SAndi Kleen 
31161da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
31171da177e4SLinus Torvalds void socket_seq_show(struct seq_file *seq)
31181da177e4SLinus Torvalds {
3119648845abSTonghao Zhang 	seq_printf(seq, "sockets: used %d\n",
3120648845abSTonghao Zhang 		   sock_inuse_get(seq->private));
31211da177e4SLinus Torvalds }
31221da177e4SLinus Torvalds #endif				/* CONFIG_PROC_FS */
31231da177e4SLinus Torvalds 
3124*29c49648SArnd Bergmann /* Handle the fact that while struct ifreq has the same *layout* on
3125*29c49648SArnd Bergmann  * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data,
3126*29c49648SArnd Bergmann  * which are handled elsewhere, it still has different *size* due to
3127*29c49648SArnd Bergmann  * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit,
3128*29c49648SArnd Bergmann  * resulting in struct ifreq being 32 and 40 bytes respectively).
3129*29c49648SArnd Bergmann  * As a result, if the struct happens to be at the end of a page and
3130*29c49648SArnd Bergmann  * the next page isn't readable/writable, we get a fault. To prevent
3131*29c49648SArnd Bergmann  * that, copy back and forth to the full size.
3132*29c49648SArnd Bergmann  */
3133*29c49648SArnd Bergmann int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg)
3134*29c49648SArnd Bergmann {
3135*29c49648SArnd Bergmann 	if (in_compat_syscall()) {
3136*29c49648SArnd Bergmann 		struct compat_ifreq *ifr32 = (struct compat_ifreq *)ifr;
3137*29c49648SArnd Bergmann 
3138*29c49648SArnd Bergmann 		memset(ifr, 0, sizeof(*ifr));
3139*29c49648SArnd Bergmann 		if (copy_from_user(ifr32, arg, sizeof(*ifr32)))
3140*29c49648SArnd Bergmann 			return -EFAULT;
3141*29c49648SArnd Bergmann 
3142*29c49648SArnd Bergmann 		if (ifrdata)
3143*29c49648SArnd Bergmann 			*ifrdata = compat_ptr(ifr32->ifr_data);
3144*29c49648SArnd Bergmann 
3145*29c49648SArnd Bergmann 		return 0;
3146*29c49648SArnd Bergmann 	}
3147*29c49648SArnd Bergmann 
3148*29c49648SArnd Bergmann 	if (copy_from_user(ifr, arg, sizeof(*ifr)))
3149*29c49648SArnd Bergmann 		return -EFAULT;
3150*29c49648SArnd Bergmann 
3151*29c49648SArnd Bergmann 	if (ifrdata)
3152*29c49648SArnd Bergmann 		*ifrdata = ifr->ifr_data;
3153*29c49648SArnd Bergmann 
3154*29c49648SArnd Bergmann 	return 0;
3155*29c49648SArnd Bergmann }
3156*29c49648SArnd Bergmann EXPORT_SYMBOL(get_user_ifreq);
3157*29c49648SArnd Bergmann 
3158*29c49648SArnd Bergmann int put_user_ifreq(struct ifreq *ifr, void __user *arg)
3159*29c49648SArnd Bergmann {
3160*29c49648SArnd Bergmann 	size_t size = sizeof(*ifr);
3161*29c49648SArnd Bergmann 
3162*29c49648SArnd Bergmann 	if (in_compat_syscall())
3163*29c49648SArnd Bergmann 		size = sizeof(struct compat_ifreq);
3164*29c49648SArnd Bergmann 
3165*29c49648SArnd Bergmann 	if (copy_to_user(arg, ifr, size))
3166*29c49648SArnd Bergmann 		return -EFAULT;
3167*29c49648SArnd Bergmann 
3168*29c49648SArnd Bergmann 	return 0;
3169*29c49648SArnd Bergmann }
3170*29c49648SArnd Bergmann EXPORT_SYMBOL(put_user_ifreq);
3171*29c49648SArnd Bergmann 
317289bbfc95SShaun Pereira #ifdef CONFIG_COMPAT
31737a50a240SArnd Bergmann static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
31747a50a240SArnd Bergmann {
31757a50a240SArnd Bergmann 	compat_uptr_t uptr32;
317644c02a2cSAl Viro 	struct ifreq ifr;
317744c02a2cSAl Viro 	void __user *saved;
317844c02a2cSAl Viro 	int err;
31797a50a240SArnd Bergmann 
3180*29c49648SArnd Bergmann 	if (get_user_ifreq(&ifr, NULL, uifr32))
31817a50a240SArnd Bergmann 		return -EFAULT;
31827a50a240SArnd Bergmann 
31837a50a240SArnd Bergmann 	if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
31847a50a240SArnd Bergmann 		return -EFAULT;
31857a50a240SArnd Bergmann 
318644c02a2cSAl Viro 	saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
318744c02a2cSAl Viro 	ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
31887a50a240SArnd Bergmann 
318944c02a2cSAl Viro 	err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
319044c02a2cSAl Viro 	if (!err) {
319144c02a2cSAl Viro 		ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
3192*29c49648SArnd Bergmann 		if (put_user_ifreq(&ifr, uifr32))
319344c02a2cSAl Viro 			err = -EFAULT;
31947a50a240SArnd Bergmann 	}
31957a229387SArnd Bergmann 	return err;
31967a229387SArnd Bergmann }
31977a229387SArnd Bergmann 
3198590d4693SBen Hutchings /* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
3199590d4693SBen Hutchings static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
32006b96018bSArnd Bergmann 				 struct compat_ifreq __user *u_ifreq32)
32017a229387SArnd Bergmann {
320244c02a2cSAl Viro 	struct ifreq ifreq;
32037a229387SArnd Bergmann 	u32 data32;
32047a229387SArnd Bergmann 
320544c02a2cSAl Viro 	if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
32067a229387SArnd Bergmann 		return -EFAULT;
320744c02a2cSAl Viro 	if (get_user(data32, &u_ifreq32->ifr_data))
32087a229387SArnd Bergmann 		return -EFAULT;
320944c02a2cSAl Viro 	ifreq.ifr_data = compat_ptr(data32);
32107a229387SArnd Bergmann 
321144c02a2cSAl Viro 	return dev_ioctl(net, cmd, &ifreq, NULL);
3212a2116ed2SArnd Bergmann }
3213a2116ed2SArnd Bergmann 
321437ac39bdSJohannes Berg static int compat_ifreq_ioctl(struct net *net, struct socket *sock,
321537ac39bdSJohannes Berg 			      unsigned int cmd,
3216*29c49648SArnd Bergmann 			      unsigned long arg,
321737ac39bdSJohannes Berg 			      struct compat_ifreq __user *uifr32)
321837ac39bdSJohannes Berg {
3219*29c49648SArnd Bergmann 	struct ifreq ifr;
3220*29c49648SArnd Bergmann 	bool need_copyout;
322137ac39bdSJohannes Berg 	int err;
322237ac39bdSJohannes Berg 
3223*29c49648SArnd Bergmann 	err = sock->ops->ioctl(sock, cmd, arg);
322437ac39bdSJohannes Berg 
3225*29c49648SArnd Bergmann 	/* If this ioctl is unknown try to hand it down
3226*29c49648SArnd Bergmann 	 * to the NIC driver.
3227*29c49648SArnd Bergmann 	 */
3228*29c49648SArnd Bergmann 	if (err != -ENOIOCTLCMD)
3229*29c49648SArnd Bergmann 		return err;
3230*29c49648SArnd Bergmann 
3231*29c49648SArnd Bergmann 	if (get_user_ifreq(&ifr, NULL, uifr32))
3232*29c49648SArnd Bergmann 		return -EFAULT;
3233*29c49648SArnd Bergmann 	err = dev_ioctl(net, cmd, &ifr, &need_copyout);
3234*29c49648SArnd Bergmann 	if (!err && need_copyout)
3235*29c49648SArnd Bergmann 		if (put_user_ifreq(&ifr, uifr32))
323637ac39bdSJohannes Berg 			return -EFAULT;
323737ac39bdSJohannes Berg 
323837ac39bdSJohannes Berg 	return err;
323937ac39bdSJohannes Berg }
324037ac39bdSJohannes Berg 
32417a229387SArnd Bergmann /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
32427a229387SArnd Bergmann  * for some operations; this forces use of the newer bridge-utils that
324325985edcSLucas De Marchi  * use compatible ioctls
32447a229387SArnd Bergmann  */
32456b96018bSArnd Bergmann static int old_bridge_ioctl(compat_ulong_t __user *argp)
32467a229387SArnd Bergmann {
32476b96018bSArnd Bergmann 	compat_ulong_t tmp;
32487a229387SArnd Bergmann 
32496b96018bSArnd Bergmann 	if (get_user(tmp, argp))
32507a229387SArnd Bergmann 		return -EFAULT;
32517a229387SArnd Bergmann 	if (tmp == BRCTL_GET_VERSION)
32527a229387SArnd Bergmann 		return BRCTL_VERSION + 1;
32537a229387SArnd Bergmann 	return -EINVAL;
32547a229387SArnd Bergmann }
32557a229387SArnd Bergmann 
32566b96018bSArnd Bergmann static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
32576b96018bSArnd Bergmann 			 unsigned int cmd, unsigned long arg)
32586b96018bSArnd Bergmann {
32596b96018bSArnd Bergmann 	void __user *argp = compat_ptr(arg);
32606b96018bSArnd Bergmann 	struct sock *sk = sock->sk;
32616b96018bSArnd Bergmann 	struct net *net = sock_net(sk);
32627a229387SArnd Bergmann 
32636b96018bSArnd Bergmann 	if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
3264590d4693SBen Hutchings 		return compat_ifr_data_ioctl(net, cmd, argp);
32657a229387SArnd Bergmann 
32666b96018bSArnd Bergmann 	switch (cmd) {
32676b96018bSArnd Bergmann 	case SIOCSIFBR:
32686b96018bSArnd Bergmann 	case SIOCGIFBR:
32696b96018bSArnd Bergmann 		return old_bridge_ioctl(argp);
32707a50a240SArnd Bergmann 	case SIOCWANDEV:
32717a50a240SArnd Bergmann 		return compat_siocwandev(net, argp);
32720768e170SArnd Bergmann 	case SIOCGSTAMP_OLD:
32730768e170SArnd Bergmann 	case SIOCGSTAMPNS_OLD:
3274c7cbdbf2SArnd Bergmann 		if (!sock->ops->gettstamp)
3275c7cbdbf2SArnd Bergmann 			return -ENOIOCTLCMD;
32760768e170SArnd Bergmann 		return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD,
3277c7cbdbf2SArnd Bergmann 					    !COMPAT_USE_64BIT_TIME);
3278c7cbdbf2SArnd Bergmann 
3279dd98d289SArnd Bergmann 	case SIOCETHTOOL:
3280590d4693SBen Hutchings 	case SIOCBONDSLAVEINFOQUERY:
3281590d4693SBen Hutchings 	case SIOCBONDINFOQUERY:
3282a2116ed2SArnd Bergmann 	case SIOCSHWTSTAMP:
3283fd468c74SBen Hutchings 	case SIOCGHWTSTAMP:
3284590d4693SBen Hutchings 		return compat_ifr_data_ioctl(net, cmd, argp);
32857a229387SArnd Bergmann 
32866b96018bSArnd Bergmann 	case FIOSETOWN:
32876b96018bSArnd Bergmann 	case SIOCSPGRP:
32886b96018bSArnd Bergmann 	case FIOGETOWN:
32896b96018bSArnd Bergmann 	case SIOCGPGRP:
32906b96018bSArnd Bergmann 	case SIOCBRADDBR:
32916b96018bSArnd Bergmann 	case SIOCBRDELBR:
32926b96018bSArnd Bergmann 	case SIOCGIFVLAN:
32936b96018bSArnd Bergmann 	case SIOCSIFVLAN:
3294c62cce2cSAndrey Vagin 	case SIOCGSKNS:
32950768e170SArnd Bergmann 	case SIOCGSTAMP_NEW:
32960768e170SArnd Bergmann 	case SIOCGSTAMPNS_NEW:
3297876f0bf9SArnd Bergmann 	case SIOCGIFCONF:
32986b96018bSArnd Bergmann 		return sock_ioctl(file, cmd, arg);
32996b96018bSArnd Bergmann 
33006b96018bSArnd Bergmann 	case SIOCGIFFLAGS:
33016b96018bSArnd Bergmann 	case SIOCSIFFLAGS:
3302709566d7SArnd Bergmann 	case SIOCGIFMAP:
3303709566d7SArnd Bergmann 	case SIOCSIFMAP:
33046b96018bSArnd Bergmann 	case SIOCGIFMETRIC:
33056b96018bSArnd Bergmann 	case SIOCSIFMETRIC:
33066b96018bSArnd Bergmann 	case SIOCGIFMTU:
33076b96018bSArnd Bergmann 	case SIOCSIFMTU:
33086b96018bSArnd Bergmann 	case SIOCGIFMEM:
33096b96018bSArnd Bergmann 	case SIOCSIFMEM:
33106b96018bSArnd Bergmann 	case SIOCGIFHWADDR:
33116b96018bSArnd Bergmann 	case SIOCSIFHWADDR:
33126b96018bSArnd Bergmann 	case SIOCADDMULTI:
33136b96018bSArnd Bergmann 	case SIOCDELMULTI:
33146b96018bSArnd Bergmann 	case SIOCGIFINDEX:
33156b96018bSArnd Bergmann 	case SIOCGIFADDR:
33166b96018bSArnd Bergmann 	case SIOCSIFADDR:
33176b96018bSArnd Bergmann 	case SIOCSIFHWBROADCAST:
33186b96018bSArnd Bergmann 	case SIOCDIFADDR:
33196b96018bSArnd Bergmann 	case SIOCGIFBRDADDR:
33206b96018bSArnd Bergmann 	case SIOCSIFBRDADDR:
33216b96018bSArnd Bergmann 	case SIOCGIFDSTADDR:
33226b96018bSArnd Bergmann 	case SIOCSIFDSTADDR:
33236b96018bSArnd Bergmann 	case SIOCGIFNETMASK:
33246b96018bSArnd Bergmann 	case SIOCSIFNETMASK:
33256b96018bSArnd Bergmann 	case SIOCSIFPFLAGS:
33266b96018bSArnd Bergmann 	case SIOCGIFPFLAGS:
33276b96018bSArnd Bergmann 	case SIOCGIFTXQLEN:
33286b96018bSArnd Bergmann 	case SIOCSIFTXQLEN:
33296b96018bSArnd Bergmann 	case SIOCBRADDIF:
33306b96018bSArnd Bergmann 	case SIOCBRDELIF:
3331c6c9fee3SJohannes Berg 	case SIOCGIFNAME:
33329177efd3SArnd Bergmann 	case SIOCSIFNAME:
33339177efd3SArnd Bergmann 	case SIOCGMIIPHY:
33349177efd3SArnd Bergmann 	case SIOCGMIIREG:
33359177efd3SArnd Bergmann 	case SIOCSMIIREG:
3336f92d4fc9SAl Viro 	case SIOCBONDENSLAVE:
3337f92d4fc9SAl Viro 	case SIOCBONDRELEASE:
3338f92d4fc9SAl Viro 	case SIOCBONDSETHWADDR:
3339f92d4fc9SAl Viro 	case SIOCBONDCHANGEACTIVE:
3340*29c49648SArnd Bergmann 		return compat_ifreq_ioctl(net, sock, cmd, arg, argp);
334137ac39bdSJohannes Berg 
33426b96018bSArnd Bergmann 	case SIOCSARP:
33436b96018bSArnd Bergmann 	case SIOCGARP:
33446b96018bSArnd Bergmann 	case SIOCDARP:
3345c7dc504eSArnd Bergmann 	case SIOCOUTQ:
33469d7bf41fSArnd Bergmann 	case SIOCOUTQNSD:
33476b96018bSArnd Bergmann 	case SIOCATMARK:
334863ff03abSJohannes Berg 		return sock_do_ioctl(net, sock, cmd, arg);
33499177efd3SArnd Bergmann 	}
33509177efd3SArnd Bergmann 
33516b96018bSArnd Bergmann 	return -ENOIOCTLCMD;
33526b96018bSArnd Bergmann }
33537a229387SArnd Bergmann 
335495c96174SEric Dumazet static long compat_sock_ioctl(struct file *file, unsigned int cmd,
335589bbfc95SShaun Pereira 			      unsigned long arg)
335689bbfc95SShaun Pereira {
335789bbfc95SShaun Pereira 	struct socket *sock = file->private_data;
335889bbfc95SShaun Pereira 	int ret = -ENOIOCTLCMD;
335987de87d5SDavid S. Miller 	struct sock *sk;
336087de87d5SDavid S. Miller 	struct net *net;
336187de87d5SDavid S. Miller 
336287de87d5SDavid S. Miller 	sk = sock->sk;
336387de87d5SDavid S. Miller 	net = sock_net(sk);
336489bbfc95SShaun Pereira 
336589bbfc95SShaun Pereira 	if (sock->ops->compat_ioctl)
336689bbfc95SShaun Pereira 		ret = sock->ops->compat_ioctl(sock, cmd, arg);
336789bbfc95SShaun Pereira 
336887de87d5SDavid S. Miller 	if (ret == -ENOIOCTLCMD &&
336987de87d5SDavid S. Miller 	    (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
337087de87d5SDavid S. Miller 		ret = compat_wext_handle_ioctl(net, cmd, arg);
337187de87d5SDavid S. Miller 
33726b96018bSArnd Bergmann 	if (ret == -ENOIOCTLCMD)
33736b96018bSArnd Bergmann 		ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
33746b96018bSArnd Bergmann 
337589bbfc95SShaun Pereira 	return ret;
337689bbfc95SShaun Pereira }
337789bbfc95SShaun Pereira #endif
337889bbfc95SShaun Pereira 
33798a3c245cSPedro Tammela /**
33808a3c245cSPedro Tammela  *	kernel_bind - bind an address to a socket (kernel space)
33818a3c245cSPedro Tammela  *	@sock: socket
33828a3c245cSPedro Tammela  *	@addr: address
33838a3c245cSPedro Tammela  *	@addrlen: length of address
33848a3c245cSPedro Tammela  *
33858a3c245cSPedro Tammela  *	Returns 0 or an error.
33868a3c245cSPedro Tammela  */
33878a3c245cSPedro Tammela 
3388ac5a488eSSridhar Samudrala int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3389ac5a488eSSridhar Samudrala {
3390ac5a488eSSridhar Samudrala 	return sock->ops->bind(sock, addr, addrlen);
3391ac5a488eSSridhar Samudrala }
3392c6d409cfSEric Dumazet EXPORT_SYMBOL(kernel_bind);
3393ac5a488eSSridhar Samudrala 
33948a3c245cSPedro Tammela /**
33958a3c245cSPedro Tammela  *	kernel_listen - move socket to listening state (kernel space)
33968a3c245cSPedro Tammela  *	@sock: socket
33978a3c245cSPedro Tammela  *	@backlog: pending connections queue size
33988a3c245cSPedro Tammela  *
33998a3c245cSPedro Tammela  *	Returns 0 or an error.
34008a3c245cSPedro Tammela  */
34018a3c245cSPedro Tammela 
3402ac5a488eSSridhar Samudrala int kernel_listen(struct socket *sock, int backlog)
3403ac5a488eSSridhar Samudrala {
3404ac5a488eSSridhar Samudrala 	return sock->ops->listen(sock, backlog);
3405ac5a488eSSridhar Samudrala }
3406c6d409cfSEric Dumazet EXPORT_SYMBOL(kernel_listen);
3407ac5a488eSSridhar Samudrala 
34088a3c245cSPedro Tammela /**
34098a3c245cSPedro Tammela  *	kernel_accept - accept a connection (kernel space)
34108a3c245cSPedro Tammela  *	@sock: listening socket
34118a3c245cSPedro Tammela  *	@newsock: new connected socket
34128a3c245cSPedro Tammela  *	@flags: flags
34138a3c245cSPedro Tammela  *
34148a3c245cSPedro Tammela  *	@flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
34158a3c245cSPedro Tammela  *	If it fails, @newsock is guaranteed to be %NULL.
34168a3c245cSPedro Tammela  *	Returns 0 or an error.
34178a3c245cSPedro Tammela  */
34188a3c245cSPedro Tammela 
3419ac5a488eSSridhar Samudrala int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3420ac5a488eSSridhar Samudrala {
3421ac5a488eSSridhar Samudrala 	struct sock *sk = sock->sk;
3422ac5a488eSSridhar Samudrala 	int err;
3423ac5a488eSSridhar Samudrala 
3424ac5a488eSSridhar Samudrala 	err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3425ac5a488eSSridhar Samudrala 			       newsock);
3426ac5a488eSSridhar Samudrala 	if (err < 0)
3427ac5a488eSSridhar Samudrala 		goto done;
3428ac5a488eSSridhar Samudrala 
3429cdfbabfbSDavid Howells 	err = sock->ops->accept(sock, *newsock, flags, true);
3430ac5a488eSSridhar Samudrala 	if (err < 0) {
3431ac5a488eSSridhar Samudrala 		sock_release(*newsock);
3432fa8705b0STony Battersby 		*newsock = NULL;
3433ac5a488eSSridhar Samudrala 		goto done;
3434ac5a488eSSridhar Samudrala 	}
3435ac5a488eSSridhar Samudrala 
3436ac5a488eSSridhar Samudrala 	(*newsock)->ops = sock->ops;
34371b08534eSWei Yongjun 	__module_get((*newsock)->ops->owner);
3438ac5a488eSSridhar Samudrala 
3439ac5a488eSSridhar Samudrala done:
3440ac5a488eSSridhar Samudrala 	return err;
3441ac5a488eSSridhar Samudrala }
3442c6d409cfSEric Dumazet EXPORT_SYMBOL(kernel_accept);
3443ac5a488eSSridhar Samudrala 
34448a3c245cSPedro Tammela /**
34458a3c245cSPedro Tammela  *	kernel_connect - connect a socket (kernel space)
34468a3c245cSPedro Tammela  *	@sock: socket
34478a3c245cSPedro Tammela  *	@addr: address
34488a3c245cSPedro Tammela  *	@addrlen: address length
34498a3c245cSPedro Tammela  *	@flags: flags (O_NONBLOCK, ...)
34508a3c245cSPedro Tammela  *
3451f1dcffccSLu Wei  *	For datagram sockets, @addr is the address to which datagrams are sent
34528a3c245cSPedro Tammela  *	by default, and the only address from which datagrams are received.
34538a3c245cSPedro Tammela  *	For stream sockets, attempts to connect to @addr.
34548a3c245cSPedro Tammela  *	Returns 0 or an error code.
34558a3c245cSPedro Tammela  */
34568a3c245cSPedro Tammela 
3457ac5a488eSSridhar Samudrala int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
3458ac5a488eSSridhar Samudrala 		   int flags)
3459ac5a488eSSridhar Samudrala {
3460ac5a488eSSridhar Samudrala 	return sock->ops->connect(sock, addr, addrlen, flags);
3461ac5a488eSSridhar Samudrala }
3462c6d409cfSEric Dumazet EXPORT_SYMBOL(kernel_connect);
3463ac5a488eSSridhar Samudrala 
34648a3c245cSPedro Tammela /**
34658a3c245cSPedro Tammela  *	kernel_getsockname - get the address which the socket is bound (kernel space)
34668a3c245cSPedro Tammela  *	@sock: socket
34678a3c245cSPedro Tammela  *	@addr: address holder
34688a3c245cSPedro Tammela  *
34698a3c245cSPedro Tammela  * 	Fills the @addr pointer with the address which the socket is bound.
34708a3c245cSPedro Tammela  *	Returns 0 or an error code.
34718a3c245cSPedro Tammela  */
34728a3c245cSPedro Tammela 
34739b2c45d4SDenys Vlasenko int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
3474ac5a488eSSridhar Samudrala {
34759b2c45d4SDenys Vlasenko 	return sock->ops->getname(sock, addr, 0);
3476ac5a488eSSridhar Samudrala }
3477c6d409cfSEric Dumazet EXPORT_SYMBOL(kernel_getsockname);
3478ac5a488eSSridhar Samudrala 
34798a3c245cSPedro Tammela /**
3480645f0897SMiaohe Lin  *	kernel_getpeername - get the address which the socket is connected (kernel space)
34818a3c245cSPedro Tammela  *	@sock: socket
34828a3c245cSPedro Tammela  *	@addr: address holder
34838a3c245cSPedro Tammela  *
34848a3c245cSPedro Tammela  * 	Fills the @addr pointer with the address which the socket is connected.
34858a3c245cSPedro Tammela  *	Returns 0 or an error code.
34868a3c245cSPedro Tammela  */
34878a3c245cSPedro Tammela 
34889b2c45d4SDenys Vlasenko int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
3489ac5a488eSSridhar Samudrala {
34909b2c45d4SDenys Vlasenko 	return sock->ops->getname(sock, addr, 1);
3491ac5a488eSSridhar Samudrala }
3492c6d409cfSEric Dumazet EXPORT_SYMBOL(kernel_getpeername);
3493ac5a488eSSridhar Samudrala 
34948a3c245cSPedro Tammela /**
34958a3c245cSPedro Tammela  *	kernel_sendpage - send a &page through a socket (kernel space)
34968a3c245cSPedro Tammela  *	@sock: socket
34978a3c245cSPedro Tammela  *	@page: page
34988a3c245cSPedro Tammela  *	@offset: page offset
34998a3c245cSPedro Tammela  *	@size: total size in bytes
35008a3c245cSPedro Tammela  *	@flags: flags (MSG_DONTWAIT, ...)
35018a3c245cSPedro Tammela  *
35028a3c245cSPedro Tammela  *	Returns the total amount sent in bytes or an error.
35038a3c245cSPedro Tammela  */
35048a3c245cSPedro Tammela 
3505ac5a488eSSridhar Samudrala int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3506ac5a488eSSridhar Samudrala 		    size_t size, int flags)
3507ac5a488eSSridhar Samudrala {
35087b62d31dSColy Li 	if (sock->ops->sendpage) {
35097b62d31dSColy Li 		/* Warn in case the improper page to zero-copy send */
35107b62d31dSColy Li 		WARN_ONCE(!sendpage_ok(page), "improper page for zero-copy send");
3511ac5a488eSSridhar Samudrala 		return sock->ops->sendpage(sock, page, offset, size, flags);
35127b62d31dSColy Li 	}
3513ac5a488eSSridhar Samudrala 	return sock_no_sendpage(sock, page, offset, size, flags);
3514ac5a488eSSridhar Samudrala }
3515c6d409cfSEric Dumazet EXPORT_SYMBOL(kernel_sendpage);
3516ac5a488eSSridhar Samudrala 
35178a3c245cSPedro Tammela /**
35188a3c245cSPedro Tammela  *	kernel_sendpage_locked - send a &page through the locked sock (kernel space)
35198a3c245cSPedro Tammela  *	@sk: sock
35208a3c245cSPedro Tammela  *	@page: page
35218a3c245cSPedro Tammela  *	@offset: page offset
35228a3c245cSPedro Tammela  *	@size: total size in bytes
35238a3c245cSPedro Tammela  *	@flags: flags (MSG_DONTWAIT, ...)
35248a3c245cSPedro Tammela  *
35258a3c245cSPedro Tammela  *	Returns the total amount sent in bytes or an error.
35268a3c245cSPedro Tammela  *	Caller must hold @sk.
35278a3c245cSPedro Tammela  */
35288a3c245cSPedro Tammela 
3529306b13ebSTom Herbert int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
3530306b13ebSTom Herbert 			   size_t size, int flags)
3531306b13ebSTom Herbert {
3532306b13ebSTom Herbert 	struct socket *sock = sk->sk_socket;
3533306b13ebSTom Herbert 
3534306b13ebSTom Herbert 	if (sock->ops->sendpage_locked)
3535306b13ebSTom Herbert 		return sock->ops->sendpage_locked(sk, page, offset, size,
3536306b13ebSTom Herbert 						  flags);
3537306b13ebSTom Herbert 
3538306b13ebSTom Herbert 	return sock_no_sendpage_locked(sk, page, offset, size, flags);
3539306b13ebSTom Herbert }
3540306b13ebSTom Herbert EXPORT_SYMBOL(kernel_sendpage_locked);
3541306b13ebSTom Herbert 
35428a3c245cSPedro Tammela /**
3543645f0897SMiaohe Lin  *	kernel_sock_shutdown - shut down part of a full-duplex connection (kernel space)
35448a3c245cSPedro Tammela  *	@sock: socket
35458a3c245cSPedro Tammela  *	@how: connection part
35468a3c245cSPedro Tammela  *
35478a3c245cSPedro Tammela  *	Returns 0 or an error.
35488a3c245cSPedro Tammela  */
35498a3c245cSPedro Tammela 
355091cf45f0STrond Myklebust int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
355191cf45f0STrond Myklebust {
355291cf45f0STrond Myklebust 	return sock->ops->shutdown(sock, how);
355391cf45f0STrond Myklebust }
355491cf45f0STrond Myklebust EXPORT_SYMBOL(kernel_sock_shutdown);
3555113c3075SR. Parameswaran 
35568a3c245cSPedro Tammela /**
35578a3c245cSPedro Tammela  *	kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
35588a3c245cSPedro Tammela  *	@sk: socket
35598a3c245cSPedro Tammela  *
35608a3c245cSPedro Tammela  *	This routine returns the IP overhead imposed by a socket i.e.
3561113c3075SR. Parameswaran  *	the length of the underlying IP header, depending on whether
3562113c3075SR. Parameswaran  *	this is an IPv4 or IPv6 socket and the length from IP options turned
356357240d00SR. Parameswaran  *	on at the socket. Assumes that the caller has a lock on the socket.
3564113c3075SR. Parameswaran  */
35658a3c245cSPedro Tammela 
3566113c3075SR. Parameswaran u32 kernel_sock_ip_overhead(struct sock *sk)
3567113c3075SR. Parameswaran {
3568113c3075SR. Parameswaran 	struct inet_sock *inet;
3569113c3075SR. Parameswaran 	struct ip_options_rcu *opt;
3570113c3075SR. Parameswaran 	u32 overhead = 0;
3571113c3075SR. Parameswaran #if IS_ENABLED(CONFIG_IPV6)
3572113c3075SR. Parameswaran 	struct ipv6_pinfo *np;
3573113c3075SR. Parameswaran 	struct ipv6_txoptions *optv6 = NULL;
3574113c3075SR. Parameswaran #endif /* IS_ENABLED(CONFIG_IPV6) */
3575113c3075SR. Parameswaran 
3576113c3075SR. Parameswaran 	if (!sk)
3577113c3075SR. Parameswaran 		return overhead;
3578113c3075SR. Parameswaran 
3579113c3075SR. Parameswaran 	switch (sk->sk_family) {
3580113c3075SR. Parameswaran 	case AF_INET:
3581113c3075SR. Parameswaran 		inet = inet_sk(sk);
3582113c3075SR. Parameswaran 		overhead += sizeof(struct iphdr);
3583113c3075SR. Parameswaran 		opt = rcu_dereference_protected(inet->inet_opt,
3584614d79c0Sstephen hemminger 						sock_owned_by_user(sk));
3585113c3075SR. Parameswaran 		if (opt)
3586113c3075SR. Parameswaran 			overhead += opt->opt.optlen;
3587113c3075SR. Parameswaran 		return overhead;
3588113c3075SR. Parameswaran #if IS_ENABLED(CONFIG_IPV6)
3589113c3075SR. Parameswaran 	case AF_INET6:
3590113c3075SR. Parameswaran 		np = inet6_sk(sk);
3591113c3075SR. Parameswaran 		overhead += sizeof(struct ipv6hdr);
3592113c3075SR. Parameswaran 		if (np)
3593113c3075SR. Parameswaran 			optv6 = rcu_dereference_protected(np->opt,
3594614d79c0Sstephen hemminger 							  sock_owned_by_user(sk));
3595113c3075SR. Parameswaran 		if (optv6)
3596113c3075SR. Parameswaran 			overhead += (optv6->opt_flen + optv6->opt_nflen);
3597113c3075SR. Parameswaran 		return overhead;
3598113c3075SR. Parameswaran #endif /* IS_ENABLED(CONFIG_IPV6) */
3599113c3075SR. Parameswaran 	default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */
3600113c3075SR. Parameswaran 		return overhead;
3601113c3075SR. Parameswaran 	}
3602113c3075SR. Parameswaran }
3603113c3075SR. Parameswaran EXPORT_SYMBOL(kernel_sock_ip_overhead);
3604