xref: /openbmc/linux/net/socket.c (revision 0345f931)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * NET		An implementation of the SOCKET network access protocol.
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  * Version:	@(#)socket.c	1.1.93	18/02/95
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  * Authors:	Orest Zborowski, <obz@Kodak.COM>
702c30a84SJesper Juhl  *		Ross Biro
81da177e4SLinus Torvalds  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
91da177e4SLinus Torvalds  *
101da177e4SLinus Torvalds  * Fixes:
111da177e4SLinus Torvalds  *		Anonymous	:	NOTSOCK/BADF cleanup. Error fix in
121da177e4SLinus Torvalds  *					shutdown()
131da177e4SLinus Torvalds  *		Alan Cox	:	verify_area() fixes
141da177e4SLinus Torvalds  *		Alan Cox	:	Removed DDI
151da177e4SLinus Torvalds  *		Jonathan Kamens	:	SOCK_DGRAM reconnect bug
161da177e4SLinus Torvalds  *		Alan Cox	:	Moved a load of checks to the very
171da177e4SLinus Torvalds  *					top level.
181da177e4SLinus Torvalds  *		Alan Cox	:	Move address structures to/from user
191da177e4SLinus Torvalds  *					mode above the protocol layers.
201da177e4SLinus Torvalds  *		Rob Janssen	:	Allow 0 length sends.
211da177e4SLinus Torvalds  *		Alan Cox	:	Asynchronous I/O support (cribbed from the
221da177e4SLinus Torvalds  *					tty drivers).
231da177e4SLinus Torvalds  *		Niibe Yutaka	:	Asynchronous I/O for writes (4.4BSD style)
241da177e4SLinus Torvalds  *		Jeff Uphoff	:	Made max number of sockets command-line
251da177e4SLinus Torvalds  *					configurable.
261da177e4SLinus Torvalds  *		Matti Aarnio	:	Made the number of sockets dynamic,
271da177e4SLinus Torvalds  *					to be allocated when needed, and mr.
281da177e4SLinus Torvalds  *					Uphoff's max is used as max to be
291da177e4SLinus Torvalds  *					allowed to allocate.
301da177e4SLinus Torvalds  *		Linus		:	Argh. removed all the socket allocation
311da177e4SLinus Torvalds  *					altogether: it's in the inode now.
321da177e4SLinus Torvalds  *		Alan Cox	:	Made sock_alloc()/sock_release() public
331da177e4SLinus Torvalds  *					for NetROM and future kernel nfsd type
341da177e4SLinus Torvalds  *					stuff.
351da177e4SLinus Torvalds  *		Alan Cox	:	sendmsg/recvmsg basics.
361da177e4SLinus Torvalds  *		Tom Dyas	:	Export net symbols.
371da177e4SLinus Torvalds  *		Marcin Dalecki	:	Fixed problems with CONFIG_NET="n".
381da177e4SLinus Torvalds  *		Alan Cox	:	Added thread locking to sys_* calls
391da177e4SLinus Torvalds  *					for sockets. May have errors at the
401da177e4SLinus Torvalds  *					moment.
411da177e4SLinus Torvalds  *		Kevin Buhr	:	Fixed the dumb errors in the above.
421da177e4SLinus Torvalds  *		Andi Kleen	:	Some small cleanups, optimizations,
431da177e4SLinus Torvalds  *					and fixed a copy_from_user() bug.
441da177e4SLinus Torvalds  *		Tigran Aivazian	:	sys_send(args) calls sys_sendto(args, NULL, 0)
451da177e4SLinus Torvalds  *		Tigran Aivazian	:	Made listen(2) backlog sanity checks
461da177e4SLinus Torvalds  *					protocol-independent
471da177e4SLinus Torvalds  *
481da177e4SLinus Torvalds  *
491da177e4SLinus Torvalds  *		This program is free software; you can redistribute it and/or
501da177e4SLinus Torvalds  *		modify it under the terms of the GNU General Public License
511da177e4SLinus Torvalds  *		as published by the Free Software Foundation; either version
521da177e4SLinus Torvalds  *		2 of the License, or (at your option) any later version.
531da177e4SLinus Torvalds  *
541da177e4SLinus Torvalds  *
551da177e4SLinus Torvalds  *	This module is effectively the top level interface to the BSD socket
561da177e4SLinus Torvalds  *	paradigm.
571da177e4SLinus Torvalds  *
581da177e4SLinus Torvalds  *	Based upon Swansea University Computer Society NET3.039
591da177e4SLinus Torvalds  */
601da177e4SLinus Torvalds 
611da177e4SLinus Torvalds #include <linux/mm.h>
621da177e4SLinus Torvalds #include <linux/socket.h>
631da177e4SLinus Torvalds #include <linux/file.h>
641da177e4SLinus Torvalds #include <linux/net.h>
651da177e4SLinus Torvalds #include <linux/interrupt.h>
66aaca0bdcSUlrich Drepper #include <linux/thread_info.h>
6755737fdaSStephen Hemminger #include <linux/rcupdate.h>
681da177e4SLinus Torvalds #include <linux/netdevice.h>
691da177e4SLinus Torvalds #include <linux/proc_fs.h>
701da177e4SLinus Torvalds #include <linux/seq_file.h>
714a3e2f71SArjan van de Ven #include <linux/mutex.h>
721da177e4SLinus Torvalds #include <linux/if_bridge.h>
7320380731SArnaldo Carvalho de Melo #include <linux/if_frad.h>
7420380731SArnaldo Carvalho de Melo #include <linux/if_vlan.h>
75408eccceSDaniel Borkmann #include <linux/ptp_classify.h>
761da177e4SLinus Torvalds #include <linux/init.h>
771da177e4SLinus Torvalds #include <linux/poll.h>
781da177e4SLinus Torvalds #include <linux/cache.h>
791da177e4SLinus Torvalds #include <linux/module.h>
801da177e4SLinus Torvalds #include <linux/highmem.h>
811da177e4SLinus Torvalds #include <linux/mount.h>
821da177e4SLinus Torvalds #include <linux/security.h>
831da177e4SLinus Torvalds #include <linux/syscalls.h>
841da177e4SLinus Torvalds #include <linux/compat.h>
851da177e4SLinus Torvalds #include <linux/kmod.h>
863ec3b2fbSDavid Woodhouse #include <linux/audit.h>
87d86b5e0eSAdrian Bunk #include <linux/wireless.h>
881b8d7ae4SEric W. Biederman #include <linux/nsproxy.h>
891fd7317dSNick Black #include <linux/magic.h>
905a0e3ad6STejun Heo #include <linux/slab.h>
91600e1779SMasatake YAMATO #include <linux/xattr.h>
921da177e4SLinus Torvalds 
931da177e4SLinus Torvalds #include <asm/uaccess.h>
941da177e4SLinus Torvalds #include <asm/unistd.h>
951da177e4SLinus Torvalds 
961da177e4SLinus Torvalds #include <net/compat.h>
9787de87d5SDavid S. Miller #include <net/wext.h>
98f8451725SHerbert Xu #include <net/cls_cgroup.h>
991da177e4SLinus Torvalds 
1001da177e4SLinus Torvalds #include <net/sock.h>
1011da177e4SLinus Torvalds #include <linux/netfilter.h>
1021da177e4SLinus Torvalds 
1036b96018bSArnd Bergmann #include <linux/if_tun.h>
1046b96018bSArnd Bergmann #include <linux/ipv6_route.h>
1056b96018bSArnd Bergmann #include <linux/route.h>
1066b96018bSArnd Bergmann #include <linux/sockios.h>
1076b96018bSArnd Bergmann #include <linux/atalk.h>
108076bb0c8SEliezer Tamir #include <net/busy_poll.h>
109f24b9be5SWillem de Bruijn #include <linux/errqueue.h>
11006021292SEliezer Tamir 
#ifdef CONFIG_NET_RX_BUSY_POLL
/*
 * Busy-poll tunables, only built when CONFIG_NET_RX_BUSY_POLL is set.
 * Both start out as zero (static storage).
 * NOTE(review): presumably exported through sysctl - the registration is
 * not visible in this part of the file.
 */
unsigned int sysctl_net_busy_poll __read_mostly;
unsigned int sysctl_net_busy_read __read_mostly;
#endif
1156b96018bSArnd Bergmann 
1168ae5e030SAl Viro static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to);
1178ae5e030SAl Viro static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
1181da177e4SLinus Torvalds static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1191da177e4SLinus Torvalds 
1201da177e4SLinus Torvalds static int sock_close(struct inode *inode, struct file *file);
1211da177e4SLinus Torvalds static unsigned int sock_poll(struct file *file,
1221da177e4SLinus Torvalds 			      struct poll_table_struct *wait);
12389bddce5SStephen Hemminger static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
12489bbfc95SShaun Pereira #ifdef CONFIG_COMPAT
12589bbfc95SShaun Pereira static long compat_sock_ioctl(struct file *file,
12689bbfc95SShaun Pereira 			      unsigned int cmd, unsigned long arg);
12789bbfc95SShaun Pereira #endif
1281da177e4SLinus Torvalds static int sock_fasync(int fd, struct file *filp, int on);
1291da177e4SLinus Torvalds static ssize_t sock_sendpage(struct file *file, struct page *page,
1301da177e4SLinus Torvalds 			     int offset, size_t size, loff_t *ppos, int more);
1319c55e01cSJens Axboe static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
1329c55e01cSJens Axboe 				struct pipe_inode_info *pipe, size_t len,
1339c55e01cSJens Axboe 				unsigned int flags);
1341da177e4SLinus Torvalds 
1351da177e4SLinus Torvalds /*
1361da177e4SLinus Torvalds  *	Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
1371da177e4SLinus Torvalds  *	in the operation structures but are done directly via the socketcall() multiplexor.
1381da177e4SLinus Torvalds  */
1391da177e4SLinus Torvalds 
140da7071d7SArjan van de Ven static const struct file_operations socket_file_ops = {
1411da177e4SLinus Torvalds 	.owner =	THIS_MODULE,
1421da177e4SLinus Torvalds 	.llseek =	no_llseek,
1438ae5e030SAl Viro 	.read =		new_sync_read,
1448ae5e030SAl Viro 	.write =	new_sync_write,
1458ae5e030SAl Viro 	.read_iter =	sock_read_iter,
1468ae5e030SAl Viro 	.write_iter =	sock_write_iter,
1471da177e4SLinus Torvalds 	.poll =		sock_poll,
1481da177e4SLinus Torvalds 	.unlocked_ioctl = sock_ioctl,
14989bbfc95SShaun Pereira #ifdef CONFIG_COMPAT
15089bbfc95SShaun Pereira 	.compat_ioctl = compat_sock_ioctl,
15189bbfc95SShaun Pereira #endif
1521da177e4SLinus Torvalds 	.mmap =		sock_mmap,
1531da177e4SLinus Torvalds 	.release =	sock_close,
1541da177e4SLinus Torvalds 	.fasync =	sock_fasync,
1555274f052SJens Axboe 	.sendpage =	sock_sendpage,
1565274f052SJens Axboe 	.splice_write = generic_splice_sendpage,
1579c55e01cSJens Axboe 	.splice_read =	sock_splice_read,
1581da177e4SLinus Torvalds };
1591da177e4SLinus Torvalds 
1601da177e4SLinus Torvalds /*
1611da177e4SLinus Torvalds  *	The protocol list. Each protocol is registered in here.
1621da177e4SLinus Torvalds  */
1631da177e4SLinus Torvalds 
1641da177e4SLinus Torvalds static DEFINE_SPINLOCK(net_family_lock);
165190683a9SEric Dumazet static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
1661da177e4SLinus Torvalds 
1671da177e4SLinus Torvalds /*
1681da177e4SLinus Torvalds  *	Statistics counters of the socket lists
1691da177e4SLinus Torvalds  */
1701da177e4SLinus Torvalds 
171c6d409cfSEric Dumazet static DEFINE_PER_CPU(int, sockets_in_use);
1721da177e4SLinus Torvalds 
1731da177e4SLinus Torvalds /*
17489bddce5SStephen Hemminger  * Support routines.
17589bddce5SStephen Hemminger  * Move socket addresses back and forth across the kernel/user
1761da177e4SLinus Torvalds  * divide and look after the messy bits.
1771da177e4SLinus Torvalds  */
1781da177e4SLinus Torvalds 
1791da177e4SLinus Torvalds /**
1801da177e4SLinus Torvalds  *	move_addr_to_kernel	-	copy a socket address into kernel space
1811da177e4SLinus Torvalds  *	@uaddr: Address in user space
1821da177e4SLinus Torvalds  *	@kaddr: Address in kernel space
1831da177e4SLinus Torvalds  *	@ulen: Length in user space
1841da177e4SLinus Torvalds  *
1851da177e4SLinus Torvalds  *	The address is copied into kernel space. If the provided address is
1861da177e4SLinus Torvalds  *	too long an error code of -EINVAL is returned. If the copy gives
1871da177e4SLinus Torvalds  *	invalid addresses -EFAULT is returned. On a success 0 is returned.
1881da177e4SLinus Torvalds  */
1891da177e4SLinus Torvalds 
19043db362dSMaciej Żenczykowski int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
1911da177e4SLinus Torvalds {
192230b1839SYOSHIFUJI Hideaki 	if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1931da177e4SLinus Torvalds 		return -EINVAL;
1941da177e4SLinus Torvalds 	if (ulen == 0)
1951da177e4SLinus Torvalds 		return 0;
1961da177e4SLinus Torvalds 	if (copy_from_user(kaddr, uaddr, ulen))
1971da177e4SLinus Torvalds 		return -EFAULT;
1983ec3b2fbSDavid Woodhouse 	return audit_sockaddr(ulen, kaddr);
1991da177e4SLinus Torvalds }
2001da177e4SLinus Torvalds 
2011da177e4SLinus Torvalds /**
2021da177e4SLinus Torvalds  *	move_addr_to_user	-	copy an address to user space
2031da177e4SLinus Torvalds  *	@kaddr: kernel space address
2041da177e4SLinus Torvalds  *	@klen: length of address in kernel
2051da177e4SLinus Torvalds  *	@uaddr: user space address
2061da177e4SLinus Torvalds  *	@ulen: pointer to user length field
2071da177e4SLinus Torvalds  *
2081da177e4SLinus Torvalds  *	The value pointed to by ulen on entry is the buffer length available.
2091da177e4SLinus Torvalds  *	This is overwritten with the buffer space used. -EINVAL is returned
2101da177e4SLinus Torvalds  *	if an overlong buffer is specified or a negative buffer size. -EFAULT
2111da177e4SLinus Torvalds  *	is returned if either the buffer or the length field are not
2121da177e4SLinus Torvalds  *	accessible.
2131da177e4SLinus Torvalds  *	After copying the data up to the limit the user specifies, the true
2141da177e4SLinus Torvalds  *	length of the data is written over the length limit the user
2151da177e4SLinus Torvalds  *	specified. Zero is returned for a success.
2161da177e4SLinus Torvalds  */
2171da177e4SLinus Torvalds 
21843db362dSMaciej Żenczykowski static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
21911165f14Sstephen hemminger 			     void __user *uaddr, int __user *ulen)
2201da177e4SLinus Torvalds {
2211da177e4SLinus Torvalds 	int err;
2221da177e4SLinus Torvalds 	int len;
2231da177e4SLinus Torvalds 
22468c6beb3SHannes Frederic Sowa 	BUG_ON(klen > sizeof(struct sockaddr_storage));
22589bddce5SStephen Hemminger 	err = get_user(len, ulen);
22689bddce5SStephen Hemminger 	if (err)
2271da177e4SLinus Torvalds 		return err;
2281da177e4SLinus Torvalds 	if (len > klen)
2291da177e4SLinus Torvalds 		len = klen;
23068c6beb3SHannes Frederic Sowa 	if (len < 0)
2311da177e4SLinus Torvalds 		return -EINVAL;
23289bddce5SStephen Hemminger 	if (len) {
233d6fe3945SSteve Grubb 		if (audit_sockaddr(klen, kaddr))
234d6fe3945SSteve Grubb 			return -ENOMEM;
2351da177e4SLinus Torvalds 		if (copy_to_user(uaddr, kaddr, len))
2361da177e4SLinus Torvalds 			return -EFAULT;
2371da177e4SLinus Torvalds 	}
2381da177e4SLinus Torvalds 	/*
2391da177e4SLinus Torvalds 	 *      "fromlen shall refer to the value before truncation.."
2401da177e4SLinus Torvalds 	 *                      1003.1g
2411da177e4SLinus Torvalds 	 */
2421da177e4SLinus Torvalds 	return __put_user(klen, ulen);
2431da177e4SLinus Torvalds }
2441da177e4SLinus Torvalds 
245e18b890bSChristoph Lameter static struct kmem_cache *sock_inode_cachep __read_mostly;
2461da177e4SLinus Torvalds 
2471da177e4SLinus Torvalds static struct inode *sock_alloc_inode(struct super_block *sb)
2481da177e4SLinus Torvalds {
2491da177e4SLinus Torvalds 	struct socket_alloc *ei;
250eaefd110SEric Dumazet 	struct socket_wq *wq;
25189bddce5SStephen Hemminger 
252e94b1766SChristoph Lameter 	ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
2531da177e4SLinus Torvalds 	if (!ei)
2541da177e4SLinus Torvalds 		return NULL;
255eaefd110SEric Dumazet 	wq = kmalloc(sizeof(*wq), GFP_KERNEL);
256eaefd110SEric Dumazet 	if (!wq) {
25743815482SEric Dumazet 		kmem_cache_free(sock_inode_cachep, ei);
25843815482SEric Dumazet 		return NULL;
25943815482SEric Dumazet 	}
260eaefd110SEric Dumazet 	init_waitqueue_head(&wq->wait);
261eaefd110SEric Dumazet 	wq->fasync_list = NULL;
262eaefd110SEric Dumazet 	RCU_INIT_POINTER(ei->socket.wq, wq);
2631da177e4SLinus Torvalds 
2641da177e4SLinus Torvalds 	ei->socket.state = SS_UNCONNECTED;
2651da177e4SLinus Torvalds 	ei->socket.flags = 0;
2661da177e4SLinus Torvalds 	ei->socket.ops = NULL;
2671da177e4SLinus Torvalds 	ei->socket.sk = NULL;
2681da177e4SLinus Torvalds 	ei->socket.file = NULL;
2691da177e4SLinus Torvalds 
2701da177e4SLinus Torvalds 	return &ei->vfs_inode;
2711da177e4SLinus Torvalds }
2721da177e4SLinus Torvalds 
2731da177e4SLinus Torvalds static void sock_destroy_inode(struct inode *inode)
2741da177e4SLinus Torvalds {
27543815482SEric Dumazet 	struct socket_alloc *ei;
276eaefd110SEric Dumazet 	struct socket_wq *wq;
27743815482SEric Dumazet 
27843815482SEric Dumazet 	ei = container_of(inode, struct socket_alloc, vfs_inode);
279eaefd110SEric Dumazet 	wq = rcu_dereference_protected(ei->socket.wq, 1);
28061845220SLai Jiangshan 	kfree_rcu(wq, rcu);
28143815482SEric Dumazet 	kmem_cache_free(sock_inode_cachep, ei);
2821da177e4SLinus Torvalds }
2831da177e4SLinus Torvalds 
28451cc5068SAlexey Dobriyan static void init_once(void *foo)
2851da177e4SLinus Torvalds {
2861da177e4SLinus Torvalds 	struct socket_alloc *ei = (struct socket_alloc *)foo;
2871da177e4SLinus Torvalds 
2881da177e4SLinus Torvalds 	inode_init_once(&ei->vfs_inode);
2891da177e4SLinus Torvalds }
2901da177e4SLinus Torvalds 
2911da177e4SLinus Torvalds static int init_inodecache(void)
2921da177e4SLinus Torvalds {
2931da177e4SLinus Torvalds 	sock_inode_cachep = kmem_cache_create("sock_inode_cache",
2941da177e4SLinus Torvalds 					      sizeof(struct socket_alloc),
29589bddce5SStephen Hemminger 					      0,
29689bddce5SStephen Hemminger 					      (SLAB_HWCACHE_ALIGN |
29789bddce5SStephen Hemminger 					       SLAB_RECLAIM_ACCOUNT |
298fffb60f9SPaul Jackson 					       SLAB_MEM_SPREAD),
29920c2df83SPaul Mundt 					      init_once);
3001da177e4SLinus Torvalds 	if (sock_inode_cachep == NULL)
3011da177e4SLinus Torvalds 		return -ENOMEM;
3021da177e4SLinus Torvalds 	return 0;
3031da177e4SLinus Torvalds }
3041da177e4SLinus Torvalds 
305b87221deSAlexey Dobriyan static const struct super_operations sockfs_ops = {
3061da177e4SLinus Torvalds 	.alloc_inode	= sock_alloc_inode,
3071da177e4SLinus Torvalds 	.destroy_inode	= sock_destroy_inode,
3081da177e4SLinus Torvalds 	.statfs		= simple_statfs,
3091da177e4SLinus Torvalds };
3101da177e4SLinus Torvalds 
311c23fbb6bSEric Dumazet /*
312c23fbb6bSEric Dumazet  * sockfs_dname() is called from d_path().
313c23fbb6bSEric Dumazet  */
314c23fbb6bSEric Dumazet static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
315c23fbb6bSEric Dumazet {
316c23fbb6bSEric Dumazet 	return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
317c23fbb6bSEric Dumazet 				dentry->d_inode->i_ino);
318c23fbb6bSEric Dumazet }
319c23fbb6bSEric Dumazet 
3203ba13d17SAl Viro static const struct dentry_operations sockfs_dentry_operations = {
321c23fbb6bSEric Dumazet 	.d_dname  = sockfs_dname,
3221da177e4SLinus Torvalds };
3231da177e4SLinus Torvalds 
324c74a1cbbSAl Viro static struct dentry *sockfs_mount(struct file_system_type *fs_type,
325c74a1cbbSAl Viro 			 int flags, const char *dev_name, void *data)
326c74a1cbbSAl Viro {
327c74a1cbbSAl Viro 	return mount_pseudo(fs_type, "socket:", &sockfs_ops,
328c74a1cbbSAl Viro 		&sockfs_dentry_operations, SOCKFS_MAGIC);
329c74a1cbbSAl Viro }
330c74a1cbbSAl Viro 
331c74a1cbbSAl Viro static struct vfsmount *sock_mnt __read_mostly;
332c74a1cbbSAl Viro 
333c74a1cbbSAl Viro static struct file_system_type sock_fs_type = {
334c74a1cbbSAl Viro 	.name =		"sockfs",
335c74a1cbbSAl Viro 	.mount =	sockfs_mount,
336c74a1cbbSAl Viro 	.kill_sb =	kill_anon_super,
337c74a1cbbSAl Viro };
338c74a1cbbSAl Viro 
/*
 *	Obtains the first available file descriptor and sets it up for use.
 *
 *	These functions create file structures and map them to the fd space
 *	of the current process. On success a file descriptor is returned
 *	and the file struct is implicitly stored in sock->file.
 *	Note that another thread may close the file descriptor before we
 *	return from this function. We rely on the fact that we do not refer
 *	to the socket after mapping. If one day we need to, this
 *	function will have to increment the ref. count on the file by 1.
 *
 *	In any case the returned fd MAY BE invalid!
 *	This race condition is unavoidable
 *	with shared fd spaces; we cannot solve it inside the kernel,
 *	but we still take care of internal coherence.
 */
3551da177e4SLinus Torvalds 
356aab174f0SLinus Torvalds struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
3571da177e4SLinus Torvalds {
3587cbe66b6SAl Viro 	struct qstr name = { .name = "" };
3592c48b9c4SAl Viro 	struct path path;
3607cbe66b6SAl Viro 	struct file *file;
3611da177e4SLinus Torvalds 
362600e1779SMasatake YAMATO 	if (dname) {
363600e1779SMasatake YAMATO 		name.name = dname;
364600e1779SMasatake YAMATO 		name.len = strlen(name.name);
365600e1779SMasatake YAMATO 	} else if (sock->sk) {
366600e1779SMasatake YAMATO 		name.name = sock->sk->sk_prot_creator->name;
367600e1779SMasatake YAMATO 		name.len = strlen(name.name);
368600e1779SMasatake YAMATO 	}
3694b936885SNick Piggin 	path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
37028407630SAl Viro 	if (unlikely(!path.dentry))
37128407630SAl Viro 		return ERR_PTR(-ENOMEM);
3722c48b9c4SAl Viro 	path.mnt = mntget(sock_mnt);
37339d8c1b6SDavid S. Miller 
3742c48b9c4SAl Viro 	d_instantiate(path.dentry, SOCK_INODE(sock));
375cc3808f8SAl Viro 
3762c48b9c4SAl Viro 	file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
377cc3808f8SAl Viro 		  &socket_file_ops);
37839b65252SAnatol Pomozov 	if (unlikely(IS_ERR(file))) {
379cc3808f8SAl Viro 		/* drop dentry, keep inode */
3807de9c6eeSAl Viro 		ihold(path.dentry->d_inode);
3812c48b9c4SAl Viro 		path_put(&path);
38239b65252SAnatol Pomozov 		return file;
383cc3808f8SAl Viro 	}
3841da177e4SLinus Torvalds 
3851da177e4SLinus Torvalds 	sock->file = file;
38677d27200SUlrich Drepper 	file->f_flags = O_RDWR | (flags & O_NONBLOCK);
38707dc3f07SBenjamin LaHaise 	file->private_data = sock;
38828407630SAl Viro 	return file;
3891da177e4SLinus Torvalds }
39056b31d1cSAl Viro EXPORT_SYMBOL(sock_alloc_file);
3911da177e4SLinus Torvalds 
39256b31d1cSAl Viro static int sock_map_fd(struct socket *sock, int flags)
39339d8c1b6SDavid S. Miller {
39439d8c1b6SDavid S. Miller 	struct file *newfile;
39528407630SAl Viro 	int fd = get_unused_fd_flags(flags);
39628407630SAl Viro 	if (unlikely(fd < 0))
3971da177e4SLinus Torvalds 		return fd;
3981da177e4SLinus Torvalds 
399aab174f0SLinus Torvalds 	newfile = sock_alloc_file(sock, flags, NULL);
40028407630SAl Viro 	if (likely(!IS_ERR(newfile))) {
4011da177e4SLinus Torvalds 		fd_install(fd, newfile);
4021da177e4SLinus Torvalds 		return fd;
4031da177e4SLinus Torvalds 	}
40428407630SAl Viro 
40528407630SAl Viro 	put_unused_fd(fd);
40628407630SAl Viro 	return PTR_ERR(newfile);
4071da177e4SLinus Torvalds }
4081da177e4SLinus Torvalds 
409406a3c63SJohn Fastabend struct socket *sock_from_file(struct file *file, int *err)
4106cb153caSBenjamin LaHaise {
4116cb153caSBenjamin LaHaise 	if (file->f_op == &socket_file_ops)
4126cb153caSBenjamin LaHaise 		return file->private_data;	/* set in sock_map_fd */
4136cb153caSBenjamin LaHaise 
4146cb153caSBenjamin LaHaise 	*err = -ENOTSOCK;
4156cb153caSBenjamin LaHaise 	return NULL;
4166cb153caSBenjamin LaHaise }
417406a3c63SJohn Fastabend EXPORT_SYMBOL(sock_from_file);
4186cb153caSBenjamin LaHaise 
/**
 *	sockfd_lookup - Go from a file number to its socket slot
 *	@fd: file handle
 *	@err: pointer to an error code return
 *
 *	A reference to the file behind @fd is taken and the socket it is
 *	bound to is returned. If an error occurs *@err is overwritten with a
 *	negative errno code and NULL is returned: -EBADF for an invalid
 *	handle, -ENOTSOCK for a handle which is not a socket. In the latter
 *	case the file reference is dropped again.
 *
 *	On success the socket object pointer is returned and the file
 *	reference is kept.
 */

struct socket *sockfd_lookup(int fd, int *err)
{
	struct socket *sock;
	struct file *file = fget(fd);

	if (file == NULL) {
		*err = -EBADF;
		return NULL;
	}

	sock = sock_from_file(file, err);
	if (sock == NULL)
		fput(file);

	return sock;
}
EXPORT_SYMBOL(sockfd_lookup);
4491da177e4SLinus Torvalds 
4506cb153caSBenjamin LaHaise static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
4516cb153caSBenjamin LaHaise {
45200e188efSAl Viro 	struct fd f = fdget(fd);
4536cb153caSBenjamin LaHaise 	struct socket *sock;
4546cb153caSBenjamin LaHaise 
4553672558cSHua Zhong 	*err = -EBADF;
45600e188efSAl Viro 	if (f.file) {
45700e188efSAl Viro 		sock = sock_from_file(f.file, err);
45800e188efSAl Viro 		if (likely(sock)) {
45900e188efSAl Viro 			*fput_needed = f.flags;
4601da177e4SLinus Torvalds 			return sock;
46100e188efSAl Viro 		}
46200e188efSAl Viro 		fdput(f);
4636cb153caSBenjamin LaHaise 	}
4646cb153caSBenjamin LaHaise 	return NULL;
4651da177e4SLinus Torvalds }
4661da177e4SLinus Torvalds 
467600e1779SMasatake YAMATO #define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
468600e1779SMasatake YAMATO #define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
469600e1779SMasatake YAMATO #define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
470600e1779SMasatake YAMATO static ssize_t sockfs_getxattr(struct dentry *dentry,
471600e1779SMasatake YAMATO 			       const char *name, void *value, size_t size)
472600e1779SMasatake YAMATO {
473600e1779SMasatake YAMATO 	const char *proto_name;
474600e1779SMasatake YAMATO 	size_t proto_size;
475600e1779SMasatake YAMATO 	int error;
476600e1779SMasatake YAMATO 
477600e1779SMasatake YAMATO 	error = -ENODATA;
478600e1779SMasatake YAMATO 	if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) {
479600e1779SMasatake YAMATO 		proto_name = dentry->d_name.name;
480600e1779SMasatake YAMATO 		proto_size = strlen(proto_name);
481600e1779SMasatake YAMATO 
482600e1779SMasatake YAMATO 		if (value) {
483600e1779SMasatake YAMATO 			error = -ERANGE;
484600e1779SMasatake YAMATO 			if (proto_size + 1 > size)
485600e1779SMasatake YAMATO 				goto out;
486600e1779SMasatake YAMATO 
487600e1779SMasatake YAMATO 			strncpy(value, proto_name, proto_size + 1);
488600e1779SMasatake YAMATO 		}
489600e1779SMasatake YAMATO 		error = proto_size + 1;
490600e1779SMasatake YAMATO 	}
491600e1779SMasatake YAMATO 
492600e1779SMasatake YAMATO out:
493600e1779SMasatake YAMATO 	return error;
494600e1779SMasatake YAMATO }
495600e1779SMasatake YAMATO 
496600e1779SMasatake YAMATO static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
497600e1779SMasatake YAMATO 				size_t size)
498600e1779SMasatake YAMATO {
499600e1779SMasatake YAMATO 	ssize_t len;
500600e1779SMasatake YAMATO 	ssize_t used = 0;
501600e1779SMasatake YAMATO 
502600e1779SMasatake YAMATO 	len = security_inode_listsecurity(dentry->d_inode, buffer, size);
503600e1779SMasatake YAMATO 	if (len < 0)
504600e1779SMasatake YAMATO 		return len;
505600e1779SMasatake YAMATO 	used += len;
506600e1779SMasatake YAMATO 	if (buffer) {
507600e1779SMasatake YAMATO 		if (size < used)
508600e1779SMasatake YAMATO 			return -ERANGE;
509600e1779SMasatake YAMATO 		buffer += len;
510600e1779SMasatake YAMATO 	}
511600e1779SMasatake YAMATO 
512600e1779SMasatake YAMATO 	len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
513600e1779SMasatake YAMATO 	used += len;
514600e1779SMasatake YAMATO 	if (buffer) {
515600e1779SMasatake YAMATO 		if (size < used)
516600e1779SMasatake YAMATO 			return -ERANGE;
517600e1779SMasatake YAMATO 		memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
518600e1779SMasatake YAMATO 		buffer += len;
519600e1779SMasatake YAMATO 	}
520600e1779SMasatake YAMATO 
521600e1779SMasatake YAMATO 	return used;
522600e1779SMasatake YAMATO }
523600e1779SMasatake YAMATO 
524600e1779SMasatake YAMATO static const struct inode_operations sockfs_inode_ops = {
525600e1779SMasatake YAMATO 	.getxattr = sockfs_getxattr,
526600e1779SMasatake YAMATO 	.listxattr = sockfs_listxattr,
527600e1779SMasatake YAMATO };
528600e1779SMasatake YAMATO 
5291da177e4SLinus Torvalds /**
5301da177e4SLinus Torvalds  *	sock_alloc	-	allocate a socket
5311da177e4SLinus Torvalds  *
5321da177e4SLinus Torvalds  *	Allocate a new inode and socket object. The two are bound together
5331da177e4SLinus Torvalds  *	and initialised. The socket is then returned. If we are out of inodes
5341da177e4SLinus Torvalds  *	NULL is returned.
5351da177e4SLinus Torvalds  */
5361da177e4SLinus Torvalds 
5371da177e4SLinus Torvalds static struct socket *sock_alloc(void)
5381da177e4SLinus Torvalds {
5391da177e4SLinus Torvalds 	struct inode *inode;
5401da177e4SLinus Torvalds 	struct socket *sock;
5411da177e4SLinus Torvalds 
542a209dfc7SEric Dumazet 	inode = new_inode_pseudo(sock_mnt->mnt_sb);
5431da177e4SLinus Torvalds 	if (!inode)
5441da177e4SLinus Torvalds 		return NULL;
5451da177e4SLinus Torvalds 
5461da177e4SLinus Torvalds 	sock = SOCKET_I(inode);
5471da177e4SLinus Torvalds 
54829a020d3SEric Dumazet 	kmemcheck_annotate_bitfield(sock, type);
54985fe4025SChristoph Hellwig 	inode->i_ino = get_next_ino();
5501da177e4SLinus Torvalds 	inode->i_mode = S_IFSOCK | S_IRWXUGO;
5518192b0c4SDavid Howells 	inode->i_uid = current_fsuid();
5528192b0c4SDavid Howells 	inode->i_gid = current_fsgid();
553600e1779SMasatake YAMATO 	inode->i_op = &sockfs_inode_ops;
5541da177e4SLinus Torvalds 
55519e8d69cSAlex Shi 	this_cpu_add(sockets_in_use, 1);
5561da177e4SLinus Torvalds 	return sock;
5571da177e4SLinus Torvalds }
5581da177e4SLinus Torvalds 
5591da177e4SLinus Torvalds /**
5601da177e4SLinus Torvalds  *	sock_release	-	close a socket
5611da177e4SLinus Torvalds  *	@sock: socket to close
5621da177e4SLinus Torvalds  *
5631da177e4SLinus Torvalds  *	The socket is released from the protocol stack if it has a release
5641da177e4SLinus Torvalds  *	callback, and the inode is then released if the socket is bound to
5651da177e4SLinus Torvalds  *	an inode not a file.
5661da177e4SLinus Torvalds  */
5671da177e4SLinus Torvalds 
5681da177e4SLinus Torvalds void sock_release(struct socket *sock)
5691da177e4SLinus Torvalds {
5701da177e4SLinus Torvalds 	if (sock->ops) {
5711da177e4SLinus Torvalds 		struct module *owner = sock->ops->owner;
5721da177e4SLinus Torvalds 
5731da177e4SLinus Torvalds 		sock->ops->release(sock);
5741da177e4SLinus Torvalds 		sock->ops = NULL;
5751da177e4SLinus Torvalds 		module_put(owner);
5761da177e4SLinus Torvalds 	}
5771da177e4SLinus Torvalds 
578eaefd110SEric Dumazet 	if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
5793410f22eSYang Yingliang 		pr_err("%s: fasync list not empty!\n", __func__);
5801da177e4SLinus Torvalds 
581b09e786bSMikulas Patocka 	if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags))
582b09e786bSMikulas Patocka 		return;
583b09e786bSMikulas Patocka 
58419e8d69cSAlex Shi 	this_cpu_sub(sockets_in_use, 1);
5851da177e4SLinus Torvalds 	if (!sock->file) {
5861da177e4SLinus Torvalds 		iput(SOCK_INODE(sock));
5871da177e4SLinus Torvalds 		return;
5881da177e4SLinus Torvalds 	}
5891da177e4SLinus Torvalds 	sock->file = NULL;
5901da177e4SLinus Torvalds }
591c6d409cfSEric Dumazet EXPORT_SYMBOL(sock_release);
5921da177e4SLinus Torvalds 
59367cc0d40SWillem de Bruijn void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags)
59420d49473SPatrick Ohly {
595140c55d4SEric Dumazet 	u8 flags = *tx_flags;
596140c55d4SEric Dumazet 
597b9f40e21SWillem de Bruijn 	if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
598140c55d4SEric Dumazet 		flags |= SKBTX_HW_TSTAMP;
599140c55d4SEric Dumazet 
600b9f40e21SWillem de Bruijn 	if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
601140c55d4SEric Dumazet 		flags |= SKBTX_SW_TSTAMP;
602140c55d4SEric Dumazet 
603e7fd2885SWillem de Bruijn 	if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)
604140c55d4SEric Dumazet 		flags |= SKBTX_SCHED_TSTAMP;
605140c55d4SEric Dumazet 
606e1c8a607SWillem de Bruijn 	if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)
607140c55d4SEric Dumazet 		flags |= SKBTX_ACK_TSTAMP;
608e7fd2885SWillem de Bruijn 
609140c55d4SEric Dumazet 	*tx_flags = flags;
61020d49473SPatrick Ohly }
61167cc0d40SWillem de Bruijn EXPORT_SYMBOL(__sock_tx_timestamp);
61220d49473SPatrick Ohly 
6131b784140SYing Xue static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg,
6141b784140SYing Xue 				     size_t size)
6151da177e4SLinus Torvalds {
6161b784140SYing Xue 	return sock->ops->sendmsg(sock, msg, size);
6171da177e4SLinus Torvalds }
6180cf00c6fSGu Zheng 
6190cf00c6fSGu Zheng int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
6200cf00c6fSGu Zheng {
6211b784140SYing Xue 	int err = security_socket_sendmsg(sock, msg, size);
6221b784140SYing Xue 
6231b784140SYing Xue 	return err ?: sock_sendmsg_nosec(sock, msg, size);
6240cf00c6fSGu Zheng }
625c6d409cfSEric Dumazet EXPORT_SYMBOL(sock_sendmsg);
6261da177e4SLinus Torvalds 
6271da177e4SLinus Torvalds int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
6281da177e4SLinus Torvalds 		   struct kvec *vec, size_t num, size_t size)
6291da177e4SLinus Torvalds {
6301da177e4SLinus Torvalds 	mm_segment_t oldfs = get_fs();
6311da177e4SLinus Torvalds 	int result;
6321da177e4SLinus Torvalds 
6331da177e4SLinus Torvalds 	set_fs(KERNEL_DS);
6341da177e4SLinus Torvalds 	/*
6351da177e4SLinus Torvalds 	 * the following is safe, since for compiler definitions of kvec and
6361da177e4SLinus Torvalds 	 * iovec are identical, yielding the same in-core layout and alignment
6371da177e4SLinus Torvalds 	 */
638c0371da6SAl Viro 	iov_iter_init(&msg->msg_iter, WRITE, (struct iovec *)vec, num, size);
6391da177e4SLinus Torvalds 	result = sock_sendmsg(sock, msg, size);
6401da177e4SLinus Torvalds 	set_fs(oldfs);
6411da177e4SLinus Torvalds 	return result;
6421da177e4SLinus Torvalds }
643c6d409cfSEric Dumazet EXPORT_SYMBOL(kernel_sendmsg);
6441da177e4SLinus Torvalds 
64592f37fd2SEric Dumazet /*
64692f37fd2SEric Dumazet  * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
64792f37fd2SEric Dumazet  */
64892f37fd2SEric Dumazet void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
64992f37fd2SEric Dumazet 	struct sk_buff *skb)
65092f37fd2SEric Dumazet {
65120d49473SPatrick Ohly 	int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
652f24b9be5SWillem de Bruijn 	struct scm_timestamping tss;
65320d49473SPatrick Ohly 	int empty = 1;
65420d49473SPatrick Ohly 	struct skb_shared_hwtstamps *shhwtstamps =
65520d49473SPatrick Ohly 		skb_hwtstamps(skb);
65692f37fd2SEric Dumazet 
65720d49473SPatrick Ohly 	/* Race occurred between timestamp enabling and packet
65820d49473SPatrick Ohly 	   receiving.  Fill in the current time for now. */
65920d49473SPatrick Ohly 	if (need_software_tstamp && skb->tstamp.tv64 == 0)
66020d49473SPatrick Ohly 		__net_timestamp(skb);
66120d49473SPatrick Ohly 
66220d49473SPatrick Ohly 	if (need_software_tstamp) {
66392f37fd2SEric Dumazet 		if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
66492f37fd2SEric Dumazet 			struct timeval tv;
66520d49473SPatrick Ohly 			skb_get_timestamp(skb, &tv);
66620d49473SPatrick Ohly 			put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
66720d49473SPatrick Ohly 				 sizeof(tv), &tv);
66892f37fd2SEric Dumazet 		} else {
669f24b9be5SWillem de Bruijn 			struct timespec ts;
670f24b9be5SWillem de Bruijn 			skb_get_timestampns(skb, &ts);
67120d49473SPatrick Ohly 			put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
672f24b9be5SWillem de Bruijn 				 sizeof(ts), &ts);
67392f37fd2SEric Dumazet 		}
67492f37fd2SEric Dumazet 	}
67592f37fd2SEric Dumazet 
676f24b9be5SWillem de Bruijn 	memset(&tss, 0, sizeof(tss));
677c199105dSWillem de Bruijn 	if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
678f24b9be5SWillem de Bruijn 	    ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
67920d49473SPatrick Ohly 		empty = 0;
6804d276eb6SWillem de Bruijn 	if (shhwtstamps &&
681b9f40e21SWillem de Bruijn 	    (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
682f24b9be5SWillem de Bruijn 	    ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2))
68320d49473SPatrick Ohly 		empty = 0;
68420d49473SPatrick Ohly 	if (!empty)
68520d49473SPatrick Ohly 		put_cmsg(msg, SOL_SOCKET,
686f24b9be5SWillem de Bruijn 			 SCM_TIMESTAMPING, sizeof(tss), &tss);
68720d49473SPatrick Ohly }
6887c81fd8bSArnaldo Carvalho de Melo EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
6897c81fd8bSArnaldo Carvalho de Melo 
6906e3e939fSJohannes Berg void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
6916e3e939fSJohannes Berg 	struct sk_buff *skb)
6926e3e939fSJohannes Berg {
6936e3e939fSJohannes Berg 	int ack;
6946e3e939fSJohannes Berg 
6956e3e939fSJohannes Berg 	if (!sock_flag(sk, SOCK_WIFI_STATUS))
6966e3e939fSJohannes Berg 		return;
6976e3e939fSJohannes Berg 	if (!skb->wifi_acked_valid)
6986e3e939fSJohannes Berg 		return;
6996e3e939fSJohannes Berg 
7006e3e939fSJohannes Berg 	ack = skb->wifi_acked;
7016e3e939fSJohannes Berg 
7026e3e939fSJohannes Berg 	put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
7036e3e939fSJohannes Berg }
7046e3e939fSJohannes Berg EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
7056e3e939fSJohannes Berg 
70611165f14Sstephen hemminger static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
70711165f14Sstephen hemminger 				   struct sk_buff *skb)
7083b885787SNeil Horman {
709744d5a3eSEyal Birger 	if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount)
7103b885787SNeil Horman 		put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
711744d5a3eSEyal Birger 			sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount);
7123b885787SNeil Horman }
7133b885787SNeil Horman 
/*
 * Attach receive-side ancillary data for @skb to @msg: first the
 * timestamp control message(s), then the drop-count one.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
7213b885787SNeil Horman 
7221b784140SYing Xue static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
7231b784140SYing Xue 				     size_t size, int flags)
7241da177e4SLinus Torvalds {
7251b784140SYing Xue 	return sock->ops->recvmsg(sock, msg, size, flags);
7261da177e4SLinus Torvalds }
7271da177e4SLinus Torvalds 
7281b784140SYing Xue int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
7291b784140SYing Xue 		 int flags)
730a2e27255SArnaldo Carvalho de Melo {
731a2e27255SArnaldo Carvalho de Melo 	int err = security_socket_recvmsg(sock, msg, size, flags);
732a2e27255SArnaldo Carvalho de Melo 
7331b784140SYing Xue 	return err ?: sock_recvmsg_nosec(sock, msg, size, flags);
7341da177e4SLinus Torvalds }
735c6d409cfSEric Dumazet EXPORT_SYMBOL(sock_recvmsg);
7361da177e4SLinus Torvalds 
737c1249c0aSMartin Lucina /**
738c1249c0aSMartin Lucina  * kernel_recvmsg - Receive a message from a socket (kernel space)
739c1249c0aSMartin Lucina  * @sock:       The socket to receive the message from
740c1249c0aSMartin Lucina  * @msg:        Received message
741c1249c0aSMartin Lucina  * @vec:        Input s/g array for message data
742c1249c0aSMartin Lucina  * @num:        Size of input s/g array
743c1249c0aSMartin Lucina  * @size:       Number of bytes to read
744c1249c0aSMartin Lucina  * @flags:      Message flags (MSG_DONTWAIT, etc...)
745c1249c0aSMartin Lucina  *
746c1249c0aSMartin Lucina  * On return the msg structure contains the scatter/gather array passed in the
747c1249c0aSMartin Lucina  * vec argument. The array is modified so that it consists of the unfilled
748c1249c0aSMartin Lucina  * portion of the original array.
749c1249c0aSMartin Lucina  *
750c1249c0aSMartin Lucina  * The returned value is the total number of bytes received, or an error.
751c1249c0aSMartin Lucina  */
7521da177e4SLinus Torvalds int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
75389bddce5SStephen Hemminger 		   struct kvec *vec, size_t num, size_t size, int flags)
7541da177e4SLinus Torvalds {
7551da177e4SLinus Torvalds 	mm_segment_t oldfs = get_fs();
7561da177e4SLinus Torvalds 	int result;
7571da177e4SLinus Torvalds 
7581da177e4SLinus Torvalds 	set_fs(KERNEL_DS);
7591da177e4SLinus Torvalds 	/*
7601da177e4SLinus Torvalds 	 * the following is safe, since for compiler definitions of kvec and
7611da177e4SLinus Torvalds 	 * iovec are identical, yielding the same in-core layout and alignment
7621da177e4SLinus Torvalds 	 */
763c0371da6SAl Viro 	iov_iter_init(&msg->msg_iter, READ, (struct iovec *)vec, num, size);
7641da177e4SLinus Torvalds 	result = sock_recvmsg(sock, msg, size, flags);
7651da177e4SLinus Torvalds 	set_fs(oldfs);
7661da177e4SLinus Torvalds 	return result;
7671da177e4SLinus Torvalds }
768c6d409cfSEric Dumazet EXPORT_SYMBOL(kernel_recvmsg);
7691da177e4SLinus Torvalds 
77020380731SArnaldo Carvalho de Melo static ssize_t sock_sendpage(struct file *file, struct page *page,
7711da177e4SLinus Torvalds 			     int offset, size_t size, loff_t *ppos, int more)
7721da177e4SLinus Torvalds {
7731da177e4SLinus Torvalds 	struct socket *sock;
7741da177e4SLinus Torvalds 	int flags;
7751da177e4SLinus Torvalds 
776b69aee04SEric Dumazet 	sock = file->private_data;
7771da177e4SLinus Torvalds 
77835f9c09fSEric Dumazet 	flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
77935f9c09fSEric Dumazet 	/* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
78035f9c09fSEric Dumazet 	flags |= more;
7811da177e4SLinus Torvalds 
782e6949583SLinus Torvalds 	return kernel_sendpage(sock, page, offset, size, flags);
7831da177e4SLinus Torvalds }
7841da177e4SLinus Torvalds 
7859c55e01cSJens Axboe static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
7869c55e01cSJens Axboe 				struct pipe_inode_info *pipe, size_t len,
7879c55e01cSJens Axboe 				unsigned int flags)
7889c55e01cSJens Axboe {
7899c55e01cSJens Axboe 	struct socket *sock = file->private_data;
7909c55e01cSJens Axboe 
791997b37daSRémi Denis-Courmont 	if (unlikely(!sock->ops->splice_read))
792997b37daSRémi Denis-Courmont 		return -EINVAL;
793997b37daSRémi Denis-Courmont 
7949c55e01cSJens Axboe 	return sock->ops->splice_read(sock, ppos, pipe, len, flags);
7959c55e01cSJens Axboe }
7969c55e01cSJens Axboe 
7978ae5e030SAl Viro static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
798ce1d4d3eSChristoph Hellwig {
7996d652330SAl Viro 	struct file *file = iocb->ki_filp;
8006d652330SAl Viro 	struct socket *sock = file->private_data;
8010345f931Stadeusz.struk@intel.com 	struct msghdr msg = {.msg_iter = *to,
8020345f931Stadeusz.struk@intel.com 			     .msg_iocb = iocb};
8038ae5e030SAl Viro 	ssize_t res;
804ce1d4d3eSChristoph Hellwig 
8058ae5e030SAl Viro 	if (file->f_flags & O_NONBLOCK)
8068ae5e030SAl Viro 		msg.msg_flags = MSG_DONTWAIT;
8078ae5e030SAl Viro 
8088ae5e030SAl Viro 	if (iocb->ki_pos != 0)
809ce1d4d3eSChristoph Hellwig 		return -ESPIPE;
810027445c3SBadari Pulavarty 
81173a7075eSKent Overstreet 	if (iocb->ki_nbytes == 0)	/* Match SYS5 behaviour */
812ce1d4d3eSChristoph Hellwig 		return 0;
813ce1d4d3eSChristoph Hellwig 
8141b784140SYing Xue 	res = sock_recvmsg(sock, &msg, iocb->ki_nbytes, msg.msg_flags);
8158ae5e030SAl Viro 	*to = msg.msg_iter;
8168ae5e030SAl Viro 	return res;
817ce1d4d3eSChristoph Hellwig }
818ce1d4d3eSChristoph Hellwig 
8198ae5e030SAl Viro static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
8201da177e4SLinus Torvalds {
8216d652330SAl Viro 	struct file *file = iocb->ki_filp;
8226d652330SAl Viro 	struct socket *sock = file->private_data;
8230345f931Stadeusz.struk@intel.com 	struct msghdr msg = {.msg_iter = *from,
8240345f931Stadeusz.struk@intel.com 			     .msg_iocb = iocb};
8258ae5e030SAl Viro 	ssize_t res;
8261da177e4SLinus Torvalds 
8278ae5e030SAl Viro 	if (iocb->ki_pos != 0)
828ce1d4d3eSChristoph Hellwig 		return -ESPIPE;
829027445c3SBadari Pulavarty 
8308ae5e030SAl Viro 	if (file->f_flags & O_NONBLOCK)
8318ae5e030SAl Viro 		msg.msg_flags = MSG_DONTWAIT;
8328ae5e030SAl Viro 
8336d652330SAl Viro 	if (sock->type == SOCK_SEQPACKET)
8346d652330SAl Viro 		msg.msg_flags |= MSG_EOR;
8356d652330SAl Viro 
8361b784140SYing Xue 	res = sock_sendmsg(sock, &msg, iocb->ki_nbytes);
8378ae5e030SAl Viro 	*from = msg.msg_iter;
8388ae5e030SAl Viro 	return res;
8391da177e4SLinus Torvalds }
8401da177e4SLinus Torvalds 
8411da177e4SLinus Torvalds /*
8421da177e4SLinus Torvalds  * Atomic setting of ioctl hooks to avoid race
8431da177e4SLinus Torvalds  * with module unload.
8441da177e4SLinus Torvalds  */
8451da177e4SLinus Torvalds 
8464a3e2f71SArjan van de Ven static DEFINE_MUTEX(br_ioctl_mutex);
847c6d409cfSEric Dumazet static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
8481da177e4SLinus Torvalds 
849881d966bSEric W. Biederman void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
8501da177e4SLinus Torvalds {
8514a3e2f71SArjan van de Ven 	mutex_lock(&br_ioctl_mutex);
8521da177e4SLinus Torvalds 	br_ioctl_hook = hook;
8534a3e2f71SArjan van de Ven 	mutex_unlock(&br_ioctl_mutex);
8541da177e4SLinus Torvalds }
8551da177e4SLinus Torvalds EXPORT_SYMBOL(brioctl_set);
8561da177e4SLinus Torvalds 
8574a3e2f71SArjan van de Ven static DEFINE_MUTEX(vlan_ioctl_mutex);
858881d966bSEric W. Biederman static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
8591da177e4SLinus Torvalds 
860881d966bSEric W. Biederman void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
8611da177e4SLinus Torvalds {
8624a3e2f71SArjan van de Ven 	mutex_lock(&vlan_ioctl_mutex);
8631da177e4SLinus Torvalds 	vlan_ioctl_hook = hook;
8644a3e2f71SArjan van de Ven 	mutex_unlock(&vlan_ioctl_mutex);
8651da177e4SLinus Torvalds }
8661da177e4SLinus Torvalds EXPORT_SYMBOL(vlan_ioctl_set);
8671da177e4SLinus Torvalds 
8684a3e2f71SArjan van de Ven static DEFINE_MUTEX(dlci_ioctl_mutex);
8691da177e4SLinus Torvalds static int (*dlci_ioctl_hook) (unsigned int, void __user *);
8701da177e4SLinus Torvalds 
8711da177e4SLinus Torvalds void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
8721da177e4SLinus Torvalds {
8734a3e2f71SArjan van de Ven 	mutex_lock(&dlci_ioctl_mutex);
8741da177e4SLinus Torvalds 	dlci_ioctl_hook = hook;
8754a3e2f71SArjan van de Ven 	mutex_unlock(&dlci_ioctl_mutex);
8761da177e4SLinus Torvalds }
8771da177e4SLinus Torvalds EXPORT_SYMBOL(dlci_ioctl_set);
8781da177e4SLinus Torvalds 
8796b96018bSArnd Bergmann static long sock_do_ioctl(struct net *net, struct socket *sock,
8806b96018bSArnd Bergmann 				 unsigned int cmd, unsigned long arg)
8816b96018bSArnd Bergmann {
8826b96018bSArnd Bergmann 	int err;
8836b96018bSArnd Bergmann 	void __user *argp = (void __user *)arg;
8846b96018bSArnd Bergmann 
8856b96018bSArnd Bergmann 	err = sock->ops->ioctl(sock, cmd, arg);
8866b96018bSArnd Bergmann 
8876b96018bSArnd Bergmann 	/*
8886b96018bSArnd Bergmann 	 * If this ioctl is unknown try to hand it down
8896b96018bSArnd Bergmann 	 * to the NIC driver.
8906b96018bSArnd Bergmann 	 */
8916b96018bSArnd Bergmann 	if (err == -ENOIOCTLCMD)
8926b96018bSArnd Bergmann 		err = dev_ioctl(net, cmd, argp);
8936b96018bSArnd Bergmann 
8946b96018bSArnd Bergmann 	return err;
8956b96018bSArnd Bergmann }
8966b96018bSArnd Bergmann 
8971da177e4SLinus Torvalds /*
8981da177e4SLinus Torvalds  *	With an ioctl, arg may well be a user mode pointer, but we don't know
8991da177e4SLinus Torvalds  *	what to do with it - that's up to the protocol still.
9001da177e4SLinus Torvalds  */
9011da177e4SLinus Torvalds 
9021da177e4SLinus Torvalds static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
9031da177e4SLinus Torvalds {
9041da177e4SLinus Torvalds 	struct socket *sock;
905881d966bSEric W. Biederman 	struct sock *sk;
9061da177e4SLinus Torvalds 	void __user *argp = (void __user *)arg;
9071da177e4SLinus Torvalds 	int pid, err;
908881d966bSEric W. Biederman 	struct net *net;
9091da177e4SLinus Torvalds 
910b69aee04SEric Dumazet 	sock = file->private_data;
911881d966bSEric W. Biederman 	sk = sock->sk;
9123b1e0a65SYOSHIFUJI Hideaki 	net = sock_net(sk);
9131da177e4SLinus Torvalds 	if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
914881d966bSEric W. Biederman 		err = dev_ioctl(net, cmd, argp);
9151da177e4SLinus Torvalds 	} else
9163d23e349SJohannes Berg #ifdef CONFIG_WEXT_CORE
9171da177e4SLinus Torvalds 	if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
918881d966bSEric W. Biederman 		err = dev_ioctl(net, cmd, argp);
9191da177e4SLinus Torvalds 	} else
9203d23e349SJohannes Berg #endif
9211da177e4SLinus Torvalds 		switch (cmd) {
9221da177e4SLinus Torvalds 		case FIOSETOWN:
9231da177e4SLinus Torvalds 		case SIOCSPGRP:
9241da177e4SLinus Torvalds 			err = -EFAULT;
9251da177e4SLinus Torvalds 			if (get_user(pid, (int __user *)argp))
9261da177e4SLinus Torvalds 				break;
927e0b93eddSJeff Layton 			f_setown(sock->file, pid, 1);
928e0b93eddSJeff Layton 			err = 0;
9291da177e4SLinus Torvalds 			break;
9301da177e4SLinus Torvalds 		case FIOGETOWN:
9311da177e4SLinus Torvalds 		case SIOCGPGRP:
932609d7fa9SEric W. Biederman 			err = put_user(f_getown(sock->file),
93389bddce5SStephen Hemminger 				       (int __user *)argp);
9341da177e4SLinus Torvalds 			break;
9351da177e4SLinus Torvalds 		case SIOCGIFBR:
9361da177e4SLinus Torvalds 		case SIOCSIFBR:
9371da177e4SLinus Torvalds 		case SIOCBRADDBR:
9381da177e4SLinus Torvalds 		case SIOCBRDELBR:
9391da177e4SLinus Torvalds 			err = -ENOPKG;
9401da177e4SLinus Torvalds 			if (!br_ioctl_hook)
9411da177e4SLinus Torvalds 				request_module("bridge");
9421da177e4SLinus Torvalds 
9434a3e2f71SArjan van de Ven 			mutex_lock(&br_ioctl_mutex);
9441da177e4SLinus Torvalds 			if (br_ioctl_hook)
945881d966bSEric W. Biederman 				err = br_ioctl_hook(net, cmd, argp);
9464a3e2f71SArjan van de Ven 			mutex_unlock(&br_ioctl_mutex);
9471da177e4SLinus Torvalds 			break;
9481da177e4SLinus Torvalds 		case SIOCGIFVLAN:
9491da177e4SLinus Torvalds 		case SIOCSIFVLAN:
9501da177e4SLinus Torvalds 			err = -ENOPKG;
9511da177e4SLinus Torvalds 			if (!vlan_ioctl_hook)
9521da177e4SLinus Torvalds 				request_module("8021q");
9531da177e4SLinus Torvalds 
9544a3e2f71SArjan van de Ven 			mutex_lock(&vlan_ioctl_mutex);
9551da177e4SLinus Torvalds 			if (vlan_ioctl_hook)
956881d966bSEric W. Biederman 				err = vlan_ioctl_hook(net, argp);
9574a3e2f71SArjan van de Ven 			mutex_unlock(&vlan_ioctl_mutex);
9581da177e4SLinus Torvalds 			break;
9591da177e4SLinus Torvalds 		case SIOCADDDLCI:
9601da177e4SLinus Torvalds 		case SIOCDELDLCI:
9611da177e4SLinus Torvalds 			err = -ENOPKG;
9621da177e4SLinus Torvalds 			if (!dlci_ioctl_hook)
9631da177e4SLinus Torvalds 				request_module("dlci");
9641da177e4SLinus Torvalds 
9654a3e2f71SArjan van de Ven 			mutex_lock(&dlci_ioctl_mutex);
9667512cbf6SPavel Emelyanov 			if (dlci_ioctl_hook)
9671da177e4SLinus Torvalds 				err = dlci_ioctl_hook(cmd, argp);
9684a3e2f71SArjan van de Ven 			mutex_unlock(&dlci_ioctl_mutex);
9691da177e4SLinus Torvalds 			break;
9701da177e4SLinus Torvalds 		default:
9716b96018bSArnd Bergmann 			err = sock_do_ioctl(net, sock, cmd, arg);
9721da177e4SLinus Torvalds 			break;
9731da177e4SLinus Torvalds 		}
9741da177e4SLinus Torvalds 	return err;
9751da177e4SLinus Torvalds }
9761da177e4SLinus Torvalds 
9771da177e4SLinus Torvalds int sock_create_lite(int family, int type, int protocol, struct socket **res)
9781da177e4SLinus Torvalds {
9791da177e4SLinus Torvalds 	int err;
9801da177e4SLinus Torvalds 	struct socket *sock = NULL;
9811da177e4SLinus Torvalds 
9821da177e4SLinus Torvalds 	err = security_socket_create(family, type, protocol, 1);
9831da177e4SLinus Torvalds 	if (err)
9841da177e4SLinus Torvalds 		goto out;
9851da177e4SLinus Torvalds 
9861da177e4SLinus Torvalds 	sock = sock_alloc();
9871da177e4SLinus Torvalds 	if (!sock) {
9881da177e4SLinus Torvalds 		err = -ENOMEM;
9891da177e4SLinus Torvalds 		goto out;
9901da177e4SLinus Torvalds 	}
9911da177e4SLinus Torvalds 
9921da177e4SLinus Torvalds 	sock->type = type;
9937420ed23SVenkat Yekkirala 	err = security_socket_post_create(sock, family, type, protocol, 1);
9947420ed23SVenkat Yekkirala 	if (err)
9957420ed23SVenkat Yekkirala 		goto out_release;
9967420ed23SVenkat Yekkirala 
9971da177e4SLinus Torvalds out:
9981da177e4SLinus Torvalds 	*res = sock;
9991da177e4SLinus Torvalds 	return err;
10007420ed23SVenkat Yekkirala out_release:
10017420ed23SVenkat Yekkirala 	sock_release(sock);
10027420ed23SVenkat Yekkirala 	sock = NULL;
10037420ed23SVenkat Yekkirala 	goto out;
10041da177e4SLinus Torvalds }
1005c6d409cfSEric Dumazet EXPORT_SYMBOL(sock_create_lite);
10061da177e4SLinus Torvalds 
10071da177e4SLinus Torvalds /* No kernel lock held - perfect */
10081da177e4SLinus Torvalds static unsigned int sock_poll(struct file *file, poll_table *wait)
10091da177e4SLinus Torvalds {
1010cbf55001SEliezer Tamir 	unsigned int busy_flag = 0;
10111da177e4SLinus Torvalds 	struct socket *sock;
10121da177e4SLinus Torvalds 
10131da177e4SLinus Torvalds 	/*
10141da177e4SLinus Torvalds 	 *      We can't return errors to poll, so it's either yes or no.
10151da177e4SLinus Torvalds 	 */
1016b69aee04SEric Dumazet 	sock = file->private_data;
10172d48d67fSEliezer Tamir 
1018cbf55001SEliezer Tamir 	if (sk_can_busy_loop(sock->sk)) {
10192d48d67fSEliezer Tamir 		/* this socket can poll_ll so tell the system call */
1020cbf55001SEliezer Tamir 		busy_flag = POLL_BUSY_LOOP;
10212d48d67fSEliezer Tamir 
10222d48d67fSEliezer Tamir 		/* once, only if requested by syscall */
1023cbf55001SEliezer Tamir 		if (wait && (wait->_key & POLL_BUSY_LOOP))
1024cbf55001SEliezer Tamir 			sk_busy_loop(sock->sk, 1);
10252d48d67fSEliezer Tamir 	}
10262d48d67fSEliezer Tamir 
1027cbf55001SEliezer Tamir 	return busy_flag | sock->ops->poll(file, sock, wait);
10281da177e4SLinus Torvalds }
10291da177e4SLinus Torvalds 
10301da177e4SLinus Torvalds static int sock_mmap(struct file *file, struct vm_area_struct *vma)
10311da177e4SLinus Torvalds {
1032b69aee04SEric Dumazet 	struct socket *sock = file->private_data;
10331da177e4SLinus Torvalds 
10341da177e4SLinus Torvalds 	return sock->ops->mmap(file, sock, vma);
10351da177e4SLinus Torvalds }
10361da177e4SLinus Torvalds 
/*
 * release handler for socket files: release the socket embedded in
 * @inode.  @filp is not used.  Always returns 0.
 */
static int sock_close(struct inode *inode, struct file *filp)
{
	struct socket *sock = SOCKET_I(inode);

	sock_release(sock);
	return 0;
}
10421da177e4SLinus Torvalds 
10431da177e4SLinus Torvalds /*
10441da177e4SLinus Torvalds  *	Update the socket async list
10451da177e4SLinus Torvalds  *
10461da177e4SLinus Torvalds  *	Fasync_list locking strategy.
10471da177e4SLinus Torvalds  *
10481da177e4SLinus Torvalds  *	1. fasync_list is modified only under process context socket lock
10491da177e4SLinus Torvalds  *	   i.e. under semaphore.
10501da177e4SLinus Torvalds  *	2. fasync_list is used under read_lock(&sk->sk_callback_lock)
1051989a2979SEric Dumazet  *	   or under socket lock
10521da177e4SLinus Torvalds  */
10531da177e4SLinus Torvalds 
10541da177e4SLinus Torvalds static int sock_fasync(int fd, struct file *filp, int on)
10551da177e4SLinus Torvalds {
1056989a2979SEric Dumazet 	struct socket *sock = filp->private_data;
1057989a2979SEric Dumazet 	struct sock *sk = sock->sk;
1058eaefd110SEric Dumazet 	struct socket_wq *wq;
10591da177e4SLinus Torvalds 
1060989a2979SEric Dumazet 	if (sk == NULL)
10611da177e4SLinus Torvalds 		return -EINVAL;
10621da177e4SLinus Torvalds 
10631da177e4SLinus Torvalds 	lock_sock(sk);
1064eaefd110SEric Dumazet 	wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
1065eaefd110SEric Dumazet 	fasync_helper(fd, filp, on, &wq->fasync_list);
10661da177e4SLinus Torvalds 
1067eaefd110SEric Dumazet 	if (!wq->fasync_list)
1068bcdce719SEric Dumazet 		sock_reset_flag(sk, SOCK_FASYNC);
1069989a2979SEric Dumazet 	else
1070989a2979SEric Dumazet 		sock_set_flag(sk, SOCK_FASYNC);
10711da177e4SLinus Torvalds 
1072989a2979SEric Dumazet 	release_sock(sk);
10731da177e4SLinus Torvalds 	return 0;
10741da177e4SLinus Torvalds }
10751da177e4SLinus Torvalds 
107643815482SEric Dumazet /* This function may be called only under socket lock or callback_lock or rcu_lock */
10771da177e4SLinus Torvalds 
10781da177e4SLinus Torvalds int sock_wake_async(struct socket *sock, int how, int band)
10791da177e4SLinus Torvalds {
108043815482SEric Dumazet 	struct socket_wq *wq;
108143815482SEric Dumazet 
108243815482SEric Dumazet 	if (!sock)
10831da177e4SLinus Torvalds 		return -1;
108443815482SEric Dumazet 	rcu_read_lock();
108543815482SEric Dumazet 	wq = rcu_dereference(sock->wq);
108643815482SEric Dumazet 	if (!wq || !wq->fasync_list) {
108743815482SEric Dumazet 		rcu_read_unlock();
108843815482SEric Dumazet 		return -1;
108943815482SEric Dumazet 	}
109089bddce5SStephen Hemminger 	switch (how) {
10918d8ad9d7SPavel Emelyanov 	case SOCK_WAKE_WAITD:
10921da177e4SLinus Torvalds 		if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
10931da177e4SLinus Torvalds 			break;
10941da177e4SLinus Torvalds 		goto call_kill;
10958d8ad9d7SPavel Emelyanov 	case SOCK_WAKE_SPACE:
10961da177e4SLinus Torvalds 		if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
10971da177e4SLinus Torvalds 			break;
10981da177e4SLinus Torvalds 		/* fall through */
10998d8ad9d7SPavel Emelyanov 	case SOCK_WAKE_IO:
11001da177e4SLinus Torvalds call_kill:
110143815482SEric Dumazet 		kill_fasync(&wq->fasync_list, SIGIO, band);
11021da177e4SLinus Torvalds 		break;
11038d8ad9d7SPavel Emelyanov 	case SOCK_WAKE_URG:
110443815482SEric Dumazet 		kill_fasync(&wq->fasync_list, SIGURG, band);
11051da177e4SLinus Torvalds 	}
110643815482SEric Dumazet 	rcu_read_unlock();
11071da177e4SLinus Torvalds 	return 0;
11081da177e4SLinus Torvalds }
1109c6d409cfSEric Dumazet EXPORT_SYMBOL(sock_wake_async);
11101da177e4SLinus Torvalds 
1111721db93aSPavel Emelyanov int __sock_create(struct net *net, int family, int type, int protocol,
111289bddce5SStephen Hemminger 			 struct socket **res, int kern)
11131da177e4SLinus Torvalds {
11141da177e4SLinus Torvalds 	int err;
11151da177e4SLinus Torvalds 	struct socket *sock;
111655737fdaSStephen Hemminger 	const struct net_proto_family *pf;
11171da177e4SLinus Torvalds 
11181da177e4SLinus Torvalds 	/*
11191da177e4SLinus Torvalds 	 *      Check protocol is in range
11201da177e4SLinus Torvalds 	 */
11211da177e4SLinus Torvalds 	if (family < 0 || family >= NPROTO)
11221da177e4SLinus Torvalds 		return -EAFNOSUPPORT;
11231da177e4SLinus Torvalds 	if (type < 0 || type >= SOCK_MAX)
11241da177e4SLinus Torvalds 		return -EINVAL;
11251da177e4SLinus Torvalds 
11261da177e4SLinus Torvalds 	/* Compatibility.
11271da177e4SLinus Torvalds 
11281da177e4SLinus Torvalds 	   This uglymoron is moved from INET layer to here to avoid
11291da177e4SLinus Torvalds 	   deadlock in module load.
11301da177e4SLinus Torvalds 	 */
11311da177e4SLinus Torvalds 	if (family == PF_INET && type == SOCK_PACKET) {
11321da177e4SLinus Torvalds 		static int warned;
11331da177e4SLinus Torvalds 		if (!warned) {
11341da177e4SLinus Torvalds 			warned = 1;
11353410f22eSYang Yingliang 			pr_info("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
113689bddce5SStephen Hemminger 				current->comm);
11371da177e4SLinus Torvalds 		}
11381da177e4SLinus Torvalds 		family = PF_PACKET;
11391da177e4SLinus Torvalds 	}
11401da177e4SLinus Torvalds 
11411da177e4SLinus Torvalds 	err = security_socket_create(family, type, protocol, kern);
11421da177e4SLinus Torvalds 	if (err)
11431da177e4SLinus Torvalds 		return err;
11441da177e4SLinus Torvalds 
114555737fdaSStephen Hemminger 	/*
114655737fdaSStephen Hemminger 	 *	Allocate the socket and allow the family to set things up. if
114755737fdaSStephen Hemminger 	 *	the protocol is 0, the family is instructed to select an appropriate
114855737fdaSStephen Hemminger 	 *	default.
114955737fdaSStephen Hemminger 	 */
115055737fdaSStephen Hemminger 	sock = sock_alloc();
115155737fdaSStephen Hemminger 	if (!sock) {
1152e87cc472SJoe Perches 		net_warn_ratelimited("socket: no more sockets\n");
115355737fdaSStephen Hemminger 		return -ENFILE;	/* Not exactly a match, but its the
115455737fdaSStephen Hemminger 				   closest posix thing */
115555737fdaSStephen Hemminger 	}
115655737fdaSStephen Hemminger 
115755737fdaSStephen Hemminger 	sock->type = type;
115855737fdaSStephen Hemminger 
115995a5afcaSJohannes Berg #ifdef CONFIG_MODULES
11601da177e4SLinus Torvalds 	/* Attempt to load a protocol module if the find failed.
11611da177e4SLinus Torvalds 	 *
11621da177e4SLinus Torvalds 	 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
11631da177e4SLinus Torvalds 	 * requested real, full-featured networking support upon configuration.
11641da177e4SLinus Torvalds 	 * Otherwise module support will break!
11651da177e4SLinus Torvalds 	 */
1166190683a9SEric Dumazet 	if (rcu_access_pointer(net_families[family]) == NULL)
11671da177e4SLinus Torvalds 		request_module("net-pf-%d", family);
11681da177e4SLinus Torvalds #endif
11691da177e4SLinus Torvalds 
117055737fdaSStephen Hemminger 	rcu_read_lock();
117155737fdaSStephen Hemminger 	pf = rcu_dereference(net_families[family]);
11721da177e4SLinus Torvalds 	err = -EAFNOSUPPORT;
117355737fdaSStephen Hemminger 	if (!pf)
117455737fdaSStephen Hemminger 		goto out_release;
11751da177e4SLinus Torvalds 
11761da177e4SLinus Torvalds 	/*
11771da177e4SLinus Torvalds 	 * We will call the ->create function, that possibly is in a loadable
11781da177e4SLinus Torvalds 	 * module, so we have to bump that loadable module refcnt first.
11791da177e4SLinus Torvalds 	 */
118055737fdaSStephen Hemminger 	if (!try_module_get(pf->owner))
11811da177e4SLinus Torvalds 		goto out_release;
11821da177e4SLinus Torvalds 
118355737fdaSStephen Hemminger 	/* Now protected by module ref count */
118455737fdaSStephen Hemminger 	rcu_read_unlock();
118555737fdaSStephen Hemminger 
11863f378b68SEric Paris 	err = pf->create(net, sock, protocol, kern);
118755737fdaSStephen Hemminger 	if (err < 0)
11881da177e4SLinus Torvalds 		goto out_module_put;
1189a79af59eSFrank Filz 
11901da177e4SLinus Torvalds 	/*
11911da177e4SLinus Torvalds 	 * Now to bump the refcnt of the [loadable] module that owns this
11921da177e4SLinus Torvalds 	 * socket at sock_release time we decrement its refcnt.
11931da177e4SLinus Torvalds 	 */
119455737fdaSStephen Hemminger 	if (!try_module_get(sock->ops->owner))
119555737fdaSStephen Hemminger 		goto out_module_busy;
119655737fdaSStephen Hemminger 
11971da177e4SLinus Torvalds 	/*
11981da177e4SLinus Torvalds 	 * Now that we're done with the ->create function, the [loadable]
11991da177e4SLinus Torvalds 	 * module can have its refcnt decremented
12001da177e4SLinus Torvalds 	 */
120155737fdaSStephen Hemminger 	module_put(pf->owner);
12027420ed23SVenkat Yekkirala 	err = security_socket_post_create(sock, family, type, protocol, kern);
12037420ed23SVenkat Yekkirala 	if (err)
12043b185525SHerbert Xu 		goto out_sock_release;
120555737fdaSStephen Hemminger 	*res = sock;
12061da177e4SLinus Torvalds 
120755737fdaSStephen Hemminger 	return 0;
120855737fdaSStephen Hemminger 
120955737fdaSStephen Hemminger out_module_busy:
121055737fdaSStephen Hemminger 	err = -EAFNOSUPPORT;
12111da177e4SLinus Torvalds out_module_put:
121255737fdaSStephen Hemminger 	sock->ops = NULL;
121355737fdaSStephen Hemminger 	module_put(pf->owner);
121455737fdaSStephen Hemminger out_sock_release:
12151da177e4SLinus Torvalds 	sock_release(sock);
121655737fdaSStephen Hemminger 	return err;
121755737fdaSStephen Hemminger 
121855737fdaSStephen Hemminger out_release:
121955737fdaSStephen Hemminger 	rcu_read_unlock();
122055737fdaSStephen Hemminger 	goto out_sock_release;
12211da177e4SLinus Torvalds }
1222721db93aSPavel Emelyanov EXPORT_SYMBOL(__sock_create);
12231da177e4SLinus Torvalds 
12241da177e4SLinus Torvalds int sock_create(int family, int type, int protocol, struct socket **res)
12251da177e4SLinus Torvalds {
12261b8d7ae4SEric W. Biederman 	return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
12271da177e4SLinus Torvalds }
1228c6d409cfSEric Dumazet EXPORT_SYMBOL(sock_create);
12291da177e4SLinus Torvalds 
12301da177e4SLinus Torvalds int sock_create_kern(int family, int type, int protocol, struct socket **res)
12311da177e4SLinus Torvalds {
12321b8d7ae4SEric W. Biederman 	return __sock_create(&init_net, family, type, protocol, res, 1);
12331da177e4SLinus Torvalds }
1234c6d409cfSEric Dumazet EXPORT_SYMBOL(sock_create_kern);
12351da177e4SLinus Torvalds 
12363e0fa65fSHeiko Carstens SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
12371da177e4SLinus Torvalds {
12381da177e4SLinus Torvalds 	int retval;
12391da177e4SLinus Torvalds 	struct socket *sock;
1240a677a039SUlrich Drepper 	int flags;
1241a677a039SUlrich Drepper 
1242e38b36f3SUlrich Drepper 	/* Check the SOCK_* constants for consistency.  */
1243e38b36f3SUlrich Drepper 	BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1244e38b36f3SUlrich Drepper 	BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1245e38b36f3SUlrich Drepper 	BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1246e38b36f3SUlrich Drepper 	BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1247e38b36f3SUlrich Drepper 
1248a677a039SUlrich Drepper 	flags = type & ~SOCK_TYPE_MASK;
124977d27200SUlrich Drepper 	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1250a677a039SUlrich Drepper 		return -EINVAL;
1251a677a039SUlrich Drepper 	type &= SOCK_TYPE_MASK;
12521da177e4SLinus Torvalds 
1253aaca0bdcSUlrich Drepper 	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1254aaca0bdcSUlrich Drepper 		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1255aaca0bdcSUlrich Drepper 
12561da177e4SLinus Torvalds 	retval = sock_create(family, type, protocol, &sock);
12571da177e4SLinus Torvalds 	if (retval < 0)
12581da177e4SLinus Torvalds 		goto out;
12591da177e4SLinus Torvalds 
126077d27200SUlrich Drepper 	retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
12611da177e4SLinus Torvalds 	if (retval < 0)
12621da177e4SLinus Torvalds 		goto out_release;
12631da177e4SLinus Torvalds 
12641da177e4SLinus Torvalds out:
12651da177e4SLinus Torvalds 	/* It may be already another descriptor 8) Not kernel problem. */
12661da177e4SLinus Torvalds 	return retval;
12671da177e4SLinus Torvalds 
12681da177e4SLinus Torvalds out_release:
12691da177e4SLinus Torvalds 	sock_release(sock);
12701da177e4SLinus Torvalds 	return retval;
12711da177e4SLinus Torvalds }
12721da177e4SLinus Torvalds 
12731da177e4SLinus Torvalds /*
12741da177e4SLinus Torvalds  *	Create a pair of connected sockets.
12751da177e4SLinus Torvalds  */
12761da177e4SLinus Torvalds 
12773e0fa65fSHeiko Carstens SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
12783e0fa65fSHeiko Carstens 		int __user *, usockvec)
12791da177e4SLinus Torvalds {
12801da177e4SLinus Torvalds 	struct socket *sock1, *sock2;
12811da177e4SLinus Torvalds 	int fd1, fd2, err;
1282db349509SAl Viro 	struct file *newfile1, *newfile2;
1283a677a039SUlrich Drepper 	int flags;
1284a677a039SUlrich Drepper 
1285a677a039SUlrich Drepper 	flags = type & ~SOCK_TYPE_MASK;
128677d27200SUlrich Drepper 	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1287a677a039SUlrich Drepper 		return -EINVAL;
1288a677a039SUlrich Drepper 	type &= SOCK_TYPE_MASK;
12891da177e4SLinus Torvalds 
1290aaca0bdcSUlrich Drepper 	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1291aaca0bdcSUlrich Drepper 		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1292aaca0bdcSUlrich Drepper 
12931da177e4SLinus Torvalds 	/*
12941da177e4SLinus Torvalds 	 * Obtain the first socket and check if the underlying protocol
12951da177e4SLinus Torvalds 	 * supports the socketpair call.
12961da177e4SLinus Torvalds 	 */
12971da177e4SLinus Torvalds 
12981da177e4SLinus Torvalds 	err = sock_create(family, type, protocol, &sock1);
12991da177e4SLinus Torvalds 	if (err < 0)
13001da177e4SLinus Torvalds 		goto out;
13011da177e4SLinus Torvalds 
13021da177e4SLinus Torvalds 	err = sock_create(family, type, protocol, &sock2);
13031da177e4SLinus Torvalds 	if (err < 0)
13041da177e4SLinus Torvalds 		goto out_release_1;
13051da177e4SLinus Torvalds 
13061da177e4SLinus Torvalds 	err = sock1->ops->socketpair(sock1, sock2);
13071da177e4SLinus Torvalds 	if (err < 0)
13081da177e4SLinus Torvalds 		goto out_release_both;
13091da177e4SLinus Torvalds 
131028407630SAl Viro 	fd1 = get_unused_fd_flags(flags);
1311bf3c23d1SDavid S. Miller 	if (unlikely(fd1 < 0)) {
1312bf3c23d1SDavid S. Miller 		err = fd1;
13131da177e4SLinus Torvalds 		goto out_release_both;
1314bf3c23d1SDavid S. Miller 	}
1315d73aa286SYann Droneaud 
131628407630SAl Viro 	fd2 = get_unused_fd_flags(flags);
1317198de4d7SAl Viro 	if (unlikely(fd2 < 0)) {
1318198de4d7SAl Viro 		err = fd2;
1319d73aa286SYann Droneaud 		goto out_put_unused_1;
132028407630SAl Viro 	}
132128407630SAl Viro 
1322aab174f0SLinus Torvalds 	newfile1 = sock_alloc_file(sock1, flags, NULL);
132328407630SAl Viro 	if (unlikely(IS_ERR(newfile1))) {
132428407630SAl Viro 		err = PTR_ERR(newfile1);
1325d73aa286SYann Droneaud 		goto out_put_unused_both;
132628407630SAl Viro 	}
132728407630SAl Viro 
1328aab174f0SLinus Torvalds 	newfile2 = sock_alloc_file(sock2, flags, NULL);
132928407630SAl Viro 	if (IS_ERR(newfile2)) {
133028407630SAl Viro 		err = PTR_ERR(newfile2);
1331d73aa286SYann Droneaud 		goto out_fput_1;
1332db349509SAl Viro 	}
1333db349509SAl Viro 
1334d73aa286SYann Droneaud 	err = put_user(fd1, &usockvec[0]);
1335d73aa286SYann Droneaud 	if (err)
1336d73aa286SYann Droneaud 		goto out_fput_both;
1337d73aa286SYann Droneaud 
1338d73aa286SYann Droneaud 	err = put_user(fd2, &usockvec[1]);
1339d73aa286SYann Droneaud 	if (err)
1340d73aa286SYann Droneaud 		goto out_fput_both;
1341d73aa286SYann Droneaud 
1342157cf649SAl Viro 	audit_fd_pair(fd1, fd2);
1343d73aa286SYann Droneaud 
1344db349509SAl Viro 	fd_install(fd1, newfile1);
1345db349509SAl Viro 	fd_install(fd2, newfile2);
13461da177e4SLinus Torvalds 	/* fd1 and fd2 may be already another descriptors.
13471da177e4SLinus Torvalds 	 * Not kernel problem.
13481da177e4SLinus Torvalds 	 */
13491da177e4SLinus Torvalds 
13501da177e4SLinus Torvalds 	return 0;
13511da177e4SLinus Torvalds 
1352d73aa286SYann Droneaud out_fput_both:
1353d73aa286SYann Droneaud 	fput(newfile2);
1354d73aa286SYann Droneaud 	fput(newfile1);
1355d73aa286SYann Droneaud 	put_unused_fd(fd2);
1356d73aa286SYann Droneaud 	put_unused_fd(fd1);
1357d73aa286SYann Droneaud 	goto out;
13581da177e4SLinus Torvalds 
1359d73aa286SYann Droneaud out_fput_1:
1360d73aa286SYann Droneaud 	fput(newfile1);
1361d73aa286SYann Droneaud 	put_unused_fd(fd2);
1362d73aa286SYann Droneaud 	put_unused_fd(fd1);
1363d73aa286SYann Droneaud 	sock_release(sock2);
1364d73aa286SYann Droneaud 	goto out;
1365d73aa286SYann Droneaud 
1366d73aa286SYann Droneaud out_put_unused_both:
1367d73aa286SYann Droneaud 	put_unused_fd(fd2);
1368d73aa286SYann Droneaud out_put_unused_1:
1369d73aa286SYann Droneaud 	put_unused_fd(fd1);
13701da177e4SLinus Torvalds out_release_both:
13711da177e4SLinus Torvalds 	sock_release(sock2);
13721da177e4SLinus Torvalds out_release_1:
13731da177e4SLinus Torvalds 	sock_release(sock1);
13741da177e4SLinus Torvalds out:
13751da177e4SLinus Torvalds 	return err;
13761da177e4SLinus Torvalds }
13771da177e4SLinus Torvalds 
13781da177e4SLinus Torvalds /*
13791da177e4SLinus Torvalds  *	Bind a name to a socket. Nothing much to do here since it's
13801da177e4SLinus Torvalds  *	the protocol's responsibility to handle the local address.
13811da177e4SLinus Torvalds  *
13821da177e4SLinus Torvalds  *	We move the socket address to kernel space before we call
13831da177e4SLinus Torvalds  *	the protocol layer (having also checked the address is ok).
13841da177e4SLinus Torvalds  */
13851da177e4SLinus Torvalds 
138620f37034SHeiko Carstens SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
13871da177e4SLinus Torvalds {
13881da177e4SLinus Torvalds 	struct socket *sock;
1389230b1839SYOSHIFUJI Hideaki 	struct sockaddr_storage address;
13906cb153caSBenjamin LaHaise 	int err, fput_needed;
13911da177e4SLinus Torvalds 
139289bddce5SStephen Hemminger 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
139389bddce5SStephen Hemminger 	if (sock) {
139443db362dSMaciej Żenczykowski 		err = move_addr_to_kernel(umyaddr, addrlen, &address);
139589bddce5SStephen Hemminger 		if (err >= 0) {
139689bddce5SStephen Hemminger 			err = security_socket_bind(sock,
1397230b1839SYOSHIFUJI Hideaki 						   (struct sockaddr *)&address,
139889bddce5SStephen Hemminger 						   addrlen);
13996cb153caSBenjamin LaHaise 			if (!err)
14006cb153caSBenjamin LaHaise 				err = sock->ops->bind(sock,
140189bddce5SStephen Hemminger 						      (struct sockaddr *)
1402230b1839SYOSHIFUJI Hideaki 						      &address, addrlen);
14031da177e4SLinus Torvalds 		}
14046cb153caSBenjamin LaHaise 		fput_light(sock->file, fput_needed);
14051da177e4SLinus Torvalds 	}
14061da177e4SLinus Torvalds 	return err;
14071da177e4SLinus Torvalds }
14081da177e4SLinus Torvalds 
14091da177e4SLinus Torvalds /*
14101da177e4SLinus Torvalds  *	Perform a listen. Basically, we allow the protocol to do anything
14111da177e4SLinus Torvalds  *	necessary for a listen, and if that works, we mark the socket as
14121da177e4SLinus Torvalds  *	ready for listening.
14131da177e4SLinus Torvalds  */
14141da177e4SLinus Torvalds 
14153e0fa65fSHeiko Carstens SYSCALL_DEFINE2(listen, int, fd, int, backlog)
14161da177e4SLinus Torvalds {
14171da177e4SLinus Torvalds 	struct socket *sock;
14186cb153caSBenjamin LaHaise 	int err, fput_needed;
1419b8e1f9b5SPavel Emelyanov 	int somaxconn;
14201da177e4SLinus Torvalds 
142189bddce5SStephen Hemminger 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
142289bddce5SStephen Hemminger 	if (sock) {
14238efa6e93SPavel Emelyanov 		somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
142495c96174SEric Dumazet 		if ((unsigned int)backlog > somaxconn)
1425b8e1f9b5SPavel Emelyanov 			backlog = somaxconn;
14261da177e4SLinus Torvalds 
14271da177e4SLinus Torvalds 		err = security_socket_listen(sock, backlog);
14286cb153caSBenjamin LaHaise 		if (!err)
14291da177e4SLinus Torvalds 			err = sock->ops->listen(sock, backlog);
14306cb153caSBenjamin LaHaise 
14316cb153caSBenjamin LaHaise 		fput_light(sock->file, fput_needed);
14321da177e4SLinus Torvalds 	}
14331da177e4SLinus Torvalds 	return err;
14341da177e4SLinus Torvalds }
14351da177e4SLinus Torvalds 
14361da177e4SLinus Torvalds /*
14371da177e4SLinus Torvalds  *	For accept, we attempt to create a new socket, set up the link
14381da177e4SLinus Torvalds  *	with the client, wake up the client, then return the new
14391da177e4SLinus Torvalds  *	connected fd. We collect the address of the connector in kernel
14401da177e4SLinus Torvalds  *	space and move it to user at the very end. This is unclean because
14411da177e4SLinus Torvalds  *	we open the socket then return an error.
14421da177e4SLinus Torvalds  *
14431da177e4SLinus Torvalds  *	1003.1g adds the ability to recvmsg() to query connection pending
14441da177e4SLinus Torvalds  *	status to recvmsg. We need to add that support in a way thats
14451da177e4SLinus Torvalds  *	clean when we restucture accept also.
14461da177e4SLinus Torvalds  */
14471da177e4SLinus Torvalds 
144820f37034SHeiko Carstens SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
144920f37034SHeiko Carstens 		int __user *, upeer_addrlen, int, flags)
14501da177e4SLinus Torvalds {
14511da177e4SLinus Torvalds 	struct socket *sock, *newsock;
145239d8c1b6SDavid S. Miller 	struct file *newfile;
14536cb153caSBenjamin LaHaise 	int err, len, newfd, fput_needed;
1454230b1839SYOSHIFUJI Hideaki 	struct sockaddr_storage address;
14551da177e4SLinus Torvalds 
145677d27200SUlrich Drepper 	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1457aaca0bdcSUlrich Drepper 		return -EINVAL;
1458aaca0bdcSUlrich Drepper 
1459aaca0bdcSUlrich Drepper 	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1460aaca0bdcSUlrich Drepper 		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1461aaca0bdcSUlrich Drepper 
14626cb153caSBenjamin LaHaise 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
14631da177e4SLinus Torvalds 	if (!sock)
14641da177e4SLinus Torvalds 		goto out;
14651da177e4SLinus Torvalds 
14661da177e4SLinus Torvalds 	err = -ENFILE;
1467c6d409cfSEric Dumazet 	newsock = sock_alloc();
1468c6d409cfSEric Dumazet 	if (!newsock)
14691da177e4SLinus Torvalds 		goto out_put;
14701da177e4SLinus Torvalds 
14711da177e4SLinus Torvalds 	newsock->type = sock->type;
14721da177e4SLinus Torvalds 	newsock->ops = sock->ops;
14731da177e4SLinus Torvalds 
14741da177e4SLinus Torvalds 	/*
14751da177e4SLinus Torvalds 	 * We don't need try_module_get here, as the listening socket (sock)
14761da177e4SLinus Torvalds 	 * has the protocol module (sock->ops->owner) held.
14771da177e4SLinus Torvalds 	 */
14781da177e4SLinus Torvalds 	__module_get(newsock->ops->owner);
14791da177e4SLinus Torvalds 
148028407630SAl Viro 	newfd = get_unused_fd_flags(flags);
148139d8c1b6SDavid S. Miller 	if (unlikely(newfd < 0)) {
148239d8c1b6SDavid S. Miller 		err = newfd;
14839a1875e6SDavid S. Miller 		sock_release(newsock);
14849a1875e6SDavid S. Miller 		goto out_put;
148539d8c1b6SDavid S. Miller 	}
1486aab174f0SLinus Torvalds 	newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
148728407630SAl Viro 	if (unlikely(IS_ERR(newfile))) {
148828407630SAl Viro 		err = PTR_ERR(newfile);
148928407630SAl Viro 		put_unused_fd(newfd);
149028407630SAl Viro 		sock_release(newsock);
149128407630SAl Viro 		goto out_put;
149228407630SAl Viro 	}
149339d8c1b6SDavid S. Miller 
1494a79af59eSFrank Filz 	err = security_socket_accept(sock, newsock);
1495a79af59eSFrank Filz 	if (err)
149639d8c1b6SDavid S. Miller 		goto out_fd;
1497a79af59eSFrank Filz 
14981da177e4SLinus Torvalds 	err = sock->ops->accept(sock, newsock, sock->file->f_flags);
14991da177e4SLinus Torvalds 	if (err < 0)
150039d8c1b6SDavid S. Miller 		goto out_fd;
15011da177e4SLinus Torvalds 
15021da177e4SLinus Torvalds 	if (upeer_sockaddr) {
1503230b1839SYOSHIFUJI Hideaki 		if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
150489bddce5SStephen Hemminger 					  &len, 2) < 0) {
15051da177e4SLinus Torvalds 			err = -ECONNABORTED;
150639d8c1b6SDavid S. Miller 			goto out_fd;
15071da177e4SLinus Torvalds 		}
150843db362dSMaciej Żenczykowski 		err = move_addr_to_user(&address,
1509230b1839SYOSHIFUJI Hideaki 					len, upeer_sockaddr, upeer_addrlen);
15101da177e4SLinus Torvalds 		if (err < 0)
151139d8c1b6SDavid S. Miller 			goto out_fd;
15121da177e4SLinus Torvalds 	}
15131da177e4SLinus Torvalds 
15141da177e4SLinus Torvalds 	/* File flags are not inherited via accept() unlike another OSes. */
15151da177e4SLinus Torvalds 
151639d8c1b6SDavid S. Miller 	fd_install(newfd, newfile);
151739d8c1b6SDavid S. Miller 	err = newfd;
15181da177e4SLinus Torvalds 
15191da177e4SLinus Torvalds out_put:
15206cb153caSBenjamin LaHaise 	fput_light(sock->file, fput_needed);
15211da177e4SLinus Torvalds out:
15221da177e4SLinus Torvalds 	return err;
152339d8c1b6SDavid S. Miller out_fd:
15249606a216SDavid S. Miller 	fput(newfile);
152539d8c1b6SDavid S. Miller 	put_unused_fd(newfd);
15261da177e4SLinus Torvalds 	goto out_put;
15271da177e4SLinus Torvalds }
15281da177e4SLinus Torvalds 
152920f37034SHeiko Carstens SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
153020f37034SHeiko Carstens 		int __user *, upeer_addrlen)
1531aaca0bdcSUlrich Drepper {
1532de11defeSUlrich Drepper 	return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
1533aaca0bdcSUlrich Drepper }
1534aaca0bdcSUlrich Drepper 
15351da177e4SLinus Torvalds /*
15361da177e4SLinus Torvalds  *	Attempt to connect to a socket with the server address.  The address
15371da177e4SLinus Torvalds  *	is in user space so we verify it is OK and move it to kernel space.
15381da177e4SLinus Torvalds  *
15391da177e4SLinus Torvalds  *	For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
15401da177e4SLinus Torvalds  *	break bindings
15411da177e4SLinus Torvalds  *
15421da177e4SLinus Torvalds  *	NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
15431da177e4SLinus Torvalds  *	other SEQPACKET protocols that take time to connect() as it doesn't
15441da177e4SLinus Torvalds  *	include the -EINPROGRESS status for such sockets.
15451da177e4SLinus Torvalds  */
15461da177e4SLinus Torvalds 
154720f37034SHeiko Carstens SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
154820f37034SHeiko Carstens 		int, addrlen)
15491da177e4SLinus Torvalds {
15501da177e4SLinus Torvalds 	struct socket *sock;
1551230b1839SYOSHIFUJI Hideaki 	struct sockaddr_storage address;
15526cb153caSBenjamin LaHaise 	int err, fput_needed;
15531da177e4SLinus Torvalds 
15546cb153caSBenjamin LaHaise 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
15551da177e4SLinus Torvalds 	if (!sock)
15561da177e4SLinus Torvalds 		goto out;
155743db362dSMaciej Żenczykowski 	err = move_addr_to_kernel(uservaddr, addrlen, &address);
15581da177e4SLinus Torvalds 	if (err < 0)
15591da177e4SLinus Torvalds 		goto out_put;
15601da177e4SLinus Torvalds 
156189bddce5SStephen Hemminger 	err =
1562230b1839SYOSHIFUJI Hideaki 	    security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
15631da177e4SLinus Torvalds 	if (err)
15641da177e4SLinus Torvalds 		goto out_put;
15651da177e4SLinus Torvalds 
1566230b1839SYOSHIFUJI Hideaki 	err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
15671da177e4SLinus Torvalds 				 sock->file->f_flags);
15681da177e4SLinus Torvalds out_put:
15696cb153caSBenjamin LaHaise 	fput_light(sock->file, fput_needed);
15701da177e4SLinus Torvalds out:
15711da177e4SLinus Torvalds 	return err;
15721da177e4SLinus Torvalds }
15731da177e4SLinus Torvalds 
15741da177e4SLinus Torvalds /*
15751da177e4SLinus Torvalds  *	Get the local address ('name') of a socket object. Move the obtained
15761da177e4SLinus Torvalds  *	name to user space.
15771da177e4SLinus Torvalds  */
15781da177e4SLinus Torvalds 
157920f37034SHeiko Carstens SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
158020f37034SHeiko Carstens 		int __user *, usockaddr_len)
15811da177e4SLinus Torvalds {
15821da177e4SLinus Torvalds 	struct socket *sock;
1583230b1839SYOSHIFUJI Hideaki 	struct sockaddr_storage address;
15846cb153caSBenjamin LaHaise 	int len, err, fput_needed;
15851da177e4SLinus Torvalds 
15866cb153caSBenjamin LaHaise 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
15871da177e4SLinus Torvalds 	if (!sock)
15881da177e4SLinus Torvalds 		goto out;
15891da177e4SLinus Torvalds 
15901da177e4SLinus Torvalds 	err = security_socket_getsockname(sock);
15911da177e4SLinus Torvalds 	if (err)
15921da177e4SLinus Torvalds 		goto out_put;
15931da177e4SLinus Torvalds 
1594230b1839SYOSHIFUJI Hideaki 	err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
15951da177e4SLinus Torvalds 	if (err)
15961da177e4SLinus Torvalds 		goto out_put;
159743db362dSMaciej Żenczykowski 	err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);
15981da177e4SLinus Torvalds 
15991da177e4SLinus Torvalds out_put:
16006cb153caSBenjamin LaHaise 	fput_light(sock->file, fput_needed);
16011da177e4SLinus Torvalds out:
16021da177e4SLinus Torvalds 	return err;
16031da177e4SLinus Torvalds }
16041da177e4SLinus Torvalds 
16051da177e4SLinus Torvalds /*
16061da177e4SLinus Torvalds  *	Get the remote address ('name') of a socket object. Move the obtained
16071da177e4SLinus Torvalds  *	name to user space.
16081da177e4SLinus Torvalds  */
16091da177e4SLinus Torvalds 
161020f37034SHeiko Carstens SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
161120f37034SHeiko Carstens 		int __user *, usockaddr_len)
16121da177e4SLinus Torvalds {
16131da177e4SLinus Torvalds 	struct socket *sock;
1614230b1839SYOSHIFUJI Hideaki 	struct sockaddr_storage address;
16156cb153caSBenjamin LaHaise 	int len, err, fput_needed;
16161da177e4SLinus Torvalds 
161789bddce5SStephen Hemminger 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
161889bddce5SStephen Hemminger 	if (sock != NULL) {
16191da177e4SLinus Torvalds 		err = security_socket_getpeername(sock);
16201da177e4SLinus Torvalds 		if (err) {
16216cb153caSBenjamin LaHaise 			fput_light(sock->file, fput_needed);
16221da177e4SLinus Torvalds 			return err;
16231da177e4SLinus Torvalds 		}
16241da177e4SLinus Torvalds 
162589bddce5SStephen Hemminger 		err =
1626230b1839SYOSHIFUJI Hideaki 		    sock->ops->getname(sock, (struct sockaddr *)&address, &len,
162789bddce5SStephen Hemminger 				       1);
16281da177e4SLinus Torvalds 		if (!err)
162943db362dSMaciej Żenczykowski 			err = move_addr_to_user(&address, len, usockaddr,
163089bddce5SStephen Hemminger 						usockaddr_len);
16316cb153caSBenjamin LaHaise 		fput_light(sock->file, fput_needed);
16321da177e4SLinus Torvalds 	}
16331da177e4SLinus Torvalds 	return err;
16341da177e4SLinus Torvalds }
16351da177e4SLinus Torvalds 
16361da177e4SLinus Torvalds /*
16371da177e4SLinus Torvalds  *	Send a datagram to a given address. We move the address into kernel
16381da177e4SLinus Torvalds  *	space and check the user space data area is readable before invoking
16391da177e4SLinus Torvalds  *	the protocol.
16401da177e4SLinus Torvalds  */
16411da177e4SLinus Torvalds 
16423e0fa65fSHeiko Carstens SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
164395c96174SEric Dumazet 		unsigned int, flags, struct sockaddr __user *, addr,
16443e0fa65fSHeiko Carstens 		int, addr_len)
16451da177e4SLinus Torvalds {
16461da177e4SLinus Torvalds 	struct socket *sock;
1647230b1839SYOSHIFUJI Hideaki 	struct sockaddr_storage address;
16481da177e4SLinus Torvalds 	int err;
16491da177e4SLinus Torvalds 	struct msghdr msg;
16501da177e4SLinus Torvalds 	struct iovec iov;
16516cb153caSBenjamin LaHaise 	int fput_needed;
16521da177e4SLinus Torvalds 
1653253eacc0SLinus Torvalds 	if (len > INT_MAX)
1654253eacc0SLinus Torvalds 		len = INT_MAX;
16554de930efSAl Viro 	if (unlikely(!access_ok(VERIFY_READ, buff, len)))
16564de930efSAl Viro 		return -EFAULT;
1657de0fa95cSPavel Emelyanov 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
1658de0fa95cSPavel Emelyanov 	if (!sock)
16594387ff75SDavid S. Miller 		goto out;
16606cb153caSBenjamin LaHaise 
16611da177e4SLinus Torvalds 	iov.iov_base = buff;
16621da177e4SLinus Torvalds 	iov.iov_len = len;
16631da177e4SLinus Torvalds 	msg.msg_name = NULL;
1664c0371da6SAl Viro 	iov_iter_init(&msg.msg_iter, WRITE, &iov, 1, len);
16651da177e4SLinus Torvalds 	msg.msg_control = NULL;
16661da177e4SLinus Torvalds 	msg.msg_controllen = 0;
16671da177e4SLinus Torvalds 	msg.msg_namelen = 0;
16686cb153caSBenjamin LaHaise 	if (addr) {
166943db362dSMaciej Żenczykowski 		err = move_addr_to_kernel(addr, addr_len, &address);
16701da177e4SLinus Torvalds 		if (err < 0)
16711da177e4SLinus Torvalds 			goto out_put;
1672230b1839SYOSHIFUJI Hideaki 		msg.msg_name = (struct sockaddr *)&address;
16731da177e4SLinus Torvalds 		msg.msg_namelen = addr_len;
16741da177e4SLinus Torvalds 	}
16751da177e4SLinus Torvalds 	if (sock->file->f_flags & O_NONBLOCK)
16761da177e4SLinus Torvalds 		flags |= MSG_DONTWAIT;
16771da177e4SLinus Torvalds 	msg.msg_flags = flags;
16781da177e4SLinus Torvalds 	err = sock_sendmsg(sock, &msg, len);
16791da177e4SLinus Torvalds 
16801da177e4SLinus Torvalds out_put:
1681de0fa95cSPavel Emelyanov 	fput_light(sock->file, fput_needed);
16824387ff75SDavid S. Miller out:
16831da177e4SLinus Torvalds 	return err;
16841da177e4SLinus Torvalds }
16851da177e4SLinus Torvalds 
16861da177e4SLinus Torvalds /*
16871da177e4SLinus Torvalds  *	Send a datagram down a socket.
16881da177e4SLinus Torvalds  */
16891da177e4SLinus Torvalds 
16903e0fa65fSHeiko Carstens SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
169195c96174SEric Dumazet 		unsigned int, flags)
16921da177e4SLinus Torvalds {
16931da177e4SLinus Torvalds 	return sys_sendto(fd, buff, len, flags, NULL, 0);
16941da177e4SLinus Torvalds }
16951da177e4SLinus Torvalds 
16961da177e4SLinus Torvalds /*
16971da177e4SLinus Torvalds  *	Receive a frame from the socket and optionally record the address of the
16981da177e4SLinus Torvalds  *	sender. We verify the buffers are writable and if needed move the
16991da177e4SLinus Torvalds  *	sender address from kernel to user space.
17001da177e4SLinus Torvalds  */
17011da177e4SLinus Torvalds 
17023e0fa65fSHeiko Carstens SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
170395c96174SEric Dumazet 		unsigned int, flags, struct sockaddr __user *, addr,
17043e0fa65fSHeiko Carstens 		int __user *, addr_len)
17051da177e4SLinus Torvalds {
17061da177e4SLinus Torvalds 	struct socket *sock;
17071da177e4SLinus Torvalds 	struct iovec iov;
17081da177e4SLinus Torvalds 	struct msghdr msg;
1709230b1839SYOSHIFUJI Hideaki 	struct sockaddr_storage address;
17101da177e4SLinus Torvalds 	int err, err2;
17116cb153caSBenjamin LaHaise 	int fput_needed;
17121da177e4SLinus Torvalds 
1713253eacc0SLinus Torvalds 	if (size > INT_MAX)
1714253eacc0SLinus Torvalds 		size = INT_MAX;
17154de930efSAl Viro 	if (unlikely(!access_ok(VERIFY_WRITE, ubuf, size)))
17164de930efSAl Viro 		return -EFAULT;
1717de0fa95cSPavel Emelyanov 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
17181da177e4SLinus Torvalds 	if (!sock)
1719de0fa95cSPavel Emelyanov 		goto out;
17201da177e4SLinus Torvalds 
17211da177e4SLinus Torvalds 	msg.msg_control = NULL;
17221da177e4SLinus Torvalds 	msg.msg_controllen = 0;
17231da177e4SLinus Torvalds 	iov.iov_len = size;
17241da177e4SLinus Torvalds 	iov.iov_base = ubuf;
1725c0371da6SAl Viro 	iov_iter_init(&msg.msg_iter, READ, &iov, 1, size);
1726f3d33426SHannes Frederic Sowa 	/* Save some cycles and don't copy the address if not needed */
1727f3d33426SHannes Frederic Sowa 	msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
1728f3d33426SHannes Frederic Sowa 	/* We assume all kernel code knows the size of sockaddr_storage */
1729f3d33426SHannes Frederic Sowa 	msg.msg_namelen = 0;
17301da177e4SLinus Torvalds 	if (sock->file->f_flags & O_NONBLOCK)
17311da177e4SLinus Torvalds 		flags |= MSG_DONTWAIT;
17321da177e4SLinus Torvalds 	err = sock_recvmsg(sock, &msg, size, flags);
17331da177e4SLinus Torvalds 
173489bddce5SStephen Hemminger 	if (err >= 0 && addr != NULL) {
173543db362dSMaciej Żenczykowski 		err2 = move_addr_to_user(&address,
1736230b1839SYOSHIFUJI Hideaki 					 msg.msg_namelen, addr, addr_len);
17371da177e4SLinus Torvalds 		if (err2 < 0)
17381da177e4SLinus Torvalds 			err = err2;
17391da177e4SLinus Torvalds 	}
1740de0fa95cSPavel Emelyanov 
1741de0fa95cSPavel Emelyanov 	fput_light(sock->file, fput_needed);
17424387ff75SDavid S. Miller out:
17431da177e4SLinus Torvalds 	return err;
17441da177e4SLinus Torvalds }
17451da177e4SLinus Torvalds 
17461da177e4SLinus Torvalds /*
17471da177e4SLinus Torvalds  *	Receive a datagram from a socket.
17481da177e4SLinus Torvalds  */
17491da177e4SLinus Torvalds 
1750b7c0ddf5SJan Glauber SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size,
1751b7c0ddf5SJan Glauber 		unsigned int, flags)
17521da177e4SLinus Torvalds {
17531da177e4SLinus Torvalds 	return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
17541da177e4SLinus Torvalds }
17551da177e4SLinus Torvalds 
17561da177e4SLinus Torvalds /*
17571da177e4SLinus Torvalds  *	Set a socket option. Because we don't know the option lengths we have
17581da177e4SLinus Torvalds  *	to pass the user mode parameter for the protocols to sort out.
17591da177e4SLinus Torvalds  */
17601da177e4SLinus Torvalds 
176120f37034SHeiko Carstens SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
176220f37034SHeiko Carstens 		char __user *, optval, int, optlen)
17631da177e4SLinus Torvalds {
17646cb153caSBenjamin LaHaise 	int err, fput_needed;
17651da177e4SLinus Torvalds 	struct socket *sock;
17661da177e4SLinus Torvalds 
17671da177e4SLinus Torvalds 	if (optlen < 0)
17681da177e4SLinus Torvalds 		return -EINVAL;
17691da177e4SLinus Torvalds 
177089bddce5SStephen Hemminger 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
177189bddce5SStephen Hemminger 	if (sock != NULL) {
17721da177e4SLinus Torvalds 		err = security_socket_setsockopt(sock, level, optname);
17736cb153caSBenjamin LaHaise 		if (err)
17746cb153caSBenjamin LaHaise 			goto out_put;
17751da177e4SLinus Torvalds 
17761da177e4SLinus Torvalds 		if (level == SOL_SOCKET)
177789bddce5SStephen Hemminger 			err =
177889bddce5SStephen Hemminger 			    sock_setsockopt(sock, level, optname, optval,
177989bddce5SStephen Hemminger 					    optlen);
17801da177e4SLinus Torvalds 		else
178189bddce5SStephen Hemminger 			err =
178289bddce5SStephen Hemminger 			    sock->ops->setsockopt(sock, level, optname, optval,
178389bddce5SStephen Hemminger 						  optlen);
17846cb153caSBenjamin LaHaise out_put:
17856cb153caSBenjamin LaHaise 		fput_light(sock->file, fput_needed);
17861da177e4SLinus Torvalds 	}
17871da177e4SLinus Torvalds 	return err;
17881da177e4SLinus Torvalds }
17891da177e4SLinus Torvalds 
17901da177e4SLinus Torvalds /*
17911da177e4SLinus Torvalds  *	Get a socket option. Because we don't know the option lengths we have
17921da177e4SLinus Torvalds  *	to pass a user mode parameter for the protocols to sort out.
17931da177e4SLinus Torvalds  */
17941da177e4SLinus Torvalds 
179520f37034SHeiko Carstens SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
179620f37034SHeiko Carstens 		char __user *, optval, int __user *, optlen)
17971da177e4SLinus Torvalds {
17986cb153caSBenjamin LaHaise 	int err, fput_needed;
17991da177e4SLinus Torvalds 	struct socket *sock;
18001da177e4SLinus Torvalds 
180189bddce5SStephen Hemminger 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
180289bddce5SStephen Hemminger 	if (sock != NULL) {
18036cb153caSBenjamin LaHaise 		err = security_socket_getsockopt(sock, level, optname);
18046cb153caSBenjamin LaHaise 		if (err)
18056cb153caSBenjamin LaHaise 			goto out_put;
18061da177e4SLinus Torvalds 
18071da177e4SLinus Torvalds 		if (level == SOL_SOCKET)
180889bddce5SStephen Hemminger 			err =
180989bddce5SStephen Hemminger 			    sock_getsockopt(sock, level, optname, optval,
181089bddce5SStephen Hemminger 					    optlen);
18111da177e4SLinus Torvalds 		else
181289bddce5SStephen Hemminger 			err =
181389bddce5SStephen Hemminger 			    sock->ops->getsockopt(sock, level, optname, optval,
181489bddce5SStephen Hemminger 						  optlen);
18156cb153caSBenjamin LaHaise out_put:
18166cb153caSBenjamin LaHaise 		fput_light(sock->file, fput_needed);
18171da177e4SLinus Torvalds 	}
18181da177e4SLinus Torvalds 	return err;
18191da177e4SLinus Torvalds }
18201da177e4SLinus Torvalds 
18211da177e4SLinus Torvalds /*
18221da177e4SLinus Torvalds  *	Shutdown a socket.
18231da177e4SLinus Torvalds  */
18241da177e4SLinus Torvalds 
1825754fe8d2SHeiko Carstens SYSCALL_DEFINE2(shutdown, int, fd, int, how)
18261da177e4SLinus Torvalds {
18276cb153caSBenjamin LaHaise 	int err, fput_needed;
18281da177e4SLinus Torvalds 	struct socket *sock;
18291da177e4SLinus Torvalds 
183089bddce5SStephen Hemminger 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
183189bddce5SStephen Hemminger 	if (sock != NULL) {
18321da177e4SLinus Torvalds 		err = security_socket_shutdown(sock, how);
18336cb153caSBenjamin LaHaise 		if (!err)
18341da177e4SLinus Torvalds 			err = sock->ops->shutdown(sock, how);
18356cb153caSBenjamin LaHaise 		fput_light(sock->file, fput_needed);
18361da177e4SLinus Torvalds 	}
18371da177e4SLinus Torvalds 	return err;
18381da177e4SLinus Torvalds }
18391da177e4SLinus Torvalds 
/*
 * Helper macros yielding the address of a 32/64 bit msghdr field; the
 * fields have the same type (int / unsigned) on our platforms.
 *
 * The expansion site must have "flags" in scope, plus both "<msg>" and
 * "<msg>_compat" pointers: MSG_CMSG_COMPAT in flags selects the _compat one.
 */
#define COMPAT_MSG(msg, member)					\
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member	\
				   : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
18461da177e4SLinus Torvalds 
1847c71d8ebeSTetsuo Handa struct used_address {
1848c71d8ebeSTetsuo Handa 	struct sockaddr_storage name;
1849c71d8ebeSTetsuo Handa 	unsigned int name_len;
1850c71d8ebeSTetsuo Handa };
1851c71d8ebeSTetsuo Handa 
185208adb7daSAl Viro static ssize_t copy_msghdr_from_user(struct msghdr *kmsg,
185308adb7daSAl Viro 				     struct user_msghdr __user *umsg,
185408adb7daSAl Viro 				     struct sockaddr __user **save_addr,
185508adb7daSAl Viro 				     struct iovec **iov)
18561661bf36SDan Carpenter {
185708adb7daSAl Viro 	struct sockaddr __user *uaddr;
185808adb7daSAl Viro 	struct iovec __user *uiov;
1859c0371da6SAl Viro 	size_t nr_segs;
186008adb7daSAl Viro 	ssize_t err;
186108adb7daSAl Viro 
186208adb7daSAl Viro 	if (!access_ok(VERIFY_READ, umsg, sizeof(*umsg)) ||
186308adb7daSAl Viro 	    __get_user(uaddr, &umsg->msg_name) ||
186408adb7daSAl Viro 	    __get_user(kmsg->msg_namelen, &umsg->msg_namelen) ||
186508adb7daSAl Viro 	    __get_user(uiov, &umsg->msg_iov) ||
1866c0371da6SAl Viro 	    __get_user(nr_segs, &umsg->msg_iovlen) ||
186708adb7daSAl Viro 	    __get_user(kmsg->msg_control, &umsg->msg_control) ||
186808adb7daSAl Viro 	    __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
186908adb7daSAl Viro 	    __get_user(kmsg->msg_flags, &umsg->msg_flags))
18701661bf36SDan Carpenter 		return -EFAULT;
1871dbb490b9SMatthew Leach 
187208adb7daSAl Viro 	if (!uaddr)
18736a2a2b3aSAni Sinha 		kmsg->msg_namelen = 0;
18746a2a2b3aSAni Sinha 
1875dbb490b9SMatthew Leach 	if (kmsg->msg_namelen < 0)
1876dbb490b9SMatthew Leach 		return -EINVAL;
1877dbb490b9SMatthew Leach 
18781661bf36SDan Carpenter 	if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
1879db31c55aSDan Carpenter 		kmsg->msg_namelen = sizeof(struct sockaddr_storage);
188008adb7daSAl Viro 
188108adb7daSAl Viro 	if (save_addr)
188208adb7daSAl Viro 		*save_addr = uaddr;
188308adb7daSAl Viro 
188408adb7daSAl Viro 	if (uaddr && kmsg->msg_namelen) {
188508adb7daSAl Viro 		if (!save_addr) {
188608adb7daSAl Viro 			err = move_addr_to_kernel(uaddr, kmsg->msg_namelen,
188708adb7daSAl Viro 						  kmsg->msg_name);
188808adb7daSAl Viro 			if (err < 0)
188908adb7daSAl Viro 				return err;
189008adb7daSAl Viro 		}
189108adb7daSAl Viro 	} else {
189208adb7daSAl Viro 		kmsg->msg_name = NULL;
189308adb7daSAl Viro 		kmsg->msg_namelen = 0;
189408adb7daSAl Viro 	}
189508adb7daSAl Viro 
1896c0371da6SAl Viro 	if (nr_segs > UIO_MAXIOV)
189708adb7daSAl Viro 		return -EMSGSIZE;
189808adb7daSAl Viro 
18990345f931Stadeusz.struk@intel.com 	kmsg->msg_iocb = NULL;
19000345f931Stadeusz.struk@intel.com 
190108adb7daSAl Viro 	err = rw_copy_check_uvector(save_addr ? READ : WRITE,
1902c0371da6SAl Viro 				    uiov, nr_segs,
190308adb7daSAl Viro 				    UIO_FASTIOV, *iov, iov);
190408adb7daSAl Viro 	if (err >= 0)
1905c0371da6SAl Viro 		iov_iter_init(&kmsg->msg_iter, save_addr ? READ : WRITE,
1906c0371da6SAl Viro 			      *iov, nr_segs, err);
190708adb7daSAl Viro 	return err;
19081661bf36SDan Carpenter }
19091661bf36SDan Carpenter 
1910666547ffSAl Viro static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
191195c96174SEric Dumazet 			 struct msghdr *msg_sys, unsigned int flags,
1912c71d8ebeSTetsuo Handa 			 struct used_address *used_address)
19131da177e4SLinus Torvalds {
191489bddce5SStephen Hemminger 	struct compat_msghdr __user *msg_compat =
191589bddce5SStephen Hemminger 	    (struct compat_msghdr __user *)msg;
1916230b1839SYOSHIFUJI Hideaki 	struct sockaddr_storage address;
19171da177e4SLinus Torvalds 	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
1918b9d717a7SAlex Williamson 	unsigned char ctl[sizeof(struct cmsghdr) + 20]
1919b9d717a7SAlex Williamson 	    __attribute__ ((aligned(sizeof(__kernel_size_t))));
1920b9d717a7SAlex Williamson 	/* 20 is size of ipv6_pktinfo */
19211da177e4SLinus Torvalds 	unsigned char *ctl_buf = ctl;
192208adb7daSAl Viro 	int ctl_len, total_len;
192308adb7daSAl Viro 	ssize_t err;
19241da177e4SLinus Torvalds 
192508adb7daSAl Viro 	msg_sys->msg_name = &address;
19261da177e4SLinus Torvalds 
192708449320SAl Viro 	if (MSG_CMSG_COMPAT & flags)
192808adb7daSAl Viro 		err = get_compat_msghdr(msg_sys, msg_compat, NULL, &iov);
192908449320SAl Viro 	else
193008adb7daSAl Viro 		err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
19311da177e4SLinus Torvalds 	if (err < 0)
19321da177e4SLinus Torvalds 		goto out_freeiov;
19331da177e4SLinus Torvalds 	total_len = err;
19341da177e4SLinus Torvalds 
19351da177e4SLinus Torvalds 	err = -ENOBUFS;
19361da177e4SLinus Torvalds 
1937228e548eSAnton Blanchard 	if (msg_sys->msg_controllen > INT_MAX)
19381da177e4SLinus Torvalds 		goto out_freeiov;
1939228e548eSAnton Blanchard 	ctl_len = msg_sys->msg_controllen;
19401da177e4SLinus Torvalds 	if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
194189bddce5SStephen Hemminger 		err =
1942228e548eSAnton Blanchard 		    cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
194389bddce5SStephen Hemminger 						     sizeof(ctl));
19441da177e4SLinus Torvalds 		if (err)
19451da177e4SLinus Torvalds 			goto out_freeiov;
1946228e548eSAnton Blanchard 		ctl_buf = msg_sys->msg_control;
1947228e548eSAnton Blanchard 		ctl_len = msg_sys->msg_controllen;
19481da177e4SLinus Torvalds 	} else if (ctl_len) {
194989bddce5SStephen Hemminger 		if (ctl_len > sizeof(ctl)) {
19501da177e4SLinus Torvalds 			ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
19511da177e4SLinus Torvalds 			if (ctl_buf == NULL)
19521da177e4SLinus Torvalds 				goto out_freeiov;
19531da177e4SLinus Torvalds 		}
19541da177e4SLinus Torvalds 		err = -EFAULT;
19551da177e4SLinus Torvalds 		/*
1956228e548eSAnton Blanchard 		 * Careful! Before this, msg_sys->msg_control contains a user pointer.
19571da177e4SLinus Torvalds 		 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
19581da177e4SLinus Torvalds 		 * checking falls down on this.
19591da177e4SLinus Torvalds 		 */
1960fb8621bbSNamhyung Kim 		if (copy_from_user(ctl_buf,
1961228e548eSAnton Blanchard 				   (void __user __force *)msg_sys->msg_control,
196289bddce5SStephen Hemminger 				   ctl_len))
19631da177e4SLinus Torvalds 			goto out_freectl;
1964228e548eSAnton Blanchard 		msg_sys->msg_control = ctl_buf;
19651da177e4SLinus Torvalds 	}
1966228e548eSAnton Blanchard 	msg_sys->msg_flags = flags;
19671da177e4SLinus Torvalds 
19681da177e4SLinus Torvalds 	if (sock->file->f_flags & O_NONBLOCK)
1969228e548eSAnton Blanchard 		msg_sys->msg_flags |= MSG_DONTWAIT;
1970c71d8ebeSTetsuo Handa 	/*
1971c71d8ebeSTetsuo Handa 	 * If this is sendmmsg() and current destination address is same as
1972c71d8ebeSTetsuo Handa 	 * previously succeeded address, omit asking LSM's decision.
1973c71d8ebeSTetsuo Handa 	 * used_address->name_len is initialized to UINT_MAX so that the first
1974c71d8ebeSTetsuo Handa 	 * destination address never matches.
1975c71d8ebeSTetsuo Handa 	 */
1976bc909d9dSMathieu Desnoyers 	if (used_address && msg_sys->msg_name &&
1977bc909d9dSMathieu Desnoyers 	    used_address->name_len == msg_sys->msg_namelen &&
1978bc909d9dSMathieu Desnoyers 	    !memcmp(&used_address->name, msg_sys->msg_name,
1979c71d8ebeSTetsuo Handa 		    used_address->name_len)) {
1980c71d8ebeSTetsuo Handa 		err = sock_sendmsg_nosec(sock, msg_sys, total_len);
1981c71d8ebeSTetsuo Handa 		goto out_freectl;
1982c71d8ebeSTetsuo Handa 	}
1983c71d8ebeSTetsuo Handa 	err = sock_sendmsg(sock, msg_sys, total_len);
1984c71d8ebeSTetsuo Handa 	/*
1985c71d8ebeSTetsuo Handa 	 * If this is sendmmsg() and sending to current destination address was
1986c71d8ebeSTetsuo Handa 	 * successful, remember it.
1987c71d8ebeSTetsuo Handa 	 */
1988c71d8ebeSTetsuo Handa 	if (used_address && err >= 0) {
1989c71d8ebeSTetsuo Handa 		used_address->name_len = msg_sys->msg_namelen;
1990bc909d9dSMathieu Desnoyers 		if (msg_sys->msg_name)
1991bc909d9dSMathieu Desnoyers 			memcpy(&used_address->name, msg_sys->msg_name,
1992c71d8ebeSTetsuo Handa 			       used_address->name_len);
1993c71d8ebeSTetsuo Handa 	}
19941da177e4SLinus Torvalds 
19951da177e4SLinus Torvalds out_freectl:
19961da177e4SLinus Torvalds 	if (ctl_buf != ctl)
19971da177e4SLinus Torvalds 		sock_kfree_s(sock->sk, ctl_buf, ctl_len);
19981da177e4SLinus Torvalds out_freeiov:
19991da177e4SLinus Torvalds 	if (iov != iovstack)
2000a74e9106SEric Dumazet 		kfree(iov);
2001228e548eSAnton Blanchard 	return err;
2002228e548eSAnton Blanchard }
2003228e548eSAnton Blanchard 
2004228e548eSAnton Blanchard /*
2005228e548eSAnton Blanchard  *	BSD sendmsg interface
2006228e548eSAnton Blanchard  */
2007228e548eSAnton Blanchard 
2008666547ffSAl Viro long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
2009228e548eSAnton Blanchard {
2010228e548eSAnton Blanchard 	int fput_needed, err;
2011228e548eSAnton Blanchard 	struct msghdr msg_sys;
20121be374a0SAndy Lutomirski 	struct socket *sock;
2013228e548eSAnton Blanchard 
20141be374a0SAndy Lutomirski 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
2015228e548eSAnton Blanchard 	if (!sock)
2016228e548eSAnton Blanchard 		goto out;
2017228e548eSAnton Blanchard 
2018a7526eb5SAndy Lutomirski 	err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
2019228e548eSAnton Blanchard 
20206cb153caSBenjamin LaHaise 	fput_light(sock->file, fput_needed);
20211da177e4SLinus Torvalds out:
20221da177e4SLinus Torvalds 	return err;
20231da177e4SLinus Torvalds }
20241da177e4SLinus Torvalds 
2025666547ffSAl Viro SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags)
2026a7526eb5SAndy Lutomirski {
2027a7526eb5SAndy Lutomirski 	if (flags & MSG_CMSG_COMPAT)
2028a7526eb5SAndy Lutomirski 		return -EINVAL;
2029a7526eb5SAndy Lutomirski 	return __sys_sendmsg(fd, msg, flags);
2030a7526eb5SAndy Lutomirski }
2031a7526eb5SAndy Lutomirski 
2032228e548eSAnton Blanchard /*
2033228e548eSAnton Blanchard  *	Linux sendmmsg interface
2034228e548eSAnton Blanchard  */
2035228e548eSAnton Blanchard 
2036228e548eSAnton Blanchard int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2037228e548eSAnton Blanchard 		   unsigned int flags)
2038228e548eSAnton Blanchard {
2039228e548eSAnton Blanchard 	int fput_needed, err, datagrams;
2040228e548eSAnton Blanchard 	struct socket *sock;
2041228e548eSAnton Blanchard 	struct mmsghdr __user *entry;
2042228e548eSAnton Blanchard 	struct compat_mmsghdr __user *compat_entry;
2043228e548eSAnton Blanchard 	struct msghdr msg_sys;
2044c71d8ebeSTetsuo Handa 	struct used_address used_address;
2045228e548eSAnton Blanchard 
204698382f41SAnton Blanchard 	if (vlen > UIO_MAXIOV)
204798382f41SAnton Blanchard 		vlen = UIO_MAXIOV;
2048228e548eSAnton Blanchard 
2049228e548eSAnton Blanchard 	datagrams = 0;
2050228e548eSAnton Blanchard 
2051228e548eSAnton Blanchard 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
2052228e548eSAnton Blanchard 	if (!sock)
2053228e548eSAnton Blanchard 		return err;
2054228e548eSAnton Blanchard 
2055c71d8ebeSTetsuo Handa 	used_address.name_len = UINT_MAX;
2056228e548eSAnton Blanchard 	entry = mmsg;
2057228e548eSAnton Blanchard 	compat_entry = (struct compat_mmsghdr __user *)mmsg;
2058728ffb86SAnton Blanchard 	err = 0;
2059228e548eSAnton Blanchard 
2060228e548eSAnton Blanchard 	while (datagrams < vlen) {
2061228e548eSAnton Blanchard 		if (MSG_CMSG_COMPAT & flags) {
2062666547ffSAl Viro 			err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
2063c71d8ebeSTetsuo Handa 					     &msg_sys, flags, &used_address);
2064228e548eSAnton Blanchard 			if (err < 0)
2065228e548eSAnton Blanchard 				break;
2066228e548eSAnton Blanchard 			err = __put_user(err, &compat_entry->msg_len);
2067228e548eSAnton Blanchard 			++compat_entry;
2068228e548eSAnton Blanchard 		} else {
2069a7526eb5SAndy Lutomirski 			err = ___sys_sendmsg(sock,
2070666547ffSAl Viro 					     (struct user_msghdr __user *)entry,
2071c71d8ebeSTetsuo Handa 					     &msg_sys, flags, &used_address);
2072228e548eSAnton Blanchard 			if (err < 0)
2073228e548eSAnton Blanchard 				break;
2074228e548eSAnton Blanchard 			err = put_user(err, &entry->msg_len);
2075228e548eSAnton Blanchard 			++entry;
2076228e548eSAnton Blanchard 		}
2077228e548eSAnton Blanchard 
2078228e548eSAnton Blanchard 		if (err)
2079228e548eSAnton Blanchard 			break;
2080228e548eSAnton Blanchard 		++datagrams;
2081228e548eSAnton Blanchard 	}
2082228e548eSAnton Blanchard 
2083228e548eSAnton Blanchard 	fput_light(sock->file, fput_needed);
2084228e548eSAnton Blanchard 
2085728ffb86SAnton Blanchard 	/* We only return an error if no datagrams were able to be sent */
2086728ffb86SAnton Blanchard 	if (datagrams != 0)
2087228e548eSAnton Blanchard 		return datagrams;
2088228e548eSAnton Blanchard 
2089228e548eSAnton Blanchard 	return err;
2090228e548eSAnton Blanchard }
2091228e548eSAnton Blanchard 
2092228e548eSAnton Blanchard SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
2093228e548eSAnton Blanchard 		unsigned int, vlen, unsigned int, flags)
2094228e548eSAnton Blanchard {
20951be374a0SAndy Lutomirski 	if (flags & MSG_CMSG_COMPAT)
20961be374a0SAndy Lutomirski 		return -EINVAL;
2097228e548eSAnton Blanchard 	return __sys_sendmmsg(fd, mmsg, vlen, flags);
2098228e548eSAnton Blanchard }
2099228e548eSAnton Blanchard 
2100666547ffSAl Viro static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
210195c96174SEric Dumazet 			 struct msghdr *msg_sys, unsigned int flags, int nosec)
21021da177e4SLinus Torvalds {
210389bddce5SStephen Hemminger 	struct compat_msghdr __user *msg_compat =
210489bddce5SStephen Hemminger 	    (struct compat_msghdr __user *)msg;
21051da177e4SLinus Torvalds 	struct iovec iovstack[UIO_FASTIOV];
21061da177e4SLinus Torvalds 	struct iovec *iov = iovstack;
21071da177e4SLinus Torvalds 	unsigned long cmsg_ptr;
210808adb7daSAl Viro 	int total_len, len;
210908adb7daSAl Viro 	ssize_t err;
21101da177e4SLinus Torvalds 
21111da177e4SLinus Torvalds 	/* kernel mode address */
2112230b1839SYOSHIFUJI Hideaki 	struct sockaddr_storage addr;
21131da177e4SLinus Torvalds 
21141da177e4SLinus Torvalds 	/* user mode address pointers */
21151da177e4SLinus Torvalds 	struct sockaddr __user *uaddr;
211608adb7daSAl Viro 	int __user *uaddr_len = COMPAT_NAMELEN(msg);
21171da177e4SLinus Torvalds 
211808adb7daSAl Viro 	msg_sys->msg_name = &addr;
21191da177e4SLinus Torvalds 
2120f3d33426SHannes Frederic Sowa 	if (MSG_CMSG_COMPAT & flags)
212108adb7daSAl Viro 		err = get_compat_msghdr(msg_sys, msg_compat, &uaddr, &iov);
2122f3d33426SHannes Frederic Sowa 	else
212308adb7daSAl Viro 		err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
21241da177e4SLinus Torvalds 	if (err < 0)
21251da177e4SLinus Torvalds 		goto out_freeiov;
21261da177e4SLinus Torvalds 	total_len = err;
21271da177e4SLinus Torvalds 
2128a2e27255SArnaldo Carvalho de Melo 	cmsg_ptr = (unsigned long)msg_sys->msg_control;
2129a2e27255SArnaldo Carvalho de Melo 	msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
21301da177e4SLinus Torvalds 
2131f3d33426SHannes Frederic Sowa 	/* We assume all kernel code knows the size of sockaddr_storage */
2132f3d33426SHannes Frederic Sowa 	msg_sys->msg_namelen = 0;
2133f3d33426SHannes Frederic Sowa 
21341da177e4SLinus Torvalds 	if (sock->file->f_flags & O_NONBLOCK)
21351da177e4SLinus Torvalds 		flags |= MSG_DONTWAIT;
2136a2e27255SArnaldo Carvalho de Melo 	err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2137a2e27255SArnaldo Carvalho de Melo 							  total_len, flags);
21381da177e4SLinus Torvalds 	if (err < 0)
21391da177e4SLinus Torvalds 		goto out_freeiov;
21401da177e4SLinus Torvalds 	len = err;
21411da177e4SLinus Torvalds 
21421da177e4SLinus Torvalds 	if (uaddr != NULL) {
214343db362dSMaciej Żenczykowski 		err = move_addr_to_user(&addr,
2144a2e27255SArnaldo Carvalho de Melo 					msg_sys->msg_namelen, uaddr,
214589bddce5SStephen Hemminger 					uaddr_len);
21461da177e4SLinus Torvalds 		if (err < 0)
21471da177e4SLinus Torvalds 			goto out_freeiov;
21481da177e4SLinus Torvalds 	}
2149a2e27255SArnaldo Carvalho de Melo 	err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
215037f7f421SDavid S. Miller 			 COMPAT_FLAGS(msg));
21511da177e4SLinus Torvalds 	if (err)
21521da177e4SLinus Torvalds 		goto out_freeiov;
21531da177e4SLinus Torvalds 	if (MSG_CMSG_COMPAT & flags)
2154a2e27255SArnaldo Carvalho de Melo 		err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
21551da177e4SLinus Torvalds 				 &msg_compat->msg_controllen);
21561da177e4SLinus Torvalds 	else
2157a2e27255SArnaldo Carvalho de Melo 		err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
21581da177e4SLinus Torvalds 				 &msg->msg_controllen);
21591da177e4SLinus Torvalds 	if (err)
21601da177e4SLinus Torvalds 		goto out_freeiov;
21611da177e4SLinus Torvalds 	err = len;
21621da177e4SLinus Torvalds 
21631da177e4SLinus Torvalds out_freeiov:
21641da177e4SLinus Torvalds 	if (iov != iovstack)
2165a74e9106SEric Dumazet 		kfree(iov);
2166a2e27255SArnaldo Carvalho de Melo 	return err;
2167a2e27255SArnaldo Carvalho de Melo }
2168a2e27255SArnaldo Carvalho de Melo 
2169a2e27255SArnaldo Carvalho de Melo /*
2170a2e27255SArnaldo Carvalho de Melo  *	BSD recvmsg interface
2171a2e27255SArnaldo Carvalho de Melo  */
2172a2e27255SArnaldo Carvalho de Melo 
2173666547ffSAl Viro long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
2174a2e27255SArnaldo Carvalho de Melo {
2175a2e27255SArnaldo Carvalho de Melo 	int fput_needed, err;
2176a2e27255SArnaldo Carvalho de Melo 	struct msghdr msg_sys;
21771be374a0SAndy Lutomirski 	struct socket *sock;
2178a2e27255SArnaldo Carvalho de Melo 
21791be374a0SAndy Lutomirski 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
2180a2e27255SArnaldo Carvalho de Melo 	if (!sock)
2181a2e27255SArnaldo Carvalho de Melo 		goto out;
2182a2e27255SArnaldo Carvalho de Melo 
2183a7526eb5SAndy Lutomirski 	err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0);
2184a2e27255SArnaldo Carvalho de Melo 
21856cb153caSBenjamin LaHaise 	fput_light(sock->file, fput_needed);
21861da177e4SLinus Torvalds out:
21871da177e4SLinus Torvalds 	return err;
21881da177e4SLinus Torvalds }
21891da177e4SLinus Torvalds 
2190666547ffSAl Viro SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
2191a7526eb5SAndy Lutomirski 		unsigned int, flags)
2192a7526eb5SAndy Lutomirski {
2193a7526eb5SAndy Lutomirski 	if (flags & MSG_CMSG_COMPAT)
2194a7526eb5SAndy Lutomirski 		return -EINVAL;
2195a7526eb5SAndy Lutomirski 	return __sys_recvmsg(fd, msg, flags);
2196a7526eb5SAndy Lutomirski }
2197a7526eb5SAndy Lutomirski 
2198a2e27255SArnaldo Carvalho de Melo /*
2199a2e27255SArnaldo Carvalho de Melo  *     Linux recvmmsg interface
2200a2e27255SArnaldo Carvalho de Melo  */
22011da177e4SLinus Torvalds 
2202a2e27255SArnaldo Carvalho de Melo int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2203a2e27255SArnaldo Carvalho de Melo 		   unsigned int flags, struct timespec *timeout)
2204a2e27255SArnaldo Carvalho de Melo {
2205a2e27255SArnaldo Carvalho de Melo 	int fput_needed, err, datagrams;
2206a2e27255SArnaldo Carvalho de Melo 	struct socket *sock;
2207a2e27255SArnaldo Carvalho de Melo 	struct mmsghdr __user *entry;
2208d7256d0eSJean-Mickael Guerin 	struct compat_mmsghdr __user *compat_entry;
2209a2e27255SArnaldo Carvalho de Melo 	struct msghdr msg_sys;
2210a2e27255SArnaldo Carvalho de Melo 	struct timespec end_time;
2211a2e27255SArnaldo Carvalho de Melo 
2212a2e27255SArnaldo Carvalho de Melo 	if (timeout &&
2213a2e27255SArnaldo Carvalho de Melo 	    poll_select_set_timeout(&end_time, timeout->tv_sec,
2214a2e27255SArnaldo Carvalho de Melo 				    timeout->tv_nsec))
2215a2e27255SArnaldo Carvalho de Melo 		return -EINVAL;
2216a2e27255SArnaldo Carvalho de Melo 
2217a2e27255SArnaldo Carvalho de Melo 	datagrams = 0;
2218a2e27255SArnaldo Carvalho de Melo 
2219a2e27255SArnaldo Carvalho de Melo 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
2220a2e27255SArnaldo Carvalho de Melo 	if (!sock)
2221a2e27255SArnaldo Carvalho de Melo 		return err;
2222a2e27255SArnaldo Carvalho de Melo 
2223a2e27255SArnaldo Carvalho de Melo 	err = sock_error(sock->sk);
2224a2e27255SArnaldo Carvalho de Melo 	if (err)
2225a2e27255SArnaldo Carvalho de Melo 		goto out_put;
2226a2e27255SArnaldo Carvalho de Melo 
2227a2e27255SArnaldo Carvalho de Melo 	entry = mmsg;
2228d7256d0eSJean-Mickael Guerin 	compat_entry = (struct compat_mmsghdr __user *)mmsg;
2229a2e27255SArnaldo Carvalho de Melo 
2230a2e27255SArnaldo Carvalho de Melo 	while (datagrams < vlen) {
2231a2e27255SArnaldo Carvalho de Melo 		/*
2232a2e27255SArnaldo Carvalho de Melo 		 * No need to ask LSM for more than the first datagram.
2233a2e27255SArnaldo Carvalho de Melo 		 */
2234d7256d0eSJean-Mickael Guerin 		if (MSG_CMSG_COMPAT & flags) {
2235666547ffSAl Viro 			err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry,
2236b9eb8b87SAnton Blanchard 					     &msg_sys, flags & ~MSG_WAITFORONE,
2237b9eb8b87SAnton Blanchard 					     datagrams);
2238d7256d0eSJean-Mickael Guerin 			if (err < 0)
2239d7256d0eSJean-Mickael Guerin 				break;
2240d7256d0eSJean-Mickael Guerin 			err = __put_user(err, &compat_entry->msg_len);
2241d7256d0eSJean-Mickael Guerin 			++compat_entry;
2242d7256d0eSJean-Mickael Guerin 		} else {
2243a7526eb5SAndy Lutomirski 			err = ___sys_recvmsg(sock,
2244666547ffSAl Viro 					     (struct user_msghdr __user *)entry,
2245b9eb8b87SAnton Blanchard 					     &msg_sys, flags & ~MSG_WAITFORONE,
2246b9eb8b87SAnton Blanchard 					     datagrams);
2247a2e27255SArnaldo Carvalho de Melo 			if (err < 0)
2248a2e27255SArnaldo Carvalho de Melo 				break;
2249a2e27255SArnaldo Carvalho de Melo 			err = put_user(err, &entry->msg_len);
2250d7256d0eSJean-Mickael Guerin 			++entry;
2251d7256d0eSJean-Mickael Guerin 		}
2252d7256d0eSJean-Mickael Guerin 
2253a2e27255SArnaldo Carvalho de Melo 		if (err)
2254a2e27255SArnaldo Carvalho de Melo 			break;
2255a2e27255SArnaldo Carvalho de Melo 		++datagrams;
2256a2e27255SArnaldo Carvalho de Melo 
225771c5c159SBrandon L Black 		/* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
225871c5c159SBrandon L Black 		if (flags & MSG_WAITFORONE)
225971c5c159SBrandon L Black 			flags |= MSG_DONTWAIT;
226071c5c159SBrandon L Black 
2261a2e27255SArnaldo Carvalho de Melo 		if (timeout) {
2262a2e27255SArnaldo Carvalho de Melo 			ktime_get_ts(timeout);
2263a2e27255SArnaldo Carvalho de Melo 			*timeout = timespec_sub(end_time, *timeout);
2264a2e27255SArnaldo Carvalho de Melo 			if (timeout->tv_sec < 0) {
2265a2e27255SArnaldo Carvalho de Melo 				timeout->tv_sec = timeout->tv_nsec = 0;
2266a2e27255SArnaldo Carvalho de Melo 				break;
2267a2e27255SArnaldo Carvalho de Melo 			}
2268a2e27255SArnaldo Carvalho de Melo 
2269a2e27255SArnaldo Carvalho de Melo 			/* Timeout, return less than vlen datagrams */
2270a2e27255SArnaldo Carvalho de Melo 			if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2271a2e27255SArnaldo Carvalho de Melo 				break;
2272a2e27255SArnaldo Carvalho de Melo 		}
2273a2e27255SArnaldo Carvalho de Melo 
2274a2e27255SArnaldo Carvalho de Melo 		/* Out of band data, return right away */
2275a2e27255SArnaldo Carvalho de Melo 		if (msg_sys.msg_flags & MSG_OOB)
2276a2e27255SArnaldo Carvalho de Melo 			break;
2277a2e27255SArnaldo Carvalho de Melo 	}
2278a2e27255SArnaldo Carvalho de Melo 
2279a2e27255SArnaldo Carvalho de Melo out_put:
2280a2e27255SArnaldo Carvalho de Melo 	fput_light(sock->file, fput_needed);
2281a2e27255SArnaldo Carvalho de Melo 
2282a2e27255SArnaldo Carvalho de Melo 	if (err == 0)
2283a2e27255SArnaldo Carvalho de Melo 		return datagrams;
2284a2e27255SArnaldo Carvalho de Melo 
2285a2e27255SArnaldo Carvalho de Melo 	if (datagrams != 0) {
2286a2e27255SArnaldo Carvalho de Melo 		/*
2287a2e27255SArnaldo Carvalho de Melo 		 * We may return less entries than requested (vlen) if the
2288a2e27255SArnaldo Carvalho de Melo 		 * sock is non block and there aren't enough datagrams...
2289a2e27255SArnaldo Carvalho de Melo 		 */
2290a2e27255SArnaldo Carvalho de Melo 		if (err != -EAGAIN) {
2291a2e27255SArnaldo Carvalho de Melo 			/*
2292a2e27255SArnaldo Carvalho de Melo 			 * ... or  if recvmsg returns an error after we
2293a2e27255SArnaldo Carvalho de Melo 			 * received some datagrams, where we record the
2294a2e27255SArnaldo Carvalho de Melo 			 * error to return on the next call or if the
2295a2e27255SArnaldo Carvalho de Melo 			 * app asks about it using getsockopt(SO_ERROR).
2296a2e27255SArnaldo Carvalho de Melo 			 */
2297a2e27255SArnaldo Carvalho de Melo 			sock->sk->sk_err = -err;
2298a2e27255SArnaldo Carvalho de Melo 		}
2299a2e27255SArnaldo Carvalho de Melo 
2300a2e27255SArnaldo Carvalho de Melo 		return datagrams;
2301a2e27255SArnaldo Carvalho de Melo 	}
2302a2e27255SArnaldo Carvalho de Melo 
2303a2e27255SArnaldo Carvalho de Melo 	return err;
2304a2e27255SArnaldo Carvalho de Melo }
2305a2e27255SArnaldo Carvalho de Melo 
2306a2e27255SArnaldo Carvalho de Melo SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2307a2e27255SArnaldo Carvalho de Melo 		unsigned int, vlen, unsigned int, flags,
2308a2e27255SArnaldo Carvalho de Melo 		struct timespec __user *, timeout)
2309a2e27255SArnaldo Carvalho de Melo {
2310a2e27255SArnaldo Carvalho de Melo 	int datagrams;
2311a2e27255SArnaldo Carvalho de Melo 	struct timespec timeout_sys;
2312a2e27255SArnaldo Carvalho de Melo 
23131be374a0SAndy Lutomirski 	if (flags & MSG_CMSG_COMPAT)
23141be374a0SAndy Lutomirski 		return -EINVAL;
23151be374a0SAndy Lutomirski 
2316a2e27255SArnaldo Carvalho de Melo 	if (!timeout)
2317a2e27255SArnaldo Carvalho de Melo 		return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2318a2e27255SArnaldo Carvalho de Melo 
2319a2e27255SArnaldo Carvalho de Melo 	if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2320a2e27255SArnaldo Carvalho de Melo 		return -EFAULT;
2321a2e27255SArnaldo Carvalho de Melo 
2322a2e27255SArnaldo Carvalho de Melo 	datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2323a2e27255SArnaldo Carvalho de Melo 
2324a2e27255SArnaldo Carvalho de Melo 	if (datagrams > 0 &&
2325a2e27255SArnaldo Carvalho de Melo 	    copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2326a2e27255SArnaldo Carvalho de Melo 		datagrams = -EFAULT;
2327a2e27255SArnaldo Carvalho de Melo 
2328a2e27255SArnaldo Carvalho de Melo 	return datagrams;
2329a2e27255SArnaldo Carvalho de Melo }
2330a2e27255SArnaldo Carvalho de Melo 
2331a2e27255SArnaldo Carvalho de Melo #ifdef __ARCH_WANT_SYS_SOCKETCALL
/*
 * Size in bytes of the user argument block for each sys_socketcall()
 * operation, indexed by call number. Slot 0 is a placeholder: call
 * numbers below 1 are rejected before the table is consulted.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[21] = {
	AL(0),
	AL(3), AL(3), AL(3), AL(2), AL(3), AL(3), AL(3), AL(4), AL(4), AL(4),
	AL(6), AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), AL(4), AL(5), AL(4)
};

#undef AL
23421da177e4SLinus Torvalds 
23431da177e4SLinus Torvalds /*
23441da177e4SLinus Torvalds  *	System call vectors.
23451da177e4SLinus Torvalds  *
23461da177e4SLinus Torvalds  *	Argument checking cleaned up. Saved 20% in size.
23471da177e4SLinus Torvalds  *  This function doesn't need to set the kernel lock because
23481da177e4SLinus Torvalds  *  it is set by the callees.
23491da177e4SLinus Torvalds  */
23501da177e4SLinus Torvalds 
23513e0fa65fSHeiko Carstens SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
23521da177e4SLinus Torvalds {
23532950fa9dSChen Gang 	unsigned long a[AUDITSC_ARGS];
23541da177e4SLinus Torvalds 	unsigned long a0, a1;
23551da177e4SLinus Torvalds 	int err;
235647379052SArjan van de Ven 	unsigned int len;
23571da177e4SLinus Torvalds 
2358228e548eSAnton Blanchard 	if (call < 1 || call > SYS_SENDMMSG)
23591da177e4SLinus Torvalds 		return -EINVAL;
23601da177e4SLinus Torvalds 
236147379052SArjan van de Ven 	len = nargs[call];
236247379052SArjan van de Ven 	if (len > sizeof(a))
236347379052SArjan van de Ven 		return -EINVAL;
236447379052SArjan van de Ven 
23651da177e4SLinus Torvalds 	/* copy_from_user should be SMP safe. */
236647379052SArjan van de Ven 	if (copy_from_user(a, args, len))
23671da177e4SLinus Torvalds 		return -EFAULT;
23681da177e4SLinus Torvalds 
23692950fa9dSChen Gang 	err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
23702950fa9dSChen Gang 	if (err)
23712950fa9dSChen Gang 		return err;
23723ec3b2fbSDavid Woodhouse 
23731da177e4SLinus Torvalds 	a0 = a[0];
23741da177e4SLinus Torvalds 	a1 = a[1];
23751da177e4SLinus Torvalds 
237689bddce5SStephen Hemminger 	switch (call) {
23771da177e4SLinus Torvalds 	case SYS_SOCKET:
23781da177e4SLinus Torvalds 		err = sys_socket(a0, a1, a[2]);
23791da177e4SLinus Torvalds 		break;
23801da177e4SLinus Torvalds 	case SYS_BIND:
23811da177e4SLinus Torvalds 		err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
23821da177e4SLinus Torvalds 		break;
23831da177e4SLinus Torvalds 	case SYS_CONNECT:
23841da177e4SLinus Torvalds 		err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
23851da177e4SLinus Torvalds 		break;
23861da177e4SLinus Torvalds 	case SYS_LISTEN:
23871da177e4SLinus Torvalds 		err = sys_listen(a0, a1);
23881da177e4SLinus Torvalds 		break;
23891da177e4SLinus Torvalds 	case SYS_ACCEPT:
2390de11defeSUlrich Drepper 		err = sys_accept4(a0, (struct sockaddr __user *)a1,
2391aaca0bdcSUlrich Drepper 				  (int __user *)a[2], 0);
23921da177e4SLinus Torvalds 		break;
23931da177e4SLinus Torvalds 	case SYS_GETSOCKNAME:
239489bddce5SStephen Hemminger 		err =
239589bddce5SStephen Hemminger 		    sys_getsockname(a0, (struct sockaddr __user *)a1,
239689bddce5SStephen Hemminger 				    (int __user *)a[2]);
23971da177e4SLinus Torvalds 		break;
23981da177e4SLinus Torvalds 	case SYS_GETPEERNAME:
239989bddce5SStephen Hemminger 		err =
240089bddce5SStephen Hemminger 		    sys_getpeername(a0, (struct sockaddr __user *)a1,
240189bddce5SStephen Hemminger 				    (int __user *)a[2]);
24021da177e4SLinus Torvalds 		break;
24031da177e4SLinus Torvalds 	case SYS_SOCKETPAIR:
24041da177e4SLinus Torvalds 		err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
24051da177e4SLinus Torvalds 		break;
24061da177e4SLinus Torvalds 	case SYS_SEND:
24071da177e4SLinus Torvalds 		err = sys_send(a0, (void __user *)a1, a[2], a[3]);
24081da177e4SLinus Torvalds 		break;
24091da177e4SLinus Torvalds 	case SYS_SENDTO:
24101da177e4SLinus Torvalds 		err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
24111da177e4SLinus Torvalds 				 (struct sockaddr __user *)a[4], a[5]);
24121da177e4SLinus Torvalds 		break;
24131da177e4SLinus Torvalds 	case SYS_RECV:
24141da177e4SLinus Torvalds 		err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
24151da177e4SLinus Torvalds 		break;
24161da177e4SLinus Torvalds 	case SYS_RECVFROM:
24171da177e4SLinus Torvalds 		err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
241889bddce5SStephen Hemminger 				   (struct sockaddr __user *)a[4],
241989bddce5SStephen Hemminger 				   (int __user *)a[5]);
24201da177e4SLinus Torvalds 		break;
24211da177e4SLinus Torvalds 	case SYS_SHUTDOWN:
24221da177e4SLinus Torvalds 		err = sys_shutdown(a0, a1);
24231da177e4SLinus Torvalds 		break;
24241da177e4SLinus Torvalds 	case SYS_SETSOCKOPT:
24251da177e4SLinus Torvalds 		err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
24261da177e4SLinus Torvalds 		break;
24271da177e4SLinus Torvalds 	case SYS_GETSOCKOPT:
242889bddce5SStephen Hemminger 		err =
242989bddce5SStephen Hemminger 		    sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
243089bddce5SStephen Hemminger 				   (int __user *)a[4]);
24311da177e4SLinus Torvalds 		break;
24321da177e4SLinus Torvalds 	case SYS_SENDMSG:
2433666547ffSAl Viro 		err = sys_sendmsg(a0, (struct user_msghdr __user *)a1, a[2]);
24341da177e4SLinus Torvalds 		break;
2435228e548eSAnton Blanchard 	case SYS_SENDMMSG:
2436228e548eSAnton Blanchard 		err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
2437228e548eSAnton Blanchard 		break;
24381da177e4SLinus Torvalds 	case SYS_RECVMSG:
2439666547ffSAl Viro 		err = sys_recvmsg(a0, (struct user_msghdr __user *)a1, a[2]);
24401da177e4SLinus Torvalds 		break;
2441a2e27255SArnaldo Carvalho de Melo 	case SYS_RECVMMSG:
2442a2e27255SArnaldo Carvalho de Melo 		err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
2443a2e27255SArnaldo Carvalho de Melo 				   (struct timespec __user *)a[4]);
2444a2e27255SArnaldo Carvalho de Melo 		break;
2445de11defeSUlrich Drepper 	case SYS_ACCEPT4:
2446de11defeSUlrich Drepper 		err = sys_accept4(a0, (struct sockaddr __user *)a1,
2447de11defeSUlrich Drepper 				  (int __user *)a[2], a[3]);
2448aaca0bdcSUlrich Drepper 		break;
24491da177e4SLinus Torvalds 	default:
24501da177e4SLinus Torvalds 		err = -EINVAL;
24511da177e4SLinus Torvalds 		break;
24521da177e4SLinus Torvalds 	}
24531da177e4SLinus Torvalds 	return err;
24541da177e4SLinus Torvalds }
24551da177e4SLinus Torvalds 
24561da177e4SLinus Torvalds #endif				/* __ARCH_WANT_SYS_SOCKETCALL */
24571da177e4SLinus Torvalds 
245855737fdaSStephen Hemminger /**
245955737fdaSStephen Hemminger  *	sock_register - add a socket protocol handler
246055737fdaSStephen Hemminger  *	@ops: description of protocol
246155737fdaSStephen Hemminger  *
24621da177e4SLinus Torvalds  *	This function is called by a protocol handler that wants to
24631da177e4SLinus Torvalds  *	advertise its address family, and have it linked into the
2464e793c0f7SMasanari Iida  *	socket interface. The value ops->family corresponds to the
246555737fdaSStephen Hemminger  *	socket system call protocol family.
24661da177e4SLinus Torvalds  */
2467f0fd27d4SStephen Hemminger int sock_register(const struct net_proto_family *ops)
24681da177e4SLinus Torvalds {
24691da177e4SLinus Torvalds 	int err;
24701da177e4SLinus Torvalds 
24711da177e4SLinus Torvalds 	if (ops->family >= NPROTO) {
24723410f22eSYang Yingliang 		pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
24731da177e4SLinus Torvalds 		return -ENOBUFS;
24741da177e4SLinus Torvalds 	}
247555737fdaSStephen Hemminger 
247655737fdaSStephen Hemminger 	spin_lock(&net_family_lock);
2477190683a9SEric Dumazet 	if (rcu_dereference_protected(net_families[ops->family],
2478190683a9SEric Dumazet 				      lockdep_is_held(&net_family_lock)))
24791da177e4SLinus Torvalds 		err = -EEXIST;
248055737fdaSStephen Hemminger 	else {
2481cf778b00SEric Dumazet 		rcu_assign_pointer(net_families[ops->family], ops);
24821da177e4SLinus Torvalds 		err = 0;
24831da177e4SLinus Torvalds 	}
248455737fdaSStephen Hemminger 	spin_unlock(&net_family_lock);
248555737fdaSStephen Hemminger 
24863410f22eSYang Yingliang 	pr_info("NET: Registered protocol family %d\n", ops->family);
24871da177e4SLinus Torvalds 	return err;
24881da177e4SLinus Torvalds }
2489c6d409cfSEric Dumazet EXPORT_SYMBOL(sock_register);
24901da177e4SLinus Torvalds 
249155737fdaSStephen Hemminger /**
249255737fdaSStephen Hemminger  *	sock_unregister - remove a protocol handler
249355737fdaSStephen Hemminger  *	@family: protocol family to remove
249455737fdaSStephen Hemminger  *
24951da177e4SLinus Torvalds  *	This function is called by a protocol handler that wants to
24961da177e4SLinus Torvalds  *	remove its address family, and have it unlinked from the
249755737fdaSStephen Hemminger  *	new socket creation.
249855737fdaSStephen Hemminger  *
249955737fdaSStephen Hemminger  *	If protocol handler is a module, then it can use module reference
250055737fdaSStephen Hemminger  *	counts to protect against new references. If protocol handler is not
250155737fdaSStephen Hemminger  *	a module then it needs to provide its own protection in
250255737fdaSStephen Hemminger  *	the ops->create routine.
25031da177e4SLinus Torvalds  */
2504f0fd27d4SStephen Hemminger void sock_unregister(int family)
25051da177e4SLinus Torvalds {
2506f0fd27d4SStephen Hemminger 	BUG_ON(family < 0 || family >= NPROTO);
25071da177e4SLinus Torvalds 
250855737fdaSStephen Hemminger 	spin_lock(&net_family_lock);
2509a9b3cd7fSStephen Hemminger 	RCU_INIT_POINTER(net_families[family], NULL);
251055737fdaSStephen Hemminger 	spin_unlock(&net_family_lock);
251155737fdaSStephen Hemminger 
251255737fdaSStephen Hemminger 	synchronize_rcu();
251355737fdaSStephen Hemminger 
25143410f22eSYang Yingliang 	pr_info("NET: Unregistered protocol family %d\n", family);
25151da177e4SLinus Torvalds }
2516c6d409cfSEric Dumazet EXPORT_SYMBOL(sock_unregister);
25171da177e4SLinus Torvalds 
251877d76ea3SAndi Kleen static int __init sock_init(void)
25191da177e4SLinus Torvalds {
2520b3e19d92SNick Piggin 	int err;
25212ca794e5SEric W. Biederman 	/*
25222ca794e5SEric W. Biederman 	 *      Initialize the network sysctl infrastructure.
25232ca794e5SEric W. Biederman 	 */
25242ca794e5SEric W. Biederman 	err = net_sysctl_init();
25252ca794e5SEric W. Biederman 	if (err)
25262ca794e5SEric W. Biederman 		goto out;
2527b3e19d92SNick Piggin 
25281da177e4SLinus Torvalds 	/*
25291da177e4SLinus Torvalds 	 *      Initialize skbuff SLAB cache
25301da177e4SLinus Torvalds 	 */
25311da177e4SLinus Torvalds 	skb_init();
25321da177e4SLinus Torvalds 
25331da177e4SLinus Torvalds 	/*
25341da177e4SLinus Torvalds 	 *      Initialize the protocols module.
25351da177e4SLinus Torvalds 	 */
25361da177e4SLinus Torvalds 
25371da177e4SLinus Torvalds 	init_inodecache();
2538b3e19d92SNick Piggin 
2539b3e19d92SNick Piggin 	err = register_filesystem(&sock_fs_type);
2540b3e19d92SNick Piggin 	if (err)
2541b3e19d92SNick Piggin 		goto out_fs;
25421da177e4SLinus Torvalds 	sock_mnt = kern_mount(&sock_fs_type);
2543b3e19d92SNick Piggin 	if (IS_ERR(sock_mnt)) {
2544b3e19d92SNick Piggin 		err = PTR_ERR(sock_mnt);
2545b3e19d92SNick Piggin 		goto out_mount;
2546b3e19d92SNick Piggin 	}
254777d76ea3SAndi Kleen 
254877d76ea3SAndi Kleen 	/* The real protocol initialization is performed in later initcalls.
25491da177e4SLinus Torvalds 	 */
25501da177e4SLinus Torvalds 
25511da177e4SLinus Torvalds #ifdef CONFIG_NETFILTER
25526d11cfdbSPablo Neira Ayuso 	err = netfilter_init();
25536d11cfdbSPablo Neira Ayuso 	if (err)
25546d11cfdbSPablo Neira Ayuso 		goto out;
25551da177e4SLinus Torvalds #endif
2556cbeb321aSDavid S. Miller 
2557408eccceSDaniel Borkmann 	ptp_classifier_init();
2558c1f19b51SRichard Cochran 
2559b3e19d92SNick Piggin out:
2560b3e19d92SNick Piggin 	return err;
2561b3e19d92SNick Piggin 
2562b3e19d92SNick Piggin out_mount:
2563b3e19d92SNick Piggin 	unregister_filesystem(&sock_fs_type);
2564b3e19d92SNick Piggin out_fs:
2565b3e19d92SNick Piggin 	goto out;
25661da177e4SLinus Torvalds }
25671da177e4SLinus Torvalds 
256877d76ea3SAndi Kleen core_initcall(sock_init);	/* early initcall */
256977d76ea3SAndi Kleen 
25701da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
25711da177e4SLinus Torvalds void socket_seq_show(struct seq_file *seq)
25721da177e4SLinus Torvalds {
25731da177e4SLinus Torvalds 	int cpu;
25741da177e4SLinus Torvalds 	int counter = 0;
25751da177e4SLinus Torvalds 
25766f912042SKAMEZAWA Hiroyuki 	for_each_possible_cpu(cpu)
25771da177e4SLinus Torvalds 	    counter += per_cpu(sockets_in_use, cpu);
25781da177e4SLinus Torvalds 
25791da177e4SLinus Torvalds 	/* It can be negative, by the way. 8) */
25801da177e4SLinus Torvalds 	if (counter < 0)
25811da177e4SLinus Torvalds 		counter = 0;
25821da177e4SLinus Torvalds 
25831da177e4SLinus Torvalds 	seq_printf(seq, "sockets: used %d\n", counter);
25841da177e4SLinus Torvalds }
25851da177e4SLinus Torvalds #endif				/* CONFIG_PROC_FS */
25861da177e4SLinus Torvalds 
258789bbfc95SShaun Pereira #ifdef CONFIG_COMPAT
25886b96018bSArnd Bergmann static int do_siocgstamp(struct net *net, struct socket *sock,
2589644595f8SH. Peter Anvin 			 unsigned int cmd, void __user *up)
25907a229387SArnd Bergmann {
25917a229387SArnd Bergmann 	mm_segment_t old_fs = get_fs();
25927a229387SArnd Bergmann 	struct timeval ktv;
25937a229387SArnd Bergmann 	int err;
25947a229387SArnd Bergmann 
25957a229387SArnd Bergmann 	set_fs(KERNEL_DS);
25966b96018bSArnd Bergmann 	err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
25977a229387SArnd Bergmann 	set_fs(old_fs);
2598644595f8SH. Peter Anvin 	if (!err)
2599ed6fe9d6SMikulas Patocka 		err = compat_put_timeval(&ktv, up);
2600644595f8SH. Peter Anvin 
26017a229387SArnd Bergmann 	return err;
26027a229387SArnd Bergmann }
26037a229387SArnd Bergmann 
26046b96018bSArnd Bergmann static int do_siocgstampns(struct net *net, struct socket *sock,
2605644595f8SH. Peter Anvin 			   unsigned int cmd, void __user *up)
26067a229387SArnd Bergmann {
26077a229387SArnd Bergmann 	mm_segment_t old_fs = get_fs();
26087a229387SArnd Bergmann 	struct timespec kts;
26097a229387SArnd Bergmann 	int err;
26107a229387SArnd Bergmann 
26117a229387SArnd Bergmann 	set_fs(KERNEL_DS);
26126b96018bSArnd Bergmann 	err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
26137a229387SArnd Bergmann 	set_fs(old_fs);
2614644595f8SH. Peter Anvin 	if (!err)
2615ed6fe9d6SMikulas Patocka 		err = compat_put_timespec(&kts, up);
2616644595f8SH. Peter Anvin 
26177a229387SArnd Bergmann 	return err;
26187a229387SArnd Bergmann }
26197a229387SArnd Bergmann 
26206b96018bSArnd Bergmann static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
26217a229387SArnd Bergmann {
26227a229387SArnd Bergmann 	struct ifreq __user *uifr;
26237a229387SArnd Bergmann 	int err;
26247a229387SArnd Bergmann 
26257a229387SArnd Bergmann 	uifr = compat_alloc_user_space(sizeof(struct ifreq));
26266b96018bSArnd Bergmann 	if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
26277a229387SArnd Bergmann 		return -EFAULT;
26287a229387SArnd Bergmann 
26296b96018bSArnd Bergmann 	err = dev_ioctl(net, SIOCGIFNAME, uifr);
26307a229387SArnd Bergmann 	if (err)
26317a229387SArnd Bergmann 		return err;
26327a229387SArnd Bergmann 
26336b96018bSArnd Bergmann 	if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
26347a229387SArnd Bergmann 		return -EFAULT;
26357a229387SArnd Bergmann 
26367a229387SArnd Bergmann 	return 0;
26377a229387SArnd Bergmann }
26387a229387SArnd Bergmann 
26396b96018bSArnd Bergmann static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
26407a229387SArnd Bergmann {
26416b96018bSArnd Bergmann 	struct compat_ifconf ifc32;
26427a229387SArnd Bergmann 	struct ifconf ifc;
26437a229387SArnd Bergmann 	struct ifconf __user *uifc;
26446b96018bSArnd Bergmann 	struct compat_ifreq __user *ifr32;
26457a229387SArnd Bergmann 	struct ifreq __user *ifr;
26467a229387SArnd Bergmann 	unsigned int i, j;
26477a229387SArnd Bergmann 	int err;
26487a229387SArnd Bergmann 
26496b96018bSArnd Bergmann 	if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
26507a229387SArnd Bergmann 		return -EFAULT;
26517a229387SArnd Bergmann 
265243da5f2eSMathias Krause 	memset(&ifc, 0, sizeof(ifc));
26537a229387SArnd Bergmann 	if (ifc32.ifcbuf == 0) {
26547a229387SArnd Bergmann 		ifc32.ifc_len = 0;
26557a229387SArnd Bergmann 		ifc.ifc_len = 0;
26567a229387SArnd Bergmann 		ifc.ifc_req = NULL;
26577a229387SArnd Bergmann 		uifc = compat_alloc_user_space(sizeof(struct ifconf));
26587a229387SArnd Bergmann 	} else {
26596b96018bSArnd Bergmann 		size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
26607a229387SArnd Bergmann 			sizeof(struct ifreq);
26617a229387SArnd Bergmann 		uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
26627a229387SArnd Bergmann 		ifc.ifc_len = len;
26637a229387SArnd Bergmann 		ifr = ifc.ifc_req = (void __user *)(uifc + 1);
26647a229387SArnd Bergmann 		ifr32 = compat_ptr(ifc32.ifcbuf);
26656b96018bSArnd Bergmann 		for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
26666b96018bSArnd Bergmann 			if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
26677a229387SArnd Bergmann 				return -EFAULT;
26687a229387SArnd Bergmann 			ifr++;
26697a229387SArnd Bergmann 			ifr32++;
26707a229387SArnd Bergmann 		}
26717a229387SArnd Bergmann 	}
26727a229387SArnd Bergmann 	if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
26737a229387SArnd Bergmann 		return -EFAULT;
26747a229387SArnd Bergmann 
26756b96018bSArnd Bergmann 	err = dev_ioctl(net, SIOCGIFCONF, uifc);
26767a229387SArnd Bergmann 	if (err)
26777a229387SArnd Bergmann 		return err;
26787a229387SArnd Bergmann 
26797a229387SArnd Bergmann 	if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
26807a229387SArnd Bergmann 		return -EFAULT;
26817a229387SArnd Bergmann 
26827a229387SArnd Bergmann 	ifr = ifc.ifc_req;
26837a229387SArnd Bergmann 	ifr32 = compat_ptr(ifc32.ifcbuf);
26847a229387SArnd Bergmann 	for (i = 0, j = 0;
26856b96018bSArnd Bergmann 	     i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
26866b96018bSArnd Bergmann 	     i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
26876b96018bSArnd Bergmann 		if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
26887a229387SArnd Bergmann 			return -EFAULT;
26897a229387SArnd Bergmann 		ifr32++;
26907a229387SArnd Bergmann 		ifr++;
26917a229387SArnd Bergmann 	}
26927a229387SArnd Bergmann 
26937a229387SArnd Bergmann 	if (ifc32.ifcbuf == 0) {
26947a229387SArnd Bergmann 		/* Translate from 64-bit structure multiple to
26957a229387SArnd Bergmann 		 * a 32-bit one.
26967a229387SArnd Bergmann 		 */
26977a229387SArnd Bergmann 		i = ifc.ifc_len;
26986b96018bSArnd Bergmann 		i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
26997a229387SArnd Bergmann 		ifc32.ifc_len = i;
27007a229387SArnd Bergmann 	} else {
27017a229387SArnd Bergmann 		ifc32.ifc_len = i;
27027a229387SArnd Bergmann 	}
27036b96018bSArnd Bergmann 	if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
27047a229387SArnd Bergmann 		return -EFAULT;
27057a229387SArnd Bergmann 
27067a229387SArnd Bergmann 	return 0;
27077a229387SArnd Bergmann }
27087a229387SArnd Bergmann 
27096b96018bSArnd Bergmann static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
27107a229387SArnd Bergmann {
27113a7da39dSBen Hutchings 	struct compat_ethtool_rxnfc __user *compat_rxnfc;
27123a7da39dSBen Hutchings 	bool convert_in = false, convert_out = false;
27133a7da39dSBen Hutchings 	size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
27143a7da39dSBen Hutchings 	struct ethtool_rxnfc __user *rxnfc;
27157a229387SArnd Bergmann 	struct ifreq __user *ifr;
27163a7da39dSBen Hutchings 	u32 rule_cnt = 0, actual_rule_cnt;
27173a7da39dSBen Hutchings 	u32 ethcmd;
27187a229387SArnd Bergmann 	u32 data;
27193a7da39dSBen Hutchings 	int ret;
27207a229387SArnd Bergmann 
27217a229387SArnd Bergmann 	if (get_user(data, &ifr32->ifr_ifru.ifru_data))
27227a229387SArnd Bergmann 		return -EFAULT;
27237a229387SArnd Bergmann 
27243a7da39dSBen Hutchings 	compat_rxnfc = compat_ptr(data);
27253a7da39dSBen Hutchings 
27263a7da39dSBen Hutchings 	if (get_user(ethcmd, &compat_rxnfc->cmd))
27277a229387SArnd Bergmann 		return -EFAULT;
27287a229387SArnd Bergmann 
27293a7da39dSBen Hutchings 	/* Most ethtool structures are defined without padding.
27303a7da39dSBen Hutchings 	 * Unfortunately struct ethtool_rxnfc is an exception.
27313a7da39dSBen Hutchings 	 */
27323a7da39dSBen Hutchings 	switch (ethcmd) {
27333a7da39dSBen Hutchings 	default:
27343a7da39dSBen Hutchings 		break;
27353a7da39dSBen Hutchings 	case ETHTOOL_GRXCLSRLALL:
27363a7da39dSBen Hutchings 		/* Buffer size is variable */
27373a7da39dSBen Hutchings 		if (get_user(rule_cnt, &compat_rxnfc->rule_cnt))
27383a7da39dSBen Hutchings 			return -EFAULT;
27393a7da39dSBen Hutchings 		if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32))
27403a7da39dSBen Hutchings 			return -ENOMEM;
27413a7da39dSBen Hutchings 		buf_size += rule_cnt * sizeof(u32);
27423a7da39dSBen Hutchings 		/* fall through */
27433a7da39dSBen Hutchings 	case ETHTOOL_GRXRINGS:
27443a7da39dSBen Hutchings 	case ETHTOOL_GRXCLSRLCNT:
27453a7da39dSBen Hutchings 	case ETHTOOL_GRXCLSRULE:
274655664f32SBen Hutchings 	case ETHTOOL_SRXCLSRLINS:
27473a7da39dSBen Hutchings 		convert_out = true;
27483a7da39dSBen Hutchings 		/* fall through */
27493a7da39dSBen Hutchings 	case ETHTOOL_SRXCLSRLDEL:
27503a7da39dSBen Hutchings 		buf_size += sizeof(struct ethtool_rxnfc);
27513a7da39dSBen Hutchings 		convert_in = true;
27523a7da39dSBen Hutchings 		break;
27533a7da39dSBen Hutchings 	}
27543a7da39dSBen Hutchings 
27553a7da39dSBen Hutchings 	ifr = compat_alloc_user_space(buf_size);
2756954b1244SStephen Hemminger 	rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
27573a7da39dSBen Hutchings 
27583a7da39dSBen Hutchings 	if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
27593a7da39dSBen Hutchings 		return -EFAULT;
27603a7da39dSBen Hutchings 
27613a7da39dSBen Hutchings 	if (put_user(convert_in ? rxnfc : compat_ptr(data),
27623a7da39dSBen Hutchings 		     &ifr->ifr_ifru.ifru_data))
27633a7da39dSBen Hutchings 		return -EFAULT;
27643a7da39dSBen Hutchings 
27653a7da39dSBen Hutchings 	if (convert_in) {
2766127fe533SAlexander Duyck 		/* We expect there to be holes between fs.m_ext and
27673a7da39dSBen Hutchings 		 * fs.ring_cookie and at the end of fs, but nowhere else.
27683a7da39dSBen Hutchings 		 */
2769127fe533SAlexander Duyck 		BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) +
2770127fe533SAlexander Duyck 			     sizeof(compat_rxnfc->fs.m_ext) !=
2771127fe533SAlexander Duyck 			     offsetof(struct ethtool_rxnfc, fs.m_ext) +
2772127fe533SAlexander Duyck 			     sizeof(rxnfc->fs.m_ext));
27733a7da39dSBen Hutchings 		BUILD_BUG_ON(
27743a7da39dSBen Hutchings 			offsetof(struct compat_ethtool_rxnfc, fs.location) -
27753a7da39dSBen Hutchings 			offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) !=
27763a7da39dSBen Hutchings 			offsetof(struct ethtool_rxnfc, fs.location) -
27773a7da39dSBen Hutchings 			offsetof(struct ethtool_rxnfc, fs.ring_cookie));
27783a7da39dSBen Hutchings 
27793a7da39dSBen Hutchings 		if (copy_in_user(rxnfc, compat_rxnfc,
2780954b1244SStephen Hemminger 				 (void __user *)(&rxnfc->fs.m_ext + 1) -
2781954b1244SStephen Hemminger 				 (void __user *)rxnfc) ||
27823a7da39dSBen Hutchings 		    copy_in_user(&rxnfc->fs.ring_cookie,
27833a7da39dSBen Hutchings 				 &compat_rxnfc->fs.ring_cookie,
2784954b1244SStephen Hemminger 				 (void __user *)(&rxnfc->fs.location + 1) -
2785954b1244SStephen Hemminger 				 (void __user *)&rxnfc->fs.ring_cookie) ||
27863a7da39dSBen Hutchings 		    copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
27873a7da39dSBen Hutchings 				 sizeof(rxnfc->rule_cnt)))
27883a7da39dSBen Hutchings 			return -EFAULT;
27893a7da39dSBen Hutchings 	}
27903a7da39dSBen Hutchings 
27913a7da39dSBen Hutchings 	ret = dev_ioctl(net, SIOCETHTOOL, ifr);
27923a7da39dSBen Hutchings 	if (ret)
27933a7da39dSBen Hutchings 		return ret;
27943a7da39dSBen Hutchings 
27953a7da39dSBen Hutchings 	if (convert_out) {
27963a7da39dSBen Hutchings 		if (copy_in_user(compat_rxnfc, rxnfc,
2797954b1244SStephen Hemminger 				 (const void __user *)(&rxnfc->fs.m_ext + 1) -
2798954b1244SStephen Hemminger 				 (const void __user *)rxnfc) ||
27993a7da39dSBen Hutchings 		    copy_in_user(&compat_rxnfc->fs.ring_cookie,
28003a7da39dSBen Hutchings 				 &rxnfc->fs.ring_cookie,
2801954b1244SStephen Hemminger 				 (const void __user *)(&rxnfc->fs.location + 1) -
2802954b1244SStephen Hemminger 				 (const void __user *)&rxnfc->fs.ring_cookie) ||
28033a7da39dSBen Hutchings 		    copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt,
28043a7da39dSBen Hutchings 				 sizeof(rxnfc->rule_cnt)))
28053a7da39dSBen Hutchings 			return -EFAULT;
28063a7da39dSBen Hutchings 
28073a7da39dSBen Hutchings 		if (ethcmd == ETHTOOL_GRXCLSRLALL) {
28083a7da39dSBen Hutchings 			/* As an optimisation, we only copy the actual
28093a7da39dSBen Hutchings 			 * number of rules that the underlying
28103a7da39dSBen Hutchings 			 * function returned.  Since Mallory might
28113a7da39dSBen Hutchings 			 * change the rule count in user memory, we
28123a7da39dSBen Hutchings 			 * check that it is less than the rule count
28133a7da39dSBen Hutchings 			 * originally given (as the user buffer size),
28143a7da39dSBen Hutchings 			 * which has been range-checked.
28153a7da39dSBen Hutchings 			 */
28163a7da39dSBen Hutchings 			if (get_user(actual_rule_cnt, &rxnfc->rule_cnt))
28173a7da39dSBen Hutchings 				return -EFAULT;
28183a7da39dSBen Hutchings 			if (actual_rule_cnt < rule_cnt)
28193a7da39dSBen Hutchings 				rule_cnt = actual_rule_cnt;
28203a7da39dSBen Hutchings 			if (copy_in_user(&compat_rxnfc->rule_locs[0],
28213a7da39dSBen Hutchings 					 &rxnfc->rule_locs[0],
28223a7da39dSBen Hutchings 					 rule_cnt * sizeof(u32)))
28233a7da39dSBen Hutchings 				return -EFAULT;
28243a7da39dSBen Hutchings 		}
28253a7da39dSBen Hutchings 	}
28263a7da39dSBen Hutchings 
28273a7da39dSBen Hutchings 	return 0;
28287a229387SArnd Bergmann }
28297a229387SArnd Bergmann 
28307a50a240SArnd Bergmann static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
28317a50a240SArnd Bergmann {
28327a50a240SArnd Bergmann 	void __user *uptr;
28337a50a240SArnd Bergmann 	compat_uptr_t uptr32;
28347a50a240SArnd Bergmann 	struct ifreq __user *uifr;
28357a50a240SArnd Bergmann 
28367a50a240SArnd Bergmann 	uifr = compat_alloc_user_space(sizeof(*uifr));
28377a50a240SArnd Bergmann 	if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
28387a50a240SArnd Bergmann 		return -EFAULT;
28397a50a240SArnd Bergmann 
28407a50a240SArnd Bergmann 	if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
28417a50a240SArnd Bergmann 		return -EFAULT;
28427a50a240SArnd Bergmann 
28437a50a240SArnd Bergmann 	uptr = compat_ptr(uptr32);
28447a50a240SArnd Bergmann 
28457a50a240SArnd Bergmann 	if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
28467a50a240SArnd Bergmann 		return -EFAULT;
28477a50a240SArnd Bergmann 
28487a50a240SArnd Bergmann 	return dev_ioctl(net, SIOCWANDEV, uifr);
28497a50a240SArnd Bergmann }
28507a50a240SArnd Bergmann 
28516b96018bSArnd Bergmann static int bond_ioctl(struct net *net, unsigned int cmd,
28526b96018bSArnd Bergmann 			 struct compat_ifreq __user *ifr32)
28537a229387SArnd Bergmann {
28547a229387SArnd Bergmann 	struct ifreq kifr;
28557a229387SArnd Bergmann 	mm_segment_t old_fs;
28567a229387SArnd Bergmann 	int err;
28577a229387SArnd Bergmann 
28587a229387SArnd Bergmann 	switch (cmd) {
28597a229387SArnd Bergmann 	case SIOCBONDENSLAVE:
28607a229387SArnd Bergmann 	case SIOCBONDRELEASE:
28617a229387SArnd Bergmann 	case SIOCBONDSETHWADDR:
28627a229387SArnd Bergmann 	case SIOCBONDCHANGEACTIVE:
28636b96018bSArnd Bergmann 		if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
28647a229387SArnd Bergmann 			return -EFAULT;
28657a229387SArnd Bergmann 
28667a229387SArnd Bergmann 		old_fs = get_fs();
28677a229387SArnd Bergmann 		set_fs(KERNEL_DS);
2868c3f52ae6Sstephen hemminger 		err = dev_ioctl(net, cmd,
2869c3f52ae6Sstephen hemminger 				(struct ifreq __user __force *) &kifr);
28707a229387SArnd Bergmann 		set_fs(old_fs);
28717a229387SArnd Bergmann 
28727a229387SArnd Bergmann 		return err;
28737a229387SArnd Bergmann 	default:
287407d106d0SLinus Torvalds 		return -ENOIOCTLCMD;
2875ccbd6a5aSJoe Perches 	}
28767a229387SArnd Bergmann }
28777a229387SArnd Bergmann 
2878590d4693SBen Hutchings /* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
2879590d4693SBen Hutchings static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
28806b96018bSArnd Bergmann 				 struct compat_ifreq __user *u_ifreq32)
28817a229387SArnd Bergmann {
28827a229387SArnd Bergmann 	struct ifreq __user *u_ifreq64;
28837a229387SArnd Bergmann 	char tmp_buf[IFNAMSIZ];
28847a229387SArnd Bergmann 	void __user *data64;
28857a229387SArnd Bergmann 	u32 data32;
28867a229387SArnd Bergmann 
28877a229387SArnd Bergmann 	if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
28887a229387SArnd Bergmann 			   IFNAMSIZ))
28897a229387SArnd Bergmann 		return -EFAULT;
2890417c3522SBen Hutchings 	if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
28917a229387SArnd Bergmann 		return -EFAULT;
28927a229387SArnd Bergmann 	data64 = compat_ptr(data32);
28937a229387SArnd Bergmann 
28947a229387SArnd Bergmann 	u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
28957a229387SArnd Bergmann 
28967a229387SArnd Bergmann 	if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
28977a229387SArnd Bergmann 			 IFNAMSIZ))
28987a229387SArnd Bergmann 		return -EFAULT;
2899417c3522SBen Hutchings 	if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
29007a229387SArnd Bergmann 		return -EFAULT;
29017a229387SArnd Bergmann 
29026b96018bSArnd Bergmann 	return dev_ioctl(net, cmd, u_ifreq64);
29037a229387SArnd Bergmann }
29047a229387SArnd Bergmann 
29056b96018bSArnd Bergmann static int dev_ifsioc(struct net *net, struct socket *sock,
29066b96018bSArnd Bergmann 			 unsigned int cmd, struct compat_ifreq __user *uifr32)
29077a229387SArnd Bergmann {
2908a2116ed2SArnd Bergmann 	struct ifreq __user *uifr;
29097a229387SArnd Bergmann 	int err;
29107a229387SArnd Bergmann 
2911a2116ed2SArnd Bergmann 	uifr = compat_alloc_user_space(sizeof(*uifr));
2912a2116ed2SArnd Bergmann 	if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
29137a229387SArnd Bergmann 		return -EFAULT;
2914a2116ed2SArnd Bergmann 
2915a2116ed2SArnd Bergmann 	err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2916a2116ed2SArnd Bergmann 
29177a229387SArnd Bergmann 	if (!err) {
29187a229387SArnd Bergmann 		switch (cmd) {
29197a229387SArnd Bergmann 		case SIOCGIFFLAGS:
29207a229387SArnd Bergmann 		case SIOCGIFMETRIC:
29217a229387SArnd Bergmann 		case SIOCGIFMTU:
29227a229387SArnd Bergmann 		case SIOCGIFMEM:
29237a229387SArnd Bergmann 		case SIOCGIFHWADDR:
29247a229387SArnd Bergmann 		case SIOCGIFINDEX:
29257a229387SArnd Bergmann 		case SIOCGIFADDR:
29267a229387SArnd Bergmann 		case SIOCGIFBRDADDR:
29277a229387SArnd Bergmann 		case SIOCGIFDSTADDR:
29287a229387SArnd Bergmann 		case SIOCGIFNETMASK:
2929fab2532bSArnd Bergmann 		case SIOCGIFPFLAGS:
29307a229387SArnd Bergmann 		case SIOCGIFTXQLEN:
2931fab2532bSArnd Bergmann 		case SIOCGMIIPHY:
2932fab2532bSArnd Bergmann 		case SIOCGMIIREG:
2933a2116ed2SArnd Bergmann 			if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
2934a2116ed2SArnd Bergmann 				err = -EFAULT;
29357a229387SArnd Bergmann 			break;
2936a2116ed2SArnd Bergmann 		}
2937a2116ed2SArnd Bergmann 	}
2938a2116ed2SArnd Bergmann 	return err;
2939a2116ed2SArnd Bergmann }
2940a2116ed2SArnd Bergmann 
2941a2116ed2SArnd Bergmann static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2942a2116ed2SArnd Bergmann 			struct compat_ifreq __user *uifr32)
2943a2116ed2SArnd Bergmann {
2944a2116ed2SArnd Bergmann 	struct ifreq ifr;
2945a2116ed2SArnd Bergmann 	struct compat_ifmap __user *uifmap32;
2946a2116ed2SArnd Bergmann 	mm_segment_t old_fs;
2947a2116ed2SArnd Bergmann 	int err;
2948a2116ed2SArnd Bergmann 
2949a2116ed2SArnd Bergmann 	uifmap32 = &uifr32->ifr_ifru.ifru_map;
2950a2116ed2SArnd Bergmann 	err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
29513ddc5b46SMathieu Desnoyers 	err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
29523ddc5b46SMathieu Desnoyers 	err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
29533ddc5b46SMathieu Desnoyers 	err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
29543ddc5b46SMathieu Desnoyers 	err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
29553ddc5b46SMathieu Desnoyers 	err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
29563ddc5b46SMathieu Desnoyers 	err |= get_user(ifr.ifr_map.port, &uifmap32->port);
2957a2116ed2SArnd Bergmann 	if (err)
2958a2116ed2SArnd Bergmann 		return -EFAULT;
2959a2116ed2SArnd Bergmann 
2960a2116ed2SArnd Bergmann 	old_fs = get_fs();
2961a2116ed2SArnd Bergmann 	set_fs(KERNEL_DS);
2962c3f52ae6Sstephen hemminger 	err = dev_ioctl(net, cmd, (void  __user __force *)&ifr);
2963a2116ed2SArnd Bergmann 	set_fs(old_fs);
2964a2116ed2SArnd Bergmann 
2965a2116ed2SArnd Bergmann 	if (cmd == SIOCGIFMAP && !err) {
29667a229387SArnd Bergmann 		err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
29673ddc5b46SMathieu Desnoyers 		err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
29683ddc5b46SMathieu Desnoyers 		err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
29693ddc5b46SMathieu Desnoyers 		err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
29703ddc5b46SMathieu Desnoyers 		err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
29713ddc5b46SMathieu Desnoyers 		err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
29723ddc5b46SMathieu Desnoyers 		err |= put_user(ifr.ifr_map.port, &uifmap32->port);
29737a229387SArnd Bergmann 		if (err)
29747a229387SArnd Bergmann 			err = -EFAULT;
29757a229387SArnd Bergmann 	}
29767a229387SArnd Bergmann 	return err;
29777a229387SArnd Bergmann }
29787a229387SArnd Bergmann 
29797a229387SArnd Bergmann struct rtentry32 {
29807a229387SArnd Bergmann 	u32		rt_pad1;
29817a229387SArnd Bergmann 	struct sockaddr rt_dst;         /* target address               */
29827a229387SArnd Bergmann 	struct sockaddr rt_gateway;     /* gateway addr (RTF_GATEWAY)   */
29837a229387SArnd Bergmann 	struct sockaddr rt_genmask;     /* target network mask (IP)     */
29847a229387SArnd Bergmann 	unsigned short	rt_flags;
29857a229387SArnd Bergmann 	short		rt_pad2;
29867a229387SArnd Bergmann 	u32		rt_pad3;
29877a229387SArnd Bergmann 	unsigned char	rt_tos;
29887a229387SArnd Bergmann 	unsigned char	rt_class;
29897a229387SArnd Bergmann 	short		rt_pad4;
29907a229387SArnd Bergmann 	short		rt_metric;      /* +1 for binary compatibility! */
29917a229387SArnd Bergmann 	/* char * */ u32 rt_dev;        /* forcing the device at add    */
29927a229387SArnd Bergmann 	u32		rt_mtu;         /* per route MTU/Window         */
29937a229387SArnd Bergmann 	u32		rt_window;      /* Window clamping              */
29947a229387SArnd Bergmann 	unsigned short  rt_irtt;        /* Initial RTT                  */
29957a229387SArnd Bergmann };
29967a229387SArnd Bergmann 
29977a229387SArnd Bergmann struct in6_rtmsg32 {
29987a229387SArnd Bergmann 	struct in6_addr		rtmsg_dst;
29997a229387SArnd Bergmann 	struct in6_addr		rtmsg_src;
30007a229387SArnd Bergmann 	struct in6_addr		rtmsg_gateway;
30017a229387SArnd Bergmann 	u32			rtmsg_type;
30027a229387SArnd Bergmann 	u16			rtmsg_dst_len;
30037a229387SArnd Bergmann 	u16			rtmsg_src_len;
30047a229387SArnd Bergmann 	u32			rtmsg_metric;
30057a229387SArnd Bergmann 	u32			rtmsg_info;
30067a229387SArnd Bergmann 	u32			rtmsg_flags;
30077a229387SArnd Bergmann 	s32			rtmsg_ifindex;
30087a229387SArnd Bergmann };
30097a229387SArnd Bergmann 
30106b96018bSArnd Bergmann static int routing_ioctl(struct net *net, struct socket *sock,
30116b96018bSArnd Bergmann 			 unsigned int cmd, void __user *argp)
30127a229387SArnd Bergmann {
30137a229387SArnd Bergmann 	int ret;
30147a229387SArnd Bergmann 	void *r = NULL;
30157a229387SArnd Bergmann 	struct in6_rtmsg r6;
30167a229387SArnd Bergmann 	struct rtentry r4;
30177a229387SArnd Bergmann 	char devname[16];
30187a229387SArnd Bergmann 	u32 rtdev;
30197a229387SArnd Bergmann 	mm_segment_t old_fs = get_fs();
30207a229387SArnd Bergmann 
30216b96018bSArnd Bergmann 	if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
30226b96018bSArnd Bergmann 		struct in6_rtmsg32 __user *ur6 = argp;
30237a229387SArnd Bergmann 		ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
30247a229387SArnd Bergmann 			3 * sizeof(struct in6_addr));
30253ddc5b46SMathieu Desnoyers 		ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
30263ddc5b46SMathieu Desnoyers 		ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
30273ddc5b46SMathieu Desnoyers 		ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
30283ddc5b46SMathieu Desnoyers 		ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
30293ddc5b46SMathieu Desnoyers 		ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
30303ddc5b46SMathieu Desnoyers 		ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
30313ddc5b46SMathieu Desnoyers 		ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
30327a229387SArnd Bergmann 
30337a229387SArnd Bergmann 		r = (void *) &r6;
30347a229387SArnd Bergmann 	} else { /* ipv4 */
30356b96018bSArnd Bergmann 		struct rtentry32 __user *ur4 = argp;
30367a229387SArnd Bergmann 		ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
30377a229387SArnd Bergmann 					3 * sizeof(struct sockaddr));
30383ddc5b46SMathieu Desnoyers 		ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
30393ddc5b46SMathieu Desnoyers 		ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
30403ddc5b46SMathieu Desnoyers 		ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
30413ddc5b46SMathieu Desnoyers 		ret |= get_user(r4.rt_window, &(ur4->rt_window));
30423ddc5b46SMathieu Desnoyers 		ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
30433ddc5b46SMathieu Desnoyers 		ret |= get_user(rtdev, &(ur4->rt_dev));
30447a229387SArnd Bergmann 		if (rtdev) {
30457a229387SArnd Bergmann 			ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
3046c3f52ae6Sstephen hemminger 			r4.rt_dev = (char __user __force *)devname;
3047c3f52ae6Sstephen hemminger 			devname[15] = 0;
30487a229387SArnd Bergmann 		} else
30497a229387SArnd Bergmann 			r4.rt_dev = NULL;
30507a229387SArnd Bergmann 
30517a229387SArnd Bergmann 		r = (void *) &r4;
30527a229387SArnd Bergmann 	}
30537a229387SArnd Bergmann 
30547a229387SArnd Bergmann 	if (ret) {
30557a229387SArnd Bergmann 		ret = -EFAULT;
30567a229387SArnd Bergmann 		goto out;
30577a229387SArnd Bergmann 	}
30587a229387SArnd Bergmann 
30597a229387SArnd Bergmann 	set_fs(KERNEL_DS);
30606b96018bSArnd Bergmann 	ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
30617a229387SArnd Bergmann 	set_fs(old_fs);
30627a229387SArnd Bergmann 
30637a229387SArnd Bergmann out:
30647a229387SArnd Bergmann 	return ret;
30657a229387SArnd Bergmann }
30667a229387SArnd Bergmann 
30677a229387SArnd Bergmann /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
30687a229387SArnd Bergmann  * for some operations; this forces use of the newer bridge-utils that
306925985edcSLucas De Marchi  * use compatible ioctls
30707a229387SArnd Bergmann  */
30716b96018bSArnd Bergmann static int old_bridge_ioctl(compat_ulong_t __user *argp)
30727a229387SArnd Bergmann {
30736b96018bSArnd Bergmann 	compat_ulong_t tmp;
30747a229387SArnd Bergmann 
30756b96018bSArnd Bergmann 	if (get_user(tmp, argp))
30767a229387SArnd Bergmann 		return -EFAULT;
30777a229387SArnd Bergmann 	if (tmp == BRCTL_GET_VERSION)
30787a229387SArnd Bergmann 		return BRCTL_VERSION + 1;
30797a229387SArnd Bergmann 	return -EINVAL;
30807a229387SArnd Bergmann }
30817a229387SArnd Bergmann 
30826b96018bSArnd Bergmann static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
30836b96018bSArnd Bergmann 			 unsigned int cmd, unsigned long arg)
30846b96018bSArnd Bergmann {
30856b96018bSArnd Bergmann 	void __user *argp = compat_ptr(arg);
30866b96018bSArnd Bergmann 	struct sock *sk = sock->sk;
30876b96018bSArnd Bergmann 	struct net *net = sock_net(sk);
30887a229387SArnd Bergmann 
30896b96018bSArnd Bergmann 	if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
3090590d4693SBen Hutchings 		return compat_ifr_data_ioctl(net, cmd, argp);
30917a229387SArnd Bergmann 
30926b96018bSArnd Bergmann 	switch (cmd) {
30936b96018bSArnd Bergmann 	case SIOCSIFBR:
30946b96018bSArnd Bergmann 	case SIOCGIFBR:
30956b96018bSArnd Bergmann 		return old_bridge_ioctl(argp);
30966b96018bSArnd Bergmann 	case SIOCGIFNAME:
30976b96018bSArnd Bergmann 		return dev_ifname32(net, argp);
30986b96018bSArnd Bergmann 	case SIOCGIFCONF:
30996b96018bSArnd Bergmann 		return dev_ifconf(net, argp);
31006b96018bSArnd Bergmann 	case SIOCETHTOOL:
31016b96018bSArnd Bergmann 		return ethtool_ioctl(net, argp);
31027a50a240SArnd Bergmann 	case SIOCWANDEV:
31037a50a240SArnd Bergmann 		return compat_siocwandev(net, argp);
3104a2116ed2SArnd Bergmann 	case SIOCGIFMAP:
3105a2116ed2SArnd Bergmann 	case SIOCSIFMAP:
3106a2116ed2SArnd Bergmann 		return compat_sioc_ifmap(net, cmd, argp);
31076b96018bSArnd Bergmann 	case SIOCBONDENSLAVE:
31086b96018bSArnd Bergmann 	case SIOCBONDRELEASE:
31096b96018bSArnd Bergmann 	case SIOCBONDSETHWADDR:
31106b96018bSArnd Bergmann 	case SIOCBONDCHANGEACTIVE:
31116b96018bSArnd Bergmann 		return bond_ioctl(net, cmd, argp);
31126b96018bSArnd Bergmann 	case SIOCADDRT:
31136b96018bSArnd Bergmann 	case SIOCDELRT:
31146b96018bSArnd Bergmann 		return routing_ioctl(net, sock, cmd, argp);
31156b96018bSArnd Bergmann 	case SIOCGSTAMP:
31166b96018bSArnd Bergmann 		return do_siocgstamp(net, sock, cmd, argp);
31176b96018bSArnd Bergmann 	case SIOCGSTAMPNS:
31186b96018bSArnd Bergmann 		return do_siocgstampns(net, sock, cmd, argp);
3119590d4693SBen Hutchings 	case SIOCBONDSLAVEINFOQUERY:
3120590d4693SBen Hutchings 	case SIOCBONDINFOQUERY:
3121a2116ed2SArnd Bergmann 	case SIOCSHWTSTAMP:
3122fd468c74SBen Hutchings 	case SIOCGHWTSTAMP:
3123590d4693SBen Hutchings 		return compat_ifr_data_ioctl(net, cmd, argp);
31247a229387SArnd Bergmann 
31256b96018bSArnd Bergmann 	case FIOSETOWN:
31266b96018bSArnd Bergmann 	case SIOCSPGRP:
31276b96018bSArnd Bergmann 	case FIOGETOWN:
31286b96018bSArnd Bergmann 	case SIOCGPGRP:
31296b96018bSArnd Bergmann 	case SIOCBRADDBR:
31306b96018bSArnd Bergmann 	case SIOCBRDELBR:
31316b96018bSArnd Bergmann 	case SIOCGIFVLAN:
31326b96018bSArnd Bergmann 	case SIOCSIFVLAN:
31336b96018bSArnd Bergmann 	case SIOCADDDLCI:
31346b96018bSArnd Bergmann 	case SIOCDELDLCI:
31356b96018bSArnd Bergmann 		return sock_ioctl(file, cmd, arg);
31366b96018bSArnd Bergmann 
31376b96018bSArnd Bergmann 	case SIOCGIFFLAGS:
31386b96018bSArnd Bergmann 	case SIOCSIFFLAGS:
31396b96018bSArnd Bergmann 	case SIOCGIFMETRIC:
31406b96018bSArnd Bergmann 	case SIOCSIFMETRIC:
31416b96018bSArnd Bergmann 	case SIOCGIFMTU:
31426b96018bSArnd Bergmann 	case SIOCSIFMTU:
31436b96018bSArnd Bergmann 	case SIOCGIFMEM:
31446b96018bSArnd Bergmann 	case SIOCSIFMEM:
31456b96018bSArnd Bergmann 	case SIOCGIFHWADDR:
31466b96018bSArnd Bergmann 	case SIOCSIFHWADDR:
31476b96018bSArnd Bergmann 	case SIOCADDMULTI:
31486b96018bSArnd Bergmann 	case SIOCDELMULTI:
31496b96018bSArnd Bergmann 	case SIOCGIFINDEX:
31506b96018bSArnd Bergmann 	case SIOCGIFADDR:
31516b96018bSArnd Bergmann 	case SIOCSIFADDR:
31526b96018bSArnd Bergmann 	case SIOCSIFHWBROADCAST:
31536b96018bSArnd Bergmann 	case SIOCDIFADDR:
31546b96018bSArnd Bergmann 	case SIOCGIFBRDADDR:
31556b96018bSArnd Bergmann 	case SIOCSIFBRDADDR:
31566b96018bSArnd Bergmann 	case SIOCGIFDSTADDR:
31576b96018bSArnd Bergmann 	case SIOCSIFDSTADDR:
31586b96018bSArnd Bergmann 	case SIOCGIFNETMASK:
31596b96018bSArnd Bergmann 	case SIOCSIFNETMASK:
31606b96018bSArnd Bergmann 	case SIOCSIFPFLAGS:
31616b96018bSArnd Bergmann 	case SIOCGIFPFLAGS:
31626b96018bSArnd Bergmann 	case SIOCGIFTXQLEN:
31636b96018bSArnd Bergmann 	case SIOCSIFTXQLEN:
31646b96018bSArnd Bergmann 	case SIOCBRADDIF:
31656b96018bSArnd Bergmann 	case SIOCBRDELIF:
31669177efd3SArnd Bergmann 	case SIOCSIFNAME:
31679177efd3SArnd Bergmann 	case SIOCGMIIPHY:
31689177efd3SArnd Bergmann 	case SIOCGMIIREG:
31699177efd3SArnd Bergmann 	case SIOCSMIIREG:
31706b96018bSArnd Bergmann 		return dev_ifsioc(net, sock, cmd, argp);
31719177efd3SArnd Bergmann 
31726b96018bSArnd Bergmann 	case SIOCSARP:
31736b96018bSArnd Bergmann 	case SIOCGARP:
31746b96018bSArnd Bergmann 	case SIOCDARP:
31756b96018bSArnd Bergmann 	case SIOCATMARK:
31769177efd3SArnd Bergmann 		return sock_do_ioctl(net, sock, cmd, arg);
31779177efd3SArnd Bergmann 	}
31789177efd3SArnd Bergmann 
31796b96018bSArnd Bergmann 	return -ENOIOCTLCMD;
31806b96018bSArnd Bergmann }
31817a229387SArnd Bergmann 
318295c96174SEric Dumazet static long compat_sock_ioctl(struct file *file, unsigned int cmd,
318389bbfc95SShaun Pereira 			      unsigned long arg)
318489bbfc95SShaun Pereira {
318589bbfc95SShaun Pereira 	struct socket *sock = file->private_data;
318689bbfc95SShaun Pereira 	int ret = -ENOIOCTLCMD;
318787de87d5SDavid S. Miller 	struct sock *sk;
318887de87d5SDavid S. Miller 	struct net *net;
318987de87d5SDavid S. Miller 
319087de87d5SDavid S. Miller 	sk = sock->sk;
319187de87d5SDavid S. Miller 	net = sock_net(sk);
319289bbfc95SShaun Pereira 
319389bbfc95SShaun Pereira 	if (sock->ops->compat_ioctl)
319489bbfc95SShaun Pereira 		ret = sock->ops->compat_ioctl(sock, cmd, arg);
319589bbfc95SShaun Pereira 
319687de87d5SDavid S. Miller 	if (ret == -ENOIOCTLCMD &&
319787de87d5SDavid S. Miller 	    (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
319887de87d5SDavid S. Miller 		ret = compat_wext_handle_ioctl(net, cmd, arg);
319987de87d5SDavid S. Miller 
32006b96018bSArnd Bergmann 	if (ret == -ENOIOCTLCMD)
32016b96018bSArnd Bergmann 		ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
32026b96018bSArnd Bergmann 
320389bbfc95SShaun Pereira 	return ret;
320489bbfc95SShaun Pereira }
320589bbfc95SShaun Pereira #endif
320689bbfc95SShaun Pereira 
3207ac5a488eSSridhar Samudrala int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3208ac5a488eSSridhar Samudrala {
3209ac5a488eSSridhar Samudrala 	return sock->ops->bind(sock, addr, addrlen);
3210ac5a488eSSridhar Samudrala }
3211c6d409cfSEric Dumazet EXPORT_SYMBOL(kernel_bind);
3212ac5a488eSSridhar Samudrala 
3213ac5a488eSSridhar Samudrala int kernel_listen(struct socket *sock, int backlog)
3214ac5a488eSSridhar Samudrala {
3215ac5a488eSSridhar Samudrala 	return sock->ops->listen(sock, backlog);
3216ac5a488eSSridhar Samudrala }
3217c6d409cfSEric Dumazet EXPORT_SYMBOL(kernel_listen);
3218ac5a488eSSridhar Samudrala 
3219ac5a488eSSridhar Samudrala int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3220ac5a488eSSridhar Samudrala {
3221ac5a488eSSridhar Samudrala 	struct sock *sk = sock->sk;
3222ac5a488eSSridhar Samudrala 	int err;
3223ac5a488eSSridhar Samudrala 
3224ac5a488eSSridhar Samudrala 	err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3225ac5a488eSSridhar Samudrala 			       newsock);
3226ac5a488eSSridhar Samudrala 	if (err < 0)
3227ac5a488eSSridhar Samudrala 		goto done;
3228ac5a488eSSridhar Samudrala 
3229ac5a488eSSridhar Samudrala 	err = sock->ops->accept(sock, *newsock, flags);
3230ac5a488eSSridhar Samudrala 	if (err < 0) {
3231ac5a488eSSridhar Samudrala 		sock_release(*newsock);
3232fa8705b0STony Battersby 		*newsock = NULL;
3233ac5a488eSSridhar Samudrala 		goto done;
3234ac5a488eSSridhar Samudrala 	}
3235ac5a488eSSridhar Samudrala 
3236ac5a488eSSridhar Samudrala 	(*newsock)->ops = sock->ops;
32371b08534eSWei Yongjun 	__module_get((*newsock)->ops->owner);
3238ac5a488eSSridhar Samudrala 
3239ac5a488eSSridhar Samudrala done:
3240ac5a488eSSridhar Samudrala 	return err;
3241ac5a488eSSridhar Samudrala }
3242c6d409cfSEric Dumazet EXPORT_SYMBOL(kernel_accept);
3243ac5a488eSSridhar Samudrala 
3244ac5a488eSSridhar Samudrala int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
3245ac5a488eSSridhar Samudrala 		   int flags)
3246ac5a488eSSridhar Samudrala {
3247ac5a488eSSridhar Samudrala 	return sock->ops->connect(sock, addr, addrlen, flags);
3248ac5a488eSSridhar Samudrala }
3249c6d409cfSEric Dumazet EXPORT_SYMBOL(kernel_connect);
3250ac5a488eSSridhar Samudrala 
3251ac5a488eSSridhar Samudrala int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3252ac5a488eSSridhar Samudrala 			 int *addrlen)
3253ac5a488eSSridhar Samudrala {
3254ac5a488eSSridhar Samudrala 	return sock->ops->getname(sock, addr, addrlen, 0);
3255ac5a488eSSridhar Samudrala }
3256c6d409cfSEric Dumazet EXPORT_SYMBOL(kernel_getsockname);
3257ac5a488eSSridhar Samudrala 
3258ac5a488eSSridhar Samudrala int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3259ac5a488eSSridhar Samudrala 			 int *addrlen)
3260ac5a488eSSridhar Samudrala {
3261ac5a488eSSridhar Samudrala 	return sock->ops->getname(sock, addr, addrlen, 1);
3262ac5a488eSSridhar Samudrala }
3263c6d409cfSEric Dumazet EXPORT_SYMBOL(kernel_getpeername);
3264ac5a488eSSridhar Samudrala 
3265ac5a488eSSridhar Samudrala int kernel_getsockopt(struct socket *sock, int level, int optname,
3266ac5a488eSSridhar Samudrala 			char *optval, int *optlen)
3267ac5a488eSSridhar Samudrala {
3268ac5a488eSSridhar Samudrala 	mm_segment_t oldfs = get_fs();
3269fb8621bbSNamhyung Kim 	char __user *uoptval;
3270fb8621bbSNamhyung Kim 	int __user *uoptlen;
3271ac5a488eSSridhar Samudrala 	int err;
3272ac5a488eSSridhar Samudrala 
3273fb8621bbSNamhyung Kim 	uoptval = (char __user __force *) optval;
3274fb8621bbSNamhyung Kim 	uoptlen = (int __user __force *) optlen;
3275fb8621bbSNamhyung Kim 
3276ac5a488eSSridhar Samudrala 	set_fs(KERNEL_DS);
3277ac5a488eSSridhar Samudrala 	if (level == SOL_SOCKET)
3278fb8621bbSNamhyung Kim 		err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
3279ac5a488eSSridhar Samudrala 	else
3280fb8621bbSNamhyung Kim 		err = sock->ops->getsockopt(sock, level, optname, uoptval,
3281fb8621bbSNamhyung Kim 					    uoptlen);
3282ac5a488eSSridhar Samudrala 	set_fs(oldfs);
3283ac5a488eSSridhar Samudrala 	return err;
3284ac5a488eSSridhar Samudrala }
3285c6d409cfSEric Dumazet EXPORT_SYMBOL(kernel_getsockopt);
3286ac5a488eSSridhar Samudrala 
3287ac5a488eSSridhar Samudrala int kernel_setsockopt(struct socket *sock, int level, int optname,
3288b7058842SDavid S. Miller 			char *optval, unsigned int optlen)
3289ac5a488eSSridhar Samudrala {
3290ac5a488eSSridhar Samudrala 	mm_segment_t oldfs = get_fs();
3291fb8621bbSNamhyung Kim 	char __user *uoptval;
3292ac5a488eSSridhar Samudrala 	int err;
3293ac5a488eSSridhar Samudrala 
3294fb8621bbSNamhyung Kim 	uoptval = (char __user __force *) optval;
3295fb8621bbSNamhyung Kim 
3296ac5a488eSSridhar Samudrala 	set_fs(KERNEL_DS);
3297ac5a488eSSridhar Samudrala 	if (level == SOL_SOCKET)
3298fb8621bbSNamhyung Kim 		err = sock_setsockopt(sock, level, optname, uoptval, optlen);
3299ac5a488eSSridhar Samudrala 	else
3300fb8621bbSNamhyung Kim 		err = sock->ops->setsockopt(sock, level, optname, uoptval,
3301ac5a488eSSridhar Samudrala 					    optlen);
3302ac5a488eSSridhar Samudrala 	set_fs(oldfs);
3303ac5a488eSSridhar Samudrala 	return err;
3304ac5a488eSSridhar Samudrala }
3305c6d409cfSEric Dumazet EXPORT_SYMBOL(kernel_setsockopt);
3306ac5a488eSSridhar Samudrala 
3307ac5a488eSSridhar Samudrala int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3308ac5a488eSSridhar Samudrala 		    size_t size, int flags)
3309ac5a488eSSridhar Samudrala {
3310ac5a488eSSridhar Samudrala 	if (sock->ops->sendpage)
3311ac5a488eSSridhar Samudrala 		return sock->ops->sendpage(sock, page, offset, size, flags);
3312ac5a488eSSridhar Samudrala 
3313ac5a488eSSridhar Samudrala 	return sock_no_sendpage(sock, page, offset, size, flags);
3314ac5a488eSSridhar Samudrala }
3315c6d409cfSEric Dumazet EXPORT_SYMBOL(kernel_sendpage);
3316ac5a488eSSridhar Samudrala 
3317ac5a488eSSridhar Samudrala int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3318ac5a488eSSridhar Samudrala {
3319ac5a488eSSridhar Samudrala 	mm_segment_t oldfs = get_fs();
3320ac5a488eSSridhar Samudrala 	int err;
3321ac5a488eSSridhar Samudrala 
3322ac5a488eSSridhar Samudrala 	set_fs(KERNEL_DS);
3323ac5a488eSSridhar Samudrala 	err = sock->ops->ioctl(sock, cmd, arg);
3324ac5a488eSSridhar Samudrala 	set_fs(oldfs);
3325ac5a488eSSridhar Samudrala 
3326ac5a488eSSridhar Samudrala 	return err;
3327ac5a488eSSridhar Samudrala }
3328c6d409cfSEric Dumazet EXPORT_SYMBOL(kernel_sock_ioctl);
3329ac5a488eSSridhar Samudrala 
333091cf45f0STrond Myklebust int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
333191cf45f0STrond Myklebust {
333291cf45f0STrond Myklebust 	return sock->ops->shutdown(sock, how);
333391cf45f0STrond Myklebust }
333491cf45f0STrond Myklebust EXPORT_SYMBOL(kernel_sock_shutdown);
3335