xref: /openbmc/linux/net/socket.c (revision 55737fda)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * NET		An implementation of the SOCKET network access protocol.
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  * Version:	@(#)socket.c	1.1.93	18/02/95
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  * Authors:	Orest Zborowski, <obz@Kodak.COM>
702c30a84SJesper Juhl  *		Ross Biro
81da177e4SLinus Torvalds  *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
91da177e4SLinus Torvalds  *
101da177e4SLinus Torvalds  * Fixes:
111da177e4SLinus Torvalds  *		Anonymous	:	NOTSOCK/BADF cleanup. Error fix in
121da177e4SLinus Torvalds  *					shutdown()
131da177e4SLinus Torvalds  *		Alan Cox	:	verify_area() fixes
141da177e4SLinus Torvalds  *		Alan Cox	:	Removed DDI
151da177e4SLinus Torvalds  *		Jonathan Kamens	:	SOCK_DGRAM reconnect bug
161da177e4SLinus Torvalds  *		Alan Cox	:	Moved a load of checks to the very
171da177e4SLinus Torvalds  *					top level.
181da177e4SLinus Torvalds  *		Alan Cox	:	Move address structures to/from user
191da177e4SLinus Torvalds  *					mode above the protocol layers.
201da177e4SLinus Torvalds  *		Rob Janssen	:	Allow 0 length sends.
211da177e4SLinus Torvalds  *		Alan Cox	:	Asynchronous I/O support (cribbed from the
221da177e4SLinus Torvalds  *					tty drivers).
231da177e4SLinus Torvalds  *		Niibe Yutaka	:	Asynchronous I/O for writes (4.4BSD style)
241da177e4SLinus Torvalds  *		Jeff Uphoff	:	Made max number of sockets command-line
251da177e4SLinus Torvalds  *					configurable.
261da177e4SLinus Torvalds  *		Matti Aarnio	:	Made the number of sockets dynamic,
271da177e4SLinus Torvalds  *					to be allocated when needed, and mr.
281da177e4SLinus Torvalds  *					Uphoff's max is used as max to be
291da177e4SLinus Torvalds  *					allowed to allocate.
301da177e4SLinus Torvalds  *		Linus		:	Argh. removed all the socket allocation
311da177e4SLinus Torvalds  *					altogether: it's in the inode now.
321da177e4SLinus Torvalds  *		Alan Cox	:	Made sock_alloc()/sock_release() public
331da177e4SLinus Torvalds  *					for NetROM and future kernel nfsd type
341da177e4SLinus Torvalds  *					stuff.
351da177e4SLinus Torvalds  *		Alan Cox	:	sendmsg/recvmsg basics.
361da177e4SLinus Torvalds  *		Tom Dyas	:	Export net symbols.
371da177e4SLinus Torvalds  *		Marcin Dalecki	:	Fixed problems with CONFIG_NET="n".
381da177e4SLinus Torvalds  *		Alan Cox	:	Added thread locking to sys_* calls
391da177e4SLinus Torvalds  *					for sockets. May have errors at the
401da177e4SLinus Torvalds  *					moment.
411da177e4SLinus Torvalds  *		Kevin Buhr	:	Fixed the dumb errors in the above.
421da177e4SLinus Torvalds  *		Andi Kleen	:	Some small cleanups, optimizations,
431da177e4SLinus Torvalds  *					and fixed a copy_from_user() bug.
441da177e4SLinus Torvalds  *		Tigran Aivazian	:	sys_send(args) calls sys_sendto(args, NULL, 0)
451da177e4SLinus Torvalds  *		Tigran Aivazian	:	Made listen(2) backlog sanity checks
461da177e4SLinus Torvalds  *					protocol-independent
471da177e4SLinus Torvalds  *
481da177e4SLinus Torvalds  *
491da177e4SLinus Torvalds  *		This program is free software; you can redistribute it and/or
501da177e4SLinus Torvalds  *		modify it under the terms of the GNU General Public License
511da177e4SLinus Torvalds  *		as published by the Free Software Foundation; either version
521da177e4SLinus Torvalds  *		2 of the License, or (at your option) any later version.
531da177e4SLinus Torvalds  *
541da177e4SLinus Torvalds  *
551da177e4SLinus Torvalds  *	This module is effectively the top level interface to the BSD socket
561da177e4SLinus Torvalds  *	paradigm.
571da177e4SLinus Torvalds  *
581da177e4SLinus Torvalds  *	Based upon Swansea University Computer Society NET3.039
591da177e4SLinus Torvalds  */
601da177e4SLinus Torvalds 
611da177e4SLinus Torvalds #include <linux/mm.h>
621da177e4SLinus Torvalds #include <linux/socket.h>
631da177e4SLinus Torvalds #include <linux/file.h>
641da177e4SLinus Torvalds #include <linux/net.h>
651da177e4SLinus Torvalds #include <linux/interrupt.h>
66*55737fdaSStephen Hemminger #include <linux/rcupdate.h>
671da177e4SLinus Torvalds #include <linux/netdevice.h>
681da177e4SLinus Torvalds #include <linux/proc_fs.h>
691da177e4SLinus Torvalds #include <linux/seq_file.h>
704a3e2f71SArjan van de Ven #include <linux/mutex.h>
711da177e4SLinus Torvalds #include <linux/wanrouter.h>
721da177e4SLinus Torvalds #include <linux/if_bridge.h>
7320380731SArnaldo Carvalho de Melo #include <linux/if_frad.h>
7420380731SArnaldo Carvalho de Melo #include <linux/if_vlan.h>
751da177e4SLinus Torvalds #include <linux/init.h>
761da177e4SLinus Torvalds #include <linux/poll.h>
771da177e4SLinus Torvalds #include <linux/cache.h>
781da177e4SLinus Torvalds #include <linux/module.h>
791da177e4SLinus Torvalds #include <linux/highmem.h>
801da177e4SLinus Torvalds #include <linux/divert.h>
811da177e4SLinus Torvalds #include <linux/mount.h>
821da177e4SLinus Torvalds #include <linux/security.h>
831da177e4SLinus Torvalds #include <linux/syscalls.h>
841da177e4SLinus Torvalds #include <linux/compat.h>
851da177e4SLinus Torvalds #include <linux/kmod.h>
863ec3b2fbSDavid Woodhouse #include <linux/audit.h>
87d86b5e0eSAdrian Bunk #include <linux/wireless.h>
881da177e4SLinus Torvalds 
891da177e4SLinus Torvalds #include <asm/uaccess.h>
901da177e4SLinus Torvalds #include <asm/unistd.h>
911da177e4SLinus Torvalds 
921da177e4SLinus Torvalds #include <net/compat.h>
931da177e4SLinus Torvalds 
941da177e4SLinus Torvalds #include <net/sock.h>
951da177e4SLinus Torvalds #include <linux/netfilter.h>
961da177e4SLinus Torvalds 
971da177e4SLinus Torvalds static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
981da177e4SLinus Torvalds static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf,
991da177e4SLinus Torvalds 			     size_t size, loff_t pos);
1001da177e4SLinus Torvalds static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf,
1011da177e4SLinus Torvalds 			      size_t size, loff_t pos);
1021da177e4SLinus Torvalds static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1031da177e4SLinus Torvalds 
1041da177e4SLinus Torvalds static int sock_close(struct inode *inode, struct file *file);
1051da177e4SLinus Torvalds static unsigned int sock_poll(struct file *file,
1061da177e4SLinus Torvalds 			      struct poll_table_struct *wait);
10789bddce5SStephen Hemminger static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
10889bbfc95SShaun Pereira #ifdef CONFIG_COMPAT
10989bbfc95SShaun Pereira static long compat_sock_ioctl(struct file *file,
11089bbfc95SShaun Pereira 			      unsigned int cmd, unsigned long arg);
11189bbfc95SShaun Pereira #endif
1121da177e4SLinus Torvalds static int sock_fasync(int fd, struct file *filp, int on);
1131da177e4SLinus Torvalds static ssize_t sock_readv(struct file *file, const struct iovec *vector,
1141da177e4SLinus Torvalds 			  unsigned long count, loff_t *ppos);
1151da177e4SLinus Torvalds static ssize_t sock_writev(struct file *file, const struct iovec *vector,
1161da177e4SLinus Torvalds 			   unsigned long count, loff_t *ppos);
1171da177e4SLinus Torvalds static ssize_t sock_sendpage(struct file *file, struct page *page,
1181da177e4SLinus Torvalds 			     int offset, size_t size, loff_t *ppos, int more);
1191da177e4SLinus Torvalds 
1201da177e4SLinus Torvalds /*
1211da177e4SLinus Torvalds  *	Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
1221da177e4SLinus Torvalds  *	in the operation structures but are done directly via the socketcall() multiplexor.
1231da177e4SLinus Torvalds  */
1241da177e4SLinus Torvalds 
1251da177e4SLinus Torvalds static struct file_operations socket_file_ops = {
1261da177e4SLinus Torvalds 	.owner =	THIS_MODULE,
1271da177e4SLinus Torvalds 	.llseek =	no_llseek,
1281da177e4SLinus Torvalds 	.aio_read =	sock_aio_read,
1291da177e4SLinus Torvalds 	.aio_write =	sock_aio_write,
1301da177e4SLinus Torvalds 	.poll =		sock_poll,
1311da177e4SLinus Torvalds 	.unlocked_ioctl = sock_ioctl,
13289bbfc95SShaun Pereira #ifdef CONFIG_COMPAT
13389bbfc95SShaun Pereira 	.compat_ioctl = compat_sock_ioctl,
13489bbfc95SShaun Pereira #endif
1351da177e4SLinus Torvalds 	.mmap =		sock_mmap,
1361da177e4SLinus Torvalds 	.open =		sock_no_open,	/* special open code to disallow open via /proc */
1371da177e4SLinus Torvalds 	.release =	sock_close,
1381da177e4SLinus Torvalds 	.fasync =	sock_fasync,
1391da177e4SLinus Torvalds 	.readv =	sock_readv,
1401da177e4SLinus Torvalds 	.writev =	sock_writev,
1415274f052SJens Axboe 	.sendpage =	sock_sendpage,
1425274f052SJens Axboe 	.splice_write = generic_splice_sendpage,
1431da177e4SLinus Torvalds };
1441da177e4SLinus Torvalds 
1451da177e4SLinus Torvalds /*
1461da177e4SLinus Torvalds  *	The protocol list. Each protocol is registered in here.
1471da177e4SLinus Torvalds  */
1481da177e4SLinus Torvalds 
1491da177e4SLinus Torvalds static DEFINE_SPINLOCK(net_family_lock);
150*55737fdaSStephen Hemminger static const struct net_proto_family *net_families[NPROTO];
1511da177e4SLinus Torvalds 
1521da177e4SLinus Torvalds /*
1531da177e4SLinus Torvalds  *	Statistics counters of the socket lists
1541da177e4SLinus Torvalds  */
1551da177e4SLinus Torvalds 
1561da177e4SLinus Torvalds static DEFINE_PER_CPU(int, sockets_in_use) = 0;
1571da177e4SLinus Torvalds 
/*
 * Support routines.
 * Move socket addresses back and forth across the kernel/user
 * divide and look after the messy bits.
 */

/*
 * Upper bound, in bytes, on a socket address accepted from or returned
 * to user space.
 *
 * Sizes that drove the choice: 108 for Unix domain, 16 for IP, 16 for
 * IPX, 24 for IPv6, about 80 for AX.25. The value must be at least one
 * bigger than the AF_UNIX size (see net/unix/af_unix.c:unix_mkname()).
 */
#define MAX_SOCK_ADDR	128
1721da177e4SLinus Torvalds 
1731da177e4SLinus Torvalds /**
1741da177e4SLinus Torvalds  *	move_addr_to_kernel	-	copy a socket address into kernel space
1751da177e4SLinus Torvalds  *	@uaddr: Address in user space
1761da177e4SLinus Torvalds  *	@kaddr: Address in kernel space
1771da177e4SLinus Torvalds  *	@ulen: Length in user space
1781da177e4SLinus Torvalds  *
1791da177e4SLinus Torvalds  *	The address is copied into kernel space. If the provided address is
1801da177e4SLinus Torvalds  *	too long an error code of -EINVAL is returned. If the copy gives
1811da177e4SLinus Torvalds  *	invalid addresses -EFAULT is returned. On a success 0 is returned.
1821da177e4SLinus Torvalds  */
1831da177e4SLinus Torvalds 
1841da177e4SLinus Torvalds int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
1851da177e4SLinus Torvalds {
1861da177e4SLinus Torvalds 	if (ulen < 0 || ulen > MAX_SOCK_ADDR)
1871da177e4SLinus Torvalds 		return -EINVAL;
1881da177e4SLinus Torvalds 	if (ulen == 0)
1891da177e4SLinus Torvalds 		return 0;
1901da177e4SLinus Torvalds 	if (copy_from_user(kaddr, uaddr, ulen))
1911da177e4SLinus Torvalds 		return -EFAULT;
1923ec3b2fbSDavid Woodhouse 	return audit_sockaddr(ulen, kaddr);
1931da177e4SLinus Torvalds }
1941da177e4SLinus Torvalds 
1951da177e4SLinus Torvalds /**
1961da177e4SLinus Torvalds  *	move_addr_to_user	-	copy an address to user space
1971da177e4SLinus Torvalds  *	@kaddr: kernel space address
1981da177e4SLinus Torvalds  *	@klen: length of address in kernel
1991da177e4SLinus Torvalds  *	@uaddr: user space address
2001da177e4SLinus Torvalds  *	@ulen: pointer to user length field
2011da177e4SLinus Torvalds  *
2021da177e4SLinus Torvalds  *	The value pointed to by ulen on entry is the buffer length available.
2031da177e4SLinus Torvalds  *	This is overwritten with the buffer space used. -EINVAL is returned
2041da177e4SLinus Torvalds  *	if an overlong buffer is specified or a negative buffer size. -EFAULT
2051da177e4SLinus Torvalds  *	is returned if either the buffer or the length field are not
2061da177e4SLinus Torvalds  *	accessible.
2071da177e4SLinus Torvalds  *	After copying the data up to the limit the user specifies, the true
2081da177e4SLinus Torvalds  *	length of the data is written over the length limit the user
2091da177e4SLinus Torvalds  *	specified. Zero is returned for a success.
2101da177e4SLinus Torvalds  */
2111da177e4SLinus Torvalds 
21289bddce5SStephen Hemminger int move_addr_to_user(void *kaddr, int klen, void __user *uaddr,
21389bddce5SStephen Hemminger 		      int __user *ulen)
2141da177e4SLinus Torvalds {
2151da177e4SLinus Torvalds 	int err;
2161da177e4SLinus Torvalds 	int len;
2171da177e4SLinus Torvalds 
21889bddce5SStephen Hemminger 	err = get_user(len, ulen);
21989bddce5SStephen Hemminger 	if (err)
2201da177e4SLinus Torvalds 		return err;
2211da177e4SLinus Torvalds 	if (len > klen)
2221da177e4SLinus Torvalds 		len = klen;
2231da177e4SLinus Torvalds 	if (len < 0 || len > MAX_SOCK_ADDR)
2241da177e4SLinus Torvalds 		return -EINVAL;
22589bddce5SStephen Hemminger 	if (len) {
226d6fe3945SSteve Grubb 		if (audit_sockaddr(klen, kaddr))
227d6fe3945SSteve Grubb 			return -ENOMEM;
2281da177e4SLinus Torvalds 		if (copy_to_user(uaddr, kaddr, len))
2291da177e4SLinus Torvalds 			return -EFAULT;
2301da177e4SLinus Torvalds 	}
2311da177e4SLinus Torvalds 	/*
2321da177e4SLinus Torvalds 	 *      "fromlen shall refer to the value before truncation.."
2331da177e4SLinus Torvalds 	 *                      1003.1g
2341da177e4SLinus Torvalds 	 */
2351da177e4SLinus Torvalds 	return __put_user(klen, ulen);
2361da177e4SLinus Torvalds }
2371da177e4SLinus Torvalds 
2381da177e4SLinus Torvalds #define SOCKFS_MAGIC 0x534F434B
2391da177e4SLinus Torvalds 
240ba89966cSEric Dumazet static kmem_cache_t *sock_inode_cachep __read_mostly;
2411da177e4SLinus Torvalds 
2421da177e4SLinus Torvalds static struct inode *sock_alloc_inode(struct super_block *sb)
2431da177e4SLinus Torvalds {
2441da177e4SLinus Torvalds 	struct socket_alloc *ei;
24589bddce5SStephen Hemminger 
24689bddce5SStephen Hemminger 	ei = kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL);
2471da177e4SLinus Torvalds 	if (!ei)
2481da177e4SLinus Torvalds 		return NULL;
2491da177e4SLinus Torvalds 	init_waitqueue_head(&ei->socket.wait);
2501da177e4SLinus Torvalds 
2511da177e4SLinus Torvalds 	ei->socket.fasync_list = NULL;
2521da177e4SLinus Torvalds 	ei->socket.state = SS_UNCONNECTED;
2531da177e4SLinus Torvalds 	ei->socket.flags = 0;
2541da177e4SLinus Torvalds 	ei->socket.ops = NULL;
2551da177e4SLinus Torvalds 	ei->socket.sk = NULL;
2561da177e4SLinus Torvalds 	ei->socket.file = NULL;
2571da177e4SLinus Torvalds 	ei->socket.flags = 0;
2581da177e4SLinus Torvalds 
2591da177e4SLinus Torvalds 	return &ei->vfs_inode;
2601da177e4SLinus Torvalds }
2611da177e4SLinus Torvalds 
2621da177e4SLinus Torvalds static void sock_destroy_inode(struct inode *inode)
2631da177e4SLinus Torvalds {
2641da177e4SLinus Torvalds 	kmem_cache_free(sock_inode_cachep,
2651da177e4SLinus Torvalds 			container_of(inode, struct socket_alloc, vfs_inode));
2661da177e4SLinus Torvalds }
2671da177e4SLinus Torvalds 
2681da177e4SLinus Torvalds static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
2691da177e4SLinus Torvalds {
2701da177e4SLinus Torvalds 	struct socket_alloc *ei = (struct socket_alloc *)foo;
2711da177e4SLinus Torvalds 
27289bddce5SStephen Hemminger 	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR))
27389bddce5SStephen Hemminger 	    == SLAB_CTOR_CONSTRUCTOR)
2741da177e4SLinus Torvalds 		inode_init_once(&ei->vfs_inode);
2751da177e4SLinus Torvalds }
2761da177e4SLinus Torvalds 
2771da177e4SLinus Torvalds static int init_inodecache(void)
2781da177e4SLinus Torvalds {
2791da177e4SLinus Torvalds 	sock_inode_cachep = kmem_cache_create("sock_inode_cache",
2801da177e4SLinus Torvalds 					      sizeof(struct socket_alloc),
28189bddce5SStephen Hemminger 					      0,
28289bddce5SStephen Hemminger 					      (SLAB_HWCACHE_ALIGN |
28389bddce5SStephen Hemminger 					       SLAB_RECLAIM_ACCOUNT |
284fffb60f9SPaul Jackson 					       SLAB_MEM_SPREAD),
28589bddce5SStephen Hemminger 					      init_once,
28689bddce5SStephen Hemminger 					      NULL);
2871da177e4SLinus Torvalds 	if (sock_inode_cachep == NULL)
2881da177e4SLinus Torvalds 		return -ENOMEM;
2891da177e4SLinus Torvalds 	return 0;
2901da177e4SLinus Torvalds }
2911da177e4SLinus Torvalds 
2921da177e4SLinus Torvalds static struct super_operations sockfs_ops = {
2931da177e4SLinus Torvalds 	.alloc_inode =	sock_alloc_inode,
2941da177e4SLinus Torvalds 	.destroy_inode =sock_destroy_inode,
2951da177e4SLinus Torvalds 	.statfs =	simple_statfs,
2961da177e4SLinus Torvalds };
2971da177e4SLinus Torvalds 
298454e2398SDavid Howells static int sockfs_get_sb(struct file_system_type *fs_type,
29989bddce5SStephen Hemminger 			 int flags, const char *dev_name, void *data,
30089bddce5SStephen Hemminger 			 struct vfsmount *mnt)
3011da177e4SLinus Torvalds {
302454e2398SDavid Howells 	return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
303454e2398SDavid Howells 			     mnt);
3041da177e4SLinus Torvalds }
3051da177e4SLinus Torvalds 
306ba89966cSEric Dumazet static struct vfsmount *sock_mnt __read_mostly;
3071da177e4SLinus Torvalds 
3081da177e4SLinus Torvalds static struct file_system_type sock_fs_type = {
3091da177e4SLinus Torvalds 	.name =		"sockfs",
3101da177e4SLinus Torvalds 	.get_sb =	sockfs_get_sb,
3111da177e4SLinus Torvalds 	.kill_sb =	kill_anon_super,
3121da177e4SLinus Torvalds };
31389bddce5SStephen Hemminger 
/*
 * sockfs_delete_dentry - d_delete hook for sockfs dentries
 * @dentry: dentry being considered (not inspected)
 *
 * Unconditionally returns 1.
 */
static int sockfs_delete_dentry(struct dentry *dentry)
{
	return 1;
}
3181da177e4SLinus Torvalds static struct dentry_operations sockfs_dentry_operations = {
3191da177e4SLinus Torvalds 	.d_delete = sockfs_delete_dentry,
3201da177e4SLinus Torvalds };
3211da177e4SLinus Torvalds 
3221da177e4SLinus Torvalds /*
3231da177e4SLinus Torvalds  *	Obtains the first available file descriptor and sets it up for use.
3241da177e4SLinus Torvalds  *
32539d8c1b6SDavid S. Miller  *	These functions create file structures and maps them to fd space
32639d8c1b6SDavid S. Miller  *	of the current process. On success it returns file descriptor
3271da177e4SLinus Torvalds  *	and file struct implicitly stored in sock->file.
3281da177e4SLinus Torvalds  *	Note that another thread may close file descriptor before we return
3291da177e4SLinus Torvalds  *	from this function. We use the fact that now we do not refer
3301da177e4SLinus Torvalds  *	to socket after mapping. If one day we will need it, this
3311da177e4SLinus Torvalds  *	function will increment ref. count on file by 1.
3321da177e4SLinus Torvalds  *
3331da177e4SLinus Torvalds  *	In any case returned fd MAY BE not valid!
3341da177e4SLinus Torvalds  *	This race condition is unavoidable
3351da177e4SLinus Torvalds  *	with shared fd spaces, we cannot solve it inside kernel,
3361da177e4SLinus Torvalds  *	but we take care of internal coherence yet.
3371da177e4SLinus Torvalds  */
3381da177e4SLinus Torvalds 
33939d8c1b6SDavid S. Miller static int sock_alloc_fd(struct file **filep)
3401da177e4SLinus Torvalds {
3411da177e4SLinus Torvalds 	int fd;
3421da177e4SLinus Torvalds 
3431da177e4SLinus Torvalds 	fd = get_unused_fd();
34439d8c1b6SDavid S. Miller 	if (likely(fd >= 0)) {
3451da177e4SLinus Torvalds 		struct file *file = get_empty_filp();
3461da177e4SLinus Torvalds 
34739d8c1b6SDavid S. Miller 		*filep = file;
34839d8c1b6SDavid S. Miller 		if (unlikely(!file)) {
3491da177e4SLinus Torvalds 			put_unused_fd(fd);
35039d8c1b6SDavid S. Miller 			return -ENFILE;
3511da177e4SLinus Torvalds 		}
35239d8c1b6SDavid S. Miller 	} else
35339d8c1b6SDavid S. Miller 		*filep = NULL;
35439d8c1b6SDavid S. Miller 	return fd;
35539d8c1b6SDavid S. Miller }
35639d8c1b6SDavid S. Miller 
35739d8c1b6SDavid S. Miller static int sock_attach_fd(struct socket *sock, struct file *file)
35839d8c1b6SDavid S. Miller {
35939d8c1b6SDavid S. Miller 	struct qstr this;
36039d8c1b6SDavid S. Miller 	char name[32];
3611da177e4SLinus Torvalds 
362f31f5f05SEric Dumazet 	this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino);
3631da177e4SLinus Torvalds 	this.name = name;
3641da177e4SLinus Torvalds 	this.hash = SOCK_INODE(sock)->i_ino;
3651da177e4SLinus Torvalds 
3661da177e4SLinus Torvalds 	file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
36739d8c1b6SDavid S. Miller 	if (unlikely(!file->f_dentry))
36839d8c1b6SDavid S. Miller 		return -ENOMEM;
36939d8c1b6SDavid S. Miller 
3701da177e4SLinus Torvalds 	file->f_dentry->d_op = &sockfs_dentry_operations;
3711da177e4SLinus Torvalds 	d_add(file->f_dentry, SOCK_INODE(sock));
3721da177e4SLinus Torvalds 	file->f_vfsmnt = mntget(sock_mnt);
3731da177e4SLinus Torvalds 	file->f_mapping = file->f_dentry->d_inode->i_mapping;
3741da177e4SLinus Torvalds 
3751da177e4SLinus Torvalds 	sock->file = file;
3761da177e4SLinus Torvalds 	file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
3771da177e4SLinus Torvalds 	file->f_mode = FMODE_READ | FMODE_WRITE;
3781da177e4SLinus Torvalds 	file->f_flags = O_RDWR;
3791da177e4SLinus Torvalds 	file->f_pos = 0;
38007dc3f07SBenjamin LaHaise 	file->private_data = sock;
38139d8c1b6SDavid S. Miller 
38239d8c1b6SDavid S. Miller 	return 0;
3831da177e4SLinus Torvalds }
3841da177e4SLinus Torvalds 
/*
 * sock_map_fd - give a socket a file descriptor in the current process
 * @sock: socket to map
 *
 * Reserves a descriptor and file structure with sock_alloc_fd(), attaches
 * the socket with sock_attach_fd() and installs the file into the
 * descriptor table.
 *
 * Returns the new descriptor, or a negative errno. If attaching fails,
 * both the file structure and the descriptor are released first.
 */
int sock_map_fd(struct socket *sock)
{
	struct file *newfile;
	int err;
	int fd;

	fd = sock_alloc_fd(&newfile);
	if (unlikely(fd < 0))
		return fd;

	err = sock_attach_fd(sock, newfile);
	if (unlikely(err < 0)) {
		put_filp(newfile);
		put_unused_fd(fd);
		return err;
	}

	fd_install(fd, newfile);
	return fd;
}
4021da177e4SLinus Torvalds 
4036cb153caSBenjamin LaHaise static struct socket *sock_from_file(struct file *file, int *err)
4046cb153caSBenjamin LaHaise {
4056cb153caSBenjamin LaHaise 	struct inode *inode;
4066cb153caSBenjamin LaHaise 	struct socket *sock;
4076cb153caSBenjamin LaHaise 
4086cb153caSBenjamin LaHaise 	if (file->f_op == &socket_file_ops)
4096cb153caSBenjamin LaHaise 		return file->private_data;	/* set in sock_map_fd */
4106cb153caSBenjamin LaHaise 
4116cb153caSBenjamin LaHaise 	inode = file->f_dentry->d_inode;
4126cb153caSBenjamin LaHaise 	if (!S_ISSOCK(inode->i_mode)) {
4136cb153caSBenjamin LaHaise 		*err = -ENOTSOCK;
4146cb153caSBenjamin LaHaise 		return NULL;
4156cb153caSBenjamin LaHaise 	}
4166cb153caSBenjamin LaHaise 
4176cb153caSBenjamin LaHaise 	sock = SOCKET_I(inode);
4186cb153caSBenjamin LaHaise 	if (sock->file != file) {
4196cb153caSBenjamin LaHaise 		printk(KERN_ERR "socki_lookup: socket file changed!\n");
4206cb153caSBenjamin LaHaise 		sock->file = file;
4216cb153caSBenjamin LaHaise 	}
4226cb153caSBenjamin LaHaise 	return sock;
4236cb153caSBenjamin LaHaise }
4246cb153caSBenjamin LaHaise 
4251da177e4SLinus Torvalds /**
4261da177e4SLinus Torvalds  *	sockfd_lookup	- 	Go from a file number to its socket slot
4271da177e4SLinus Torvalds  *	@fd: file handle
4281da177e4SLinus Torvalds  *	@err: pointer to an error code return
4291da177e4SLinus Torvalds  *
4301da177e4SLinus Torvalds  *	The file handle passed in is locked and the socket it is bound
4311da177e4SLinus Torvalds  *	too is returned. If an error occurs the err pointer is overwritten
4321da177e4SLinus Torvalds  *	with a negative errno code and NULL is returned. The function checks
4331da177e4SLinus Torvalds  *	for both invalid handles and passing a handle which is not a socket.
4341da177e4SLinus Torvalds  *
4351da177e4SLinus Torvalds  *	On a success the socket object pointer is returned.
4361da177e4SLinus Torvalds  */
4371da177e4SLinus Torvalds 
4381da177e4SLinus Torvalds struct socket *sockfd_lookup(int fd, int *err)
4391da177e4SLinus Torvalds {
4401da177e4SLinus Torvalds 	struct file *file;
4411da177e4SLinus Torvalds 	struct socket *sock;
4421da177e4SLinus Torvalds 
44389bddce5SStephen Hemminger 	file = fget(fd);
44489bddce5SStephen Hemminger 	if (!file) {
4451da177e4SLinus Torvalds 		*err = -EBADF;
4461da177e4SLinus Torvalds 		return NULL;
4471da177e4SLinus Torvalds 	}
44889bddce5SStephen Hemminger 
4496cb153caSBenjamin LaHaise 	sock = sock_from_file(file, err);
4506cb153caSBenjamin LaHaise 	if (!sock)
4511da177e4SLinus Torvalds 		fput(file);
4526cb153caSBenjamin LaHaise 	return sock;
4531da177e4SLinus Torvalds }
4541da177e4SLinus Torvalds 
4556cb153caSBenjamin LaHaise static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
4566cb153caSBenjamin LaHaise {
4576cb153caSBenjamin LaHaise 	struct file *file;
4586cb153caSBenjamin LaHaise 	struct socket *sock;
4596cb153caSBenjamin LaHaise 
4603672558cSHua Zhong 	*err = -EBADF;
4616cb153caSBenjamin LaHaise 	file = fget_light(fd, fput_needed);
4626cb153caSBenjamin LaHaise 	if (file) {
4636cb153caSBenjamin LaHaise 		sock = sock_from_file(file, err);
4646cb153caSBenjamin LaHaise 		if (sock)
4651da177e4SLinus Torvalds 			return sock;
4666cb153caSBenjamin LaHaise 		fput_light(file, *fput_needed);
4676cb153caSBenjamin LaHaise 	}
4686cb153caSBenjamin LaHaise 	return NULL;
4691da177e4SLinus Torvalds }
4701da177e4SLinus Torvalds 
4711da177e4SLinus Torvalds /**
4721da177e4SLinus Torvalds  *	sock_alloc	-	allocate a socket
4731da177e4SLinus Torvalds  *
4741da177e4SLinus Torvalds  *	Allocate a new inode and socket object. The two are bound together
4751da177e4SLinus Torvalds  *	and initialised. The socket is then returned. If we are out of inodes
4761da177e4SLinus Torvalds  *	NULL is returned.
4771da177e4SLinus Torvalds  */
4781da177e4SLinus Torvalds 
4791da177e4SLinus Torvalds static struct socket *sock_alloc(void)
4801da177e4SLinus Torvalds {
4811da177e4SLinus Torvalds 	struct inode *inode;
4821da177e4SLinus Torvalds 	struct socket *sock;
4831da177e4SLinus Torvalds 
4841da177e4SLinus Torvalds 	inode = new_inode(sock_mnt->mnt_sb);
4851da177e4SLinus Torvalds 	if (!inode)
4861da177e4SLinus Torvalds 		return NULL;
4871da177e4SLinus Torvalds 
4881da177e4SLinus Torvalds 	sock = SOCKET_I(inode);
4891da177e4SLinus Torvalds 
4901da177e4SLinus Torvalds 	inode->i_mode = S_IFSOCK | S_IRWXUGO;
4911da177e4SLinus Torvalds 	inode->i_uid = current->fsuid;
4921da177e4SLinus Torvalds 	inode->i_gid = current->fsgid;
4931da177e4SLinus Torvalds 
4941da177e4SLinus Torvalds 	get_cpu_var(sockets_in_use)++;
4951da177e4SLinus Torvalds 	put_cpu_var(sockets_in_use);
4961da177e4SLinus Torvalds 	return sock;
4971da177e4SLinus Torvalds }
4981da177e4SLinus Torvalds 
/*
 *	In theory you can't get an open on this inode, but /proc provides
 *	a back door. Remember to keep it shut otherwise you'll let the
 *	creepy crawlies in.
 *
 *	Both arguments are ignored; every open attempt fails with -ENXIO.
 */

static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
{
	return -ENXIO;
}
5091da177e4SLinus Torvalds 
5104b6f5d20SArjan van de Ven const struct file_operations bad_sock_fops = {
5111da177e4SLinus Torvalds 	.owner = THIS_MODULE,
5121da177e4SLinus Torvalds 	.open = sock_no_open,
5131da177e4SLinus Torvalds };
5141da177e4SLinus Torvalds 
5151da177e4SLinus Torvalds /**
5161da177e4SLinus Torvalds  *	sock_release	-	close a socket
5171da177e4SLinus Torvalds  *	@sock: socket to close
5181da177e4SLinus Torvalds  *
5191da177e4SLinus Torvalds  *	The socket is released from the protocol stack if it has a release
5201da177e4SLinus Torvalds  *	callback, and the inode is then released if the socket is bound to
5211da177e4SLinus Torvalds  *	an inode not a file.
5221da177e4SLinus Torvalds  */
5231da177e4SLinus Torvalds 
5241da177e4SLinus Torvalds void sock_release(struct socket *sock)
5251da177e4SLinus Torvalds {
5261da177e4SLinus Torvalds 	if (sock->ops) {
5271da177e4SLinus Torvalds 		struct module *owner = sock->ops->owner;
5281da177e4SLinus Torvalds 
5291da177e4SLinus Torvalds 		sock->ops->release(sock);
5301da177e4SLinus Torvalds 		sock->ops = NULL;
5311da177e4SLinus Torvalds 		module_put(owner);
5321da177e4SLinus Torvalds 	}
5331da177e4SLinus Torvalds 
5341da177e4SLinus Torvalds 	if (sock->fasync_list)
5351da177e4SLinus Torvalds 		printk(KERN_ERR "sock_release: fasync list not empty!\n");
5361da177e4SLinus Torvalds 
5371da177e4SLinus Torvalds 	get_cpu_var(sockets_in_use)--;
5381da177e4SLinus Torvalds 	put_cpu_var(sockets_in_use);
5391da177e4SLinus Torvalds 	if (!sock->file) {
5401da177e4SLinus Torvalds 		iput(SOCK_INODE(sock));
5411da177e4SLinus Torvalds 		return;
5421da177e4SLinus Torvalds 	}
5431da177e4SLinus Torvalds 	sock->file = NULL;
5441da177e4SLinus Torvalds }
5451da177e4SLinus Torvalds 
5461da177e4SLinus Torvalds static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
5471da177e4SLinus Torvalds 				 struct msghdr *msg, size_t size)
5481da177e4SLinus Torvalds {
5491da177e4SLinus Torvalds 	struct sock_iocb *si = kiocb_to_siocb(iocb);
5501da177e4SLinus Torvalds 	int err;
5511da177e4SLinus Torvalds 
5521da177e4SLinus Torvalds 	si->sock = sock;
5531da177e4SLinus Torvalds 	si->scm = NULL;
5541da177e4SLinus Torvalds 	si->msg = msg;
5551da177e4SLinus Torvalds 	si->size = size;
5561da177e4SLinus Torvalds 
5571da177e4SLinus Torvalds 	err = security_socket_sendmsg(sock, msg, size);
5581da177e4SLinus Torvalds 	if (err)
5591da177e4SLinus Torvalds 		return err;
5601da177e4SLinus Torvalds 
5611da177e4SLinus Torvalds 	return sock->ops->sendmsg(iocb, sock, msg, size);
5621da177e4SLinus Torvalds }
5631da177e4SLinus Torvalds 
5641da177e4SLinus Torvalds int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
5651da177e4SLinus Torvalds {
5661da177e4SLinus Torvalds 	struct kiocb iocb;
5671da177e4SLinus Torvalds 	struct sock_iocb siocb;
5681da177e4SLinus Torvalds 	int ret;
5691da177e4SLinus Torvalds 
5701da177e4SLinus Torvalds 	init_sync_kiocb(&iocb, NULL);
5711da177e4SLinus Torvalds 	iocb.private = &siocb;
5721da177e4SLinus Torvalds 	ret = __sock_sendmsg(&iocb, sock, msg, size);
5731da177e4SLinus Torvalds 	if (-EIOCBQUEUED == ret)
5741da177e4SLinus Torvalds 		ret = wait_on_sync_kiocb(&iocb);
5751da177e4SLinus Torvalds 	return ret;
5761da177e4SLinus Torvalds }
5771da177e4SLinus Torvalds 
5781da177e4SLinus Torvalds int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
5791da177e4SLinus Torvalds 		   struct kvec *vec, size_t num, size_t size)
5801da177e4SLinus Torvalds {
5811da177e4SLinus Torvalds 	mm_segment_t oldfs = get_fs();
5821da177e4SLinus Torvalds 	int result;
5831da177e4SLinus Torvalds 
5841da177e4SLinus Torvalds 	set_fs(KERNEL_DS);
5851da177e4SLinus Torvalds 	/*
5861da177e4SLinus Torvalds 	 * the following is safe, since for compiler definitions of kvec and
5871da177e4SLinus Torvalds 	 * iovec are identical, yielding the same in-core layout and alignment
5881da177e4SLinus Torvalds 	 */
58989bddce5SStephen Hemminger 	msg->msg_iov = (struct iovec *)vec;
5901da177e4SLinus Torvalds 	msg->msg_iovlen = num;
5911da177e4SLinus Torvalds 	result = sock_sendmsg(sock, msg, size);
5921da177e4SLinus Torvalds 	set_fs(oldfs);
5931da177e4SLinus Torvalds 	return result;
5941da177e4SLinus Torvalds }
5951da177e4SLinus Torvalds 
5961da177e4SLinus Torvalds static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
5971da177e4SLinus Torvalds 				 struct msghdr *msg, size_t size, int flags)
5981da177e4SLinus Torvalds {
5991da177e4SLinus Torvalds 	int err;
6001da177e4SLinus Torvalds 	struct sock_iocb *si = kiocb_to_siocb(iocb);
6011da177e4SLinus Torvalds 
6021da177e4SLinus Torvalds 	si->sock = sock;
6031da177e4SLinus Torvalds 	si->scm = NULL;
6041da177e4SLinus Torvalds 	si->msg = msg;
6051da177e4SLinus Torvalds 	si->size = size;
6061da177e4SLinus Torvalds 	si->flags = flags;
6071da177e4SLinus Torvalds 
6081da177e4SLinus Torvalds 	err = security_socket_recvmsg(sock, msg, size, flags);
6091da177e4SLinus Torvalds 	if (err)
6101da177e4SLinus Torvalds 		return err;
6111da177e4SLinus Torvalds 
6121da177e4SLinus Torvalds 	return sock->ops->recvmsg(iocb, sock, msg, size, flags);
6131da177e4SLinus Torvalds }
6141da177e4SLinus Torvalds 
6151da177e4SLinus Torvalds int sock_recvmsg(struct socket *sock, struct msghdr *msg,
6161da177e4SLinus Torvalds 		 size_t size, int flags)
6171da177e4SLinus Torvalds {
6181da177e4SLinus Torvalds 	struct kiocb iocb;
6191da177e4SLinus Torvalds 	struct sock_iocb siocb;
6201da177e4SLinus Torvalds 	int ret;
6211da177e4SLinus Torvalds 
6221da177e4SLinus Torvalds 	init_sync_kiocb(&iocb, NULL);
6231da177e4SLinus Torvalds 	iocb.private = &siocb;
6241da177e4SLinus Torvalds 	ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
6251da177e4SLinus Torvalds 	if (-EIOCBQUEUED == ret)
6261da177e4SLinus Torvalds 		ret = wait_on_sync_kiocb(&iocb);
6271da177e4SLinus Torvalds 	return ret;
6281da177e4SLinus Torvalds }
6291da177e4SLinus Torvalds 
6301da177e4SLinus Torvalds int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
63189bddce5SStephen Hemminger 		   struct kvec *vec, size_t num, size_t size, int flags)
6321da177e4SLinus Torvalds {
6331da177e4SLinus Torvalds 	mm_segment_t oldfs = get_fs();
6341da177e4SLinus Torvalds 	int result;
6351da177e4SLinus Torvalds 
6361da177e4SLinus Torvalds 	set_fs(KERNEL_DS);
6371da177e4SLinus Torvalds 	/*
6381da177e4SLinus Torvalds 	 * the following is safe, since for compiler definitions of kvec and
6391da177e4SLinus Torvalds 	 * iovec are identical, yielding the same in-core layout and alignment
6401da177e4SLinus Torvalds 	 */
64189bddce5SStephen Hemminger 	msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
6421da177e4SLinus Torvalds 	result = sock_recvmsg(sock, msg, size, flags);
6431da177e4SLinus Torvalds 	set_fs(oldfs);
6441da177e4SLinus Torvalds 	return result;
6451da177e4SLinus Torvalds }
6461da177e4SLinus Torvalds 
6471da177e4SLinus Torvalds static void sock_aio_dtor(struct kiocb *iocb)
6481da177e4SLinus Torvalds {
6491da177e4SLinus Torvalds 	kfree(iocb->private);
6501da177e4SLinus Torvalds }
6511da177e4SLinus Torvalds 
65220380731SArnaldo Carvalho de Melo static ssize_t sock_sendpage(struct file *file, struct page *page,
6531da177e4SLinus Torvalds 			     int offset, size_t size, loff_t *ppos, int more)
6541da177e4SLinus Torvalds {
6551da177e4SLinus Torvalds 	struct socket *sock;
6561da177e4SLinus Torvalds 	int flags;
6571da177e4SLinus Torvalds 
658b69aee04SEric Dumazet 	sock = file->private_data;
6591da177e4SLinus Torvalds 
6601da177e4SLinus Torvalds 	flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
6611da177e4SLinus Torvalds 	if (more)
6621da177e4SLinus Torvalds 		flags |= MSG_MORE;
6631da177e4SLinus Torvalds 
6641da177e4SLinus Torvalds 	return sock->ops->sendpage(sock, page, offset, size, flags);
6651da177e4SLinus Torvalds }
6661da177e4SLinus Torvalds 
667ce1d4d3eSChristoph Hellwig static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
66889bddce5SStephen Hemminger 					 char __user *ubuf, size_t size,
66989bddce5SStephen Hemminger 					 struct sock_iocb *siocb)
670ce1d4d3eSChristoph Hellwig {
671ce1d4d3eSChristoph Hellwig 	if (!is_sync_kiocb(iocb)) {
672ce1d4d3eSChristoph Hellwig 		siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
673ce1d4d3eSChristoph Hellwig 		if (!siocb)
674ce1d4d3eSChristoph Hellwig 			return NULL;
675ce1d4d3eSChristoph Hellwig 		iocb->ki_dtor = sock_aio_dtor;
676ce1d4d3eSChristoph Hellwig 	}
677ce1d4d3eSChristoph Hellwig 
678ce1d4d3eSChristoph Hellwig 	siocb->kiocb = iocb;
679ce1d4d3eSChristoph Hellwig 	siocb->async_iov.iov_base = ubuf;
680ce1d4d3eSChristoph Hellwig 	siocb->async_iov.iov_len = size;
681ce1d4d3eSChristoph Hellwig 
682ce1d4d3eSChristoph Hellwig 	iocb->private = siocb;
683ce1d4d3eSChristoph Hellwig 	return siocb;
684ce1d4d3eSChristoph Hellwig }
685ce1d4d3eSChristoph Hellwig 
686ce1d4d3eSChristoph Hellwig static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
68789bddce5SStephen Hemminger 			    struct file *file, struct iovec *iov,
68889bddce5SStephen Hemminger 			    unsigned long nr_segs)
689ce1d4d3eSChristoph Hellwig {
690ce1d4d3eSChristoph Hellwig 	struct socket *sock = file->private_data;
691ce1d4d3eSChristoph Hellwig 	size_t size = 0;
692ce1d4d3eSChristoph Hellwig 	int i;
693ce1d4d3eSChristoph Hellwig 
694ce1d4d3eSChristoph Hellwig 	for (i = 0; i < nr_segs; i++)
695ce1d4d3eSChristoph Hellwig 		size += iov[i].iov_len;
696ce1d4d3eSChristoph Hellwig 
697ce1d4d3eSChristoph Hellwig 	msg->msg_name = NULL;
698ce1d4d3eSChristoph Hellwig 	msg->msg_namelen = 0;
699ce1d4d3eSChristoph Hellwig 	msg->msg_control = NULL;
700ce1d4d3eSChristoph Hellwig 	msg->msg_controllen = 0;
701ce1d4d3eSChristoph Hellwig 	msg->msg_iov = (struct iovec *)iov;
702ce1d4d3eSChristoph Hellwig 	msg->msg_iovlen = nr_segs;
703ce1d4d3eSChristoph Hellwig 	msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
704ce1d4d3eSChristoph Hellwig 
705ce1d4d3eSChristoph Hellwig 	return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
706ce1d4d3eSChristoph Hellwig }
707ce1d4d3eSChristoph Hellwig 
708ce1d4d3eSChristoph Hellwig static ssize_t sock_readv(struct file *file, const struct iovec *iov,
709ce1d4d3eSChristoph Hellwig 			  unsigned long nr_segs, loff_t *ppos)
710ce1d4d3eSChristoph Hellwig {
711ce1d4d3eSChristoph Hellwig 	struct kiocb iocb;
712ce1d4d3eSChristoph Hellwig 	struct sock_iocb siocb;
713ce1d4d3eSChristoph Hellwig 	struct msghdr msg;
714ce1d4d3eSChristoph Hellwig 	int ret;
715ce1d4d3eSChristoph Hellwig 
716ce1d4d3eSChristoph Hellwig 	init_sync_kiocb(&iocb, NULL);
717ce1d4d3eSChristoph Hellwig 	iocb.private = &siocb;
718ce1d4d3eSChristoph Hellwig 
719ce1d4d3eSChristoph Hellwig 	ret = do_sock_read(&msg, &iocb, file, (struct iovec *)iov, nr_segs);
720ce1d4d3eSChristoph Hellwig 	if (-EIOCBQUEUED == ret)
721ce1d4d3eSChristoph Hellwig 		ret = wait_on_sync_kiocb(&iocb);
722ce1d4d3eSChristoph Hellwig 	return ret;
723ce1d4d3eSChristoph Hellwig }
724ce1d4d3eSChristoph Hellwig 
725ce1d4d3eSChristoph Hellwig static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf,
726ce1d4d3eSChristoph Hellwig 			     size_t count, loff_t pos)
727ce1d4d3eSChristoph Hellwig {
728ce1d4d3eSChristoph Hellwig 	struct sock_iocb siocb, *x;
729ce1d4d3eSChristoph Hellwig 
730ce1d4d3eSChristoph Hellwig 	if (pos != 0)
731ce1d4d3eSChristoph Hellwig 		return -ESPIPE;
732ce1d4d3eSChristoph Hellwig 	if (count == 0)		/* Match SYS5 behaviour */
733ce1d4d3eSChristoph Hellwig 		return 0;
734ce1d4d3eSChristoph Hellwig 
735ce1d4d3eSChristoph Hellwig 	x = alloc_sock_iocb(iocb, ubuf, count, &siocb);
736ce1d4d3eSChristoph Hellwig 	if (!x)
737ce1d4d3eSChristoph Hellwig 		return -ENOMEM;
738ce1d4d3eSChristoph Hellwig 	return do_sock_read(&x->async_msg, iocb, iocb->ki_filp,
739ce1d4d3eSChristoph Hellwig 			    &x->async_iov, 1);
740ce1d4d3eSChristoph Hellwig }
741ce1d4d3eSChristoph Hellwig 
742ce1d4d3eSChristoph Hellwig static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
74389bddce5SStephen Hemminger 			     struct file *file, struct iovec *iov,
74489bddce5SStephen Hemminger 			     unsigned long nr_segs)
745ce1d4d3eSChristoph Hellwig {
746ce1d4d3eSChristoph Hellwig 	struct socket *sock = file->private_data;
747ce1d4d3eSChristoph Hellwig 	size_t size = 0;
748ce1d4d3eSChristoph Hellwig 	int i;
749ce1d4d3eSChristoph Hellwig 
750ce1d4d3eSChristoph Hellwig 	for (i = 0; i < nr_segs; i++)
751ce1d4d3eSChristoph Hellwig 		size += iov[i].iov_len;
752ce1d4d3eSChristoph Hellwig 
753ce1d4d3eSChristoph Hellwig 	msg->msg_name = NULL;
754ce1d4d3eSChristoph Hellwig 	msg->msg_namelen = 0;
755ce1d4d3eSChristoph Hellwig 	msg->msg_control = NULL;
756ce1d4d3eSChristoph Hellwig 	msg->msg_controllen = 0;
757ce1d4d3eSChristoph Hellwig 	msg->msg_iov = (struct iovec *)iov;
758ce1d4d3eSChristoph Hellwig 	msg->msg_iovlen = nr_segs;
759ce1d4d3eSChristoph Hellwig 	msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
760ce1d4d3eSChristoph Hellwig 	if (sock->type == SOCK_SEQPACKET)
761ce1d4d3eSChristoph Hellwig 		msg->msg_flags |= MSG_EOR;
762ce1d4d3eSChristoph Hellwig 
763ce1d4d3eSChristoph Hellwig 	return __sock_sendmsg(iocb, sock, msg, size);
764ce1d4d3eSChristoph Hellwig }
765ce1d4d3eSChristoph Hellwig 
766ce1d4d3eSChristoph Hellwig static ssize_t sock_writev(struct file *file, const struct iovec *iov,
767ce1d4d3eSChristoph Hellwig 			   unsigned long nr_segs, loff_t *ppos)
7681da177e4SLinus Torvalds {
7691da177e4SLinus Torvalds 	struct msghdr msg;
770ce1d4d3eSChristoph Hellwig 	struct kiocb iocb;
771ce1d4d3eSChristoph Hellwig 	struct sock_iocb siocb;
772ce1d4d3eSChristoph Hellwig 	int ret;
7731da177e4SLinus Torvalds 
774ce1d4d3eSChristoph Hellwig 	init_sync_kiocb(&iocb, NULL);
775ce1d4d3eSChristoph Hellwig 	iocb.private = &siocb;
7761da177e4SLinus Torvalds 
777ce1d4d3eSChristoph Hellwig 	ret = do_sock_write(&msg, &iocb, file, (struct iovec *)iov, nr_segs);
778ce1d4d3eSChristoph Hellwig 	if (-EIOCBQUEUED == ret)
779ce1d4d3eSChristoph Hellwig 		ret = wait_on_sync_kiocb(&iocb);
780ce1d4d3eSChristoph Hellwig 	return ret;
7811da177e4SLinus Torvalds }
7821da177e4SLinus Torvalds 
783ce1d4d3eSChristoph Hellwig static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf,
784ce1d4d3eSChristoph Hellwig 			      size_t count, loff_t pos)
7851da177e4SLinus Torvalds {
786ce1d4d3eSChristoph Hellwig 	struct sock_iocb siocb, *x;
7871da177e4SLinus Torvalds 
788ce1d4d3eSChristoph Hellwig 	if (pos != 0)
789ce1d4d3eSChristoph Hellwig 		return -ESPIPE;
790ce1d4d3eSChristoph Hellwig 	if (count == 0)		/* Match SYS5 behaviour */
791ce1d4d3eSChristoph Hellwig 		return 0;
792ce1d4d3eSChristoph Hellwig 
793ce1d4d3eSChristoph Hellwig 	x = alloc_sock_iocb(iocb, (void __user *)ubuf, count, &siocb);
794ce1d4d3eSChristoph Hellwig 	if (!x)
795ce1d4d3eSChristoph Hellwig 		return -ENOMEM;
796ce1d4d3eSChristoph Hellwig 
797ce1d4d3eSChristoph Hellwig 	return do_sock_write(&x->async_msg, iocb, iocb->ki_filp,
798ce1d4d3eSChristoph Hellwig 			     &x->async_iov, 1);
7991da177e4SLinus Torvalds }
8001da177e4SLinus Torvalds 
8011da177e4SLinus Torvalds /*
8021da177e4SLinus Torvalds  * Atomic setting of ioctl hooks to avoid race
8031da177e4SLinus Torvalds  * with module unload.
8041da177e4SLinus Torvalds  */
8051da177e4SLinus Torvalds 
8064a3e2f71SArjan van de Ven static DEFINE_MUTEX(br_ioctl_mutex);
8071da177e4SLinus Torvalds static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
8081da177e4SLinus Torvalds 
8091da177e4SLinus Torvalds void brioctl_set(int (*hook) (unsigned int, void __user *))
8101da177e4SLinus Torvalds {
8114a3e2f71SArjan van de Ven 	mutex_lock(&br_ioctl_mutex);
8121da177e4SLinus Torvalds 	br_ioctl_hook = hook;
8134a3e2f71SArjan van de Ven 	mutex_unlock(&br_ioctl_mutex);
8141da177e4SLinus Torvalds }
81589bddce5SStephen Hemminger 
8161da177e4SLinus Torvalds EXPORT_SYMBOL(brioctl_set);
8171da177e4SLinus Torvalds 
8184a3e2f71SArjan van de Ven static DEFINE_MUTEX(vlan_ioctl_mutex);
8191da177e4SLinus Torvalds static int (*vlan_ioctl_hook) (void __user *arg);
8201da177e4SLinus Torvalds 
8211da177e4SLinus Torvalds void vlan_ioctl_set(int (*hook) (void __user *))
8221da177e4SLinus Torvalds {
8234a3e2f71SArjan van de Ven 	mutex_lock(&vlan_ioctl_mutex);
8241da177e4SLinus Torvalds 	vlan_ioctl_hook = hook;
8254a3e2f71SArjan van de Ven 	mutex_unlock(&vlan_ioctl_mutex);
8261da177e4SLinus Torvalds }
82789bddce5SStephen Hemminger 
8281da177e4SLinus Torvalds EXPORT_SYMBOL(vlan_ioctl_set);
8291da177e4SLinus Torvalds 
8304a3e2f71SArjan van de Ven static DEFINE_MUTEX(dlci_ioctl_mutex);
8311da177e4SLinus Torvalds static int (*dlci_ioctl_hook) (unsigned int, void __user *);
8321da177e4SLinus Torvalds 
8331da177e4SLinus Torvalds void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
8341da177e4SLinus Torvalds {
8354a3e2f71SArjan van de Ven 	mutex_lock(&dlci_ioctl_mutex);
8361da177e4SLinus Torvalds 	dlci_ioctl_hook = hook;
8374a3e2f71SArjan van de Ven 	mutex_unlock(&dlci_ioctl_mutex);
8381da177e4SLinus Torvalds }
83989bddce5SStephen Hemminger 
8401da177e4SLinus Torvalds EXPORT_SYMBOL(dlci_ioctl_set);
8411da177e4SLinus Torvalds 
8421da177e4SLinus Torvalds /*
8431da177e4SLinus Torvalds  *	With an ioctl, arg may well be a user mode pointer, but we don't know
8441da177e4SLinus Torvalds  *	what to do with it - that's up to the protocol still.
8451da177e4SLinus Torvalds  */
8461da177e4SLinus Torvalds 
8471da177e4SLinus Torvalds static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
8481da177e4SLinus Torvalds {
8491da177e4SLinus Torvalds 	struct socket *sock;
8501da177e4SLinus Torvalds 	void __user *argp = (void __user *)arg;
8511da177e4SLinus Torvalds 	int pid, err;
8521da177e4SLinus Torvalds 
853b69aee04SEric Dumazet 	sock = file->private_data;
8541da177e4SLinus Torvalds 	if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
8551da177e4SLinus Torvalds 		err = dev_ioctl(cmd, argp);
8561da177e4SLinus Torvalds 	} else
857d86b5e0eSAdrian Bunk #ifdef CONFIG_WIRELESS_EXT
8581da177e4SLinus Torvalds 	if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
8591da177e4SLinus Torvalds 		err = dev_ioctl(cmd, argp);
8601da177e4SLinus Torvalds 	} else
861d86b5e0eSAdrian Bunk #endif				/* CONFIG_WIRELESS_EXT */
8621da177e4SLinus Torvalds 		switch (cmd) {
8631da177e4SLinus Torvalds 		case FIOSETOWN:
8641da177e4SLinus Torvalds 		case SIOCSPGRP:
8651da177e4SLinus Torvalds 			err = -EFAULT;
8661da177e4SLinus Torvalds 			if (get_user(pid, (int __user *)argp))
8671da177e4SLinus Torvalds 				break;
8681da177e4SLinus Torvalds 			err = f_setown(sock->file, pid, 1);
8691da177e4SLinus Torvalds 			break;
8701da177e4SLinus Torvalds 		case FIOGETOWN:
8711da177e4SLinus Torvalds 		case SIOCGPGRP:
87289bddce5SStephen Hemminger 			err = put_user(sock->file->f_owner.pid,
87389bddce5SStephen Hemminger 				       (int __user *)argp);
8741da177e4SLinus Torvalds 			break;
8751da177e4SLinus Torvalds 		case SIOCGIFBR:
8761da177e4SLinus Torvalds 		case SIOCSIFBR:
8771da177e4SLinus Torvalds 		case SIOCBRADDBR:
8781da177e4SLinus Torvalds 		case SIOCBRDELBR:
8791da177e4SLinus Torvalds 			err = -ENOPKG;
8801da177e4SLinus Torvalds 			if (!br_ioctl_hook)
8811da177e4SLinus Torvalds 				request_module("bridge");
8821da177e4SLinus Torvalds 
8834a3e2f71SArjan van de Ven 			mutex_lock(&br_ioctl_mutex);
8841da177e4SLinus Torvalds 			if (br_ioctl_hook)
8851da177e4SLinus Torvalds 				err = br_ioctl_hook(cmd, argp);
8864a3e2f71SArjan van de Ven 			mutex_unlock(&br_ioctl_mutex);
8871da177e4SLinus Torvalds 			break;
8881da177e4SLinus Torvalds 		case SIOCGIFVLAN:
8891da177e4SLinus Torvalds 		case SIOCSIFVLAN:
8901da177e4SLinus Torvalds 			err = -ENOPKG;
8911da177e4SLinus Torvalds 			if (!vlan_ioctl_hook)
8921da177e4SLinus Torvalds 				request_module("8021q");
8931da177e4SLinus Torvalds 
8944a3e2f71SArjan van de Ven 			mutex_lock(&vlan_ioctl_mutex);
8951da177e4SLinus Torvalds 			if (vlan_ioctl_hook)
8961da177e4SLinus Torvalds 				err = vlan_ioctl_hook(argp);
8974a3e2f71SArjan van de Ven 			mutex_unlock(&vlan_ioctl_mutex);
8981da177e4SLinus Torvalds 			break;
8991da177e4SLinus Torvalds 		case SIOCGIFDIVERT:
9001da177e4SLinus Torvalds 		case SIOCSIFDIVERT:
9011da177e4SLinus Torvalds 			/* Convert this to call through a hook */
9021da177e4SLinus Torvalds 			err = divert_ioctl(cmd, argp);
9031da177e4SLinus Torvalds 			break;
9041da177e4SLinus Torvalds 		case SIOCADDDLCI:
9051da177e4SLinus Torvalds 		case SIOCDELDLCI:
9061da177e4SLinus Torvalds 			err = -ENOPKG;
9071da177e4SLinus Torvalds 			if (!dlci_ioctl_hook)
9081da177e4SLinus Torvalds 				request_module("dlci");
9091da177e4SLinus Torvalds 
9101da177e4SLinus Torvalds 			if (dlci_ioctl_hook) {
9114a3e2f71SArjan van de Ven 				mutex_lock(&dlci_ioctl_mutex);
9121da177e4SLinus Torvalds 				err = dlci_ioctl_hook(cmd, argp);
9134a3e2f71SArjan van de Ven 				mutex_unlock(&dlci_ioctl_mutex);
9141da177e4SLinus Torvalds 			}
9151da177e4SLinus Torvalds 			break;
9161da177e4SLinus Torvalds 		default:
9171da177e4SLinus Torvalds 			err = sock->ops->ioctl(sock, cmd, arg);
918b5e5fa5eSChristoph Hellwig 
919b5e5fa5eSChristoph Hellwig 			/*
920b5e5fa5eSChristoph Hellwig 			 * If this ioctl is unknown try to hand it down
921b5e5fa5eSChristoph Hellwig 			 * to the NIC driver.
922b5e5fa5eSChristoph Hellwig 			 */
923b5e5fa5eSChristoph Hellwig 			if (err == -ENOIOCTLCMD)
924b5e5fa5eSChristoph Hellwig 				err = dev_ioctl(cmd, argp);
9251da177e4SLinus Torvalds 			break;
9261da177e4SLinus Torvalds 		}
9271da177e4SLinus Torvalds 	return err;
9281da177e4SLinus Torvalds }
9291da177e4SLinus Torvalds 
9301da177e4SLinus Torvalds int sock_create_lite(int family, int type, int protocol, struct socket **res)
9311da177e4SLinus Torvalds {
9321da177e4SLinus Torvalds 	int err;
9331da177e4SLinus Torvalds 	struct socket *sock = NULL;
9341da177e4SLinus Torvalds 
9351da177e4SLinus Torvalds 	err = security_socket_create(family, type, protocol, 1);
9361da177e4SLinus Torvalds 	if (err)
9371da177e4SLinus Torvalds 		goto out;
9381da177e4SLinus Torvalds 
9391da177e4SLinus Torvalds 	sock = sock_alloc();
9401da177e4SLinus Torvalds 	if (!sock) {
9411da177e4SLinus Torvalds 		err = -ENOMEM;
9421da177e4SLinus Torvalds 		goto out;
9431da177e4SLinus Torvalds 	}
9441da177e4SLinus Torvalds 
9451da177e4SLinus Torvalds 	sock->type = type;
9467420ed23SVenkat Yekkirala 	err = security_socket_post_create(sock, family, type, protocol, 1);
9477420ed23SVenkat Yekkirala 	if (err)
9487420ed23SVenkat Yekkirala 		goto out_release;
9497420ed23SVenkat Yekkirala 
9501da177e4SLinus Torvalds out:
9511da177e4SLinus Torvalds 	*res = sock;
9521da177e4SLinus Torvalds 	return err;
9537420ed23SVenkat Yekkirala out_release:
9547420ed23SVenkat Yekkirala 	sock_release(sock);
9557420ed23SVenkat Yekkirala 	sock = NULL;
9567420ed23SVenkat Yekkirala 	goto out;
9571da177e4SLinus Torvalds }
9581da177e4SLinus Torvalds 
9591da177e4SLinus Torvalds /* No kernel lock held - perfect */
9601da177e4SLinus Torvalds static unsigned int sock_poll(struct file *file, poll_table *wait)
9611da177e4SLinus Torvalds {
9621da177e4SLinus Torvalds 	struct socket *sock;
9631da177e4SLinus Torvalds 
9641da177e4SLinus Torvalds 	/*
9651da177e4SLinus Torvalds 	 *      We can't return errors to poll, so it's either yes or no.
9661da177e4SLinus Torvalds 	 */
967b69aee04SEric Dumazet 	sock = file->private_data;
9681da177e4SLinus Torvalds 	return sock->ops->poll(file, sock, wait);
9691da177e4SLinus Torvalds }
9701da177e4SLinus Torvalds 
9711da177e4SLinus Torvalds static int sock_mmap(struct file *file, struct vm_area_struct *vma)
9721da177e4SLinus Torvalds {
973b69aee04SEric Dumazet 	struct socket *sock = file->private_data;
9741da177e4SLinus Torvalds 
9751da177e4SLinus Torvalds 	return sock->ops->mmap(file, sock, vma);
9761da177e4SLinus Torvalds }
9771da177e4SLinus Torvalds 
97820380731SArnaldo Carvalho de Melo static int sock_close(struct inode *inode, struct file *filp)
9791da177e4SLinus Torvalds {
9801da177e4SLinus Torvalds 	/*
9811da177e4SLinus Torvalds 	 *      It was possible the inode is NULL we were
9821da177e4SLinus Torvalds 	 *      closing an unfinished socket.
9831da177e4SLinus Torvalds 	 */
9841da177e4SLinus Torvalds 
98589bddce5SStephen Hemminger 	if (!inode) {
9861da177e4SLinus Torvalds 		printk(KERN_DEBUG "sock_close: NULL inode\n");
9871da177e4SLinus Torvalds 		return 0;
9881da177e4SLinus Torvalds 	}
9891da177e4SLinus Torvalds 	sock_fasync(-1, filp, 0);
9901da177e4SLinus Torvalds 	sock_release(SOCKET_I(inode));
9911da177e4SLinus Torvalds 	return 0;
9921da177e4SLinus Torvalds }
9931da177e4SLinus Torvalds 
9941da177e4SLinus Torvalds /*
9951da177e4SLinus Torvalds  *	Update the socket async list
9961da177e4SLinus Torvalds  *
9971da177e4SLinus Torvalds  *	Fasync_list locking strategy.
9981da177e4SLinus Torvalds  *
9991da177e4SLinus Torvalds  *	1. fasync_list is modified only under process context socket lock
10001da177e4SLinus Torvalds  *	   i.e. under semaphore.
10011da177e4SLinus Torvalds  *	2. fasync_list is used under read_lock(&sk->sk_callback_lock)
10021da177e4SLinus Torvalds  *	   or under socket lock.
10031da177e4SLinus Torvalds  *	3. fasync_list can be used from softirq context, so that
10041da177e4SLinus Torvalds  *	   modification under socket lock have to be enhanced with
10051da177e4SLinus Torvalds  *	   write_lock_bh(&sk->sk_callback_lock).
10061da177e4SLinus Torvalds  *							--ANK (990710)
10071da177e4SLinus Torvalds  */
10081da177e4SLinus Torvalds 
10091da177e4SLinus Torvalds static int sock_fasync(int fd, struct file *filp, int on)
10101da177e4SLinus Torvalds {
10111da177e4SLinus Torvalds 	struct fasync_struct *fa, *fna = NULL, **prev;
10121da177e4SLinus Torvalds 	struct socket *sock;
10131da177e4SLinus Torvalds 	struct sock *sk;
10141da177e4SLinus Torvalds 
101589bddce5SStephen Hemminger 	if (on) {
10168b3a7005SKris Katterjohn 		fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
10171da177e4SLinus Torvalds 		if (fna == NULL)
10181da177e4SLinus Torvalds 			return -ENOMEM;
10191da177e4SLinus Torvalds 	}
10201da177e4SLinus Torvalds 
1021b69aee04SEric Dumazet 	sock = filp->private_data;
10221da177e4SLinus Torvalds 
102389bddce5SStephen Hemminger 	sk = sock->sk;
102489bddce5SStephen Hemminger 	if (sk == NULL) {
10251da177e4SLinus Torvalds 		kfree(fna);
10261da177e4SLinus Torvalds 		return -EINVAL;
10271da177e4SLinus Torvalds 	}
10281da177e4SLinus Torvalds 
10291da177e4SLinus Torvalds 	lock_sock(sk);
10301da177e4SLinus Torvalds 
10311da177e4SLinus Torvalds 	prev = &(sock->fasync_list);
10321da177e4SLinus Torvalds 
10331da177e4SLinus Torvalds 	for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
10341da177e4SLinus Torvalds 		if (fa->fa_file == filp)
10351da177e4SLinus Torvalds 			break;
10361da177e4SLinus Torvalds 
103789bddce5SStephen Hemminger 	if (on) {
103889bddce5SStephen Hemminger 		if (fa != NULL) {
10391da177e4SLinus Torvalds 			write_lock_bh(&sk->sk_callback_lock);
10401da177e4SLinus Torvalds 			fa->fa_fd = fd;
10411da177e4SLinus Torvalds 			write_unlock_bh(&sk->sk_callback_lock);
10421da177e4SLinus Torvalds 
10431da177e4SLinus Torvalds 			kfree(fna);
10441da177e4SLinus Torvalds 			goto out;
10451da177e4SLinus Torvalds 		}
10461da177e4SLinus Torvalds 		fna->fa_file = filp;
10471da177e4SLinus Torvalds 		fna->fa_fd = fd;
10481da177e4SLinus Torvalds 		fna->magic = FASYNC_MAGIC;
10491da177e4SLinus Torvalds 		fna->fa_next = sock->fasync_list;
10501da177e4SLinus Torvalds 		write_lock_bh(&sk->sk_callback_lock);
10511da177e4SLinus Torvalds 		sock->fasync_list = fna;
10521da177e4SLinus Torvalds 		write_unlock_bh(&sk->sk_callback_lock);
105389bddce5SStephen Hemminger 	} else {
105489bddce5SStephen Hemminger 		if (fa != NULL) {
10551da177e4SLinus Torvalds 			write_lock_bh(&sk->sk_callback_lock);
10561da177e4SLinus Torvalds 			*prev = fa->fa_next;
10571da177e4SLinus Torvalds 			write_unlock_bh(&sk->sk_callback_lock);
10581da177e4SLinus Torvalds 			kfree(fa);
10591da177e4SLinus Torvalds 		}
10601da177e4SLinus Torvalds 	}
10611da177e4SLinus Torvalds 
10621da177e4SLinus Torvalds out:
10631da177e4SLinus Torvalds 	release_sock(sock->sk);
10641da177e4SLinus Torvalds 	return 0;
10651da177e4SLinus Torvalds }
10661da177e4SLinus Torvalds 
10671da177e4SLinus Torvalds /* This function may be called only under socket lock or callback_lock */
10681da177e4SLinus Torvalds 
10691da177e4SLinus Torvalds int sock_wake_async(struct socket *sock, int how, int band)
10701da177e4SLinus Torvalds {
10711da177e4SLinus Torvalds 	if (!sock || !sock->fasync_list)
10721da177e4SLinus Torvalds 		return -1;
107389bddce5SStephen Hemminger 	switch (how) {
10741da177e4SLinus Torvalds 	case 1:
10751da177e4SLinus Torvalds 
10761da177e4SLinus Torvalds 		if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
10771da177e4SLinus Torvalds 			break;
10781da177e4SLinus Torvalds 		goto call_kill;
10791da177e4SLinus Torvalds 	case 2:
10801da177e4SLinus Torvalds 		if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
10811da177e4SLinus Torvalds 			break;
10821da177e4SLinus Torvalds 		/* fall through */
10831da177e4SLinus Torvalds 	case 0:
10841da177e4SLinus Torvalds call_kill:
10851da177e4SLinus Torvalds 		__kill_fasync(sock->fasync_list, SIGIO, band);
10861da177e4SLinus Torvalds 		break;
10871da177e4SLinus Torvalds 	case 3:
10881da177e4SLinus Torvalds 		__kill_fasync(sock->fasync_list, SIGURG, band);
10891da177e4SLinus Torvalds 	}
10901da177e4SLinus Torvalds 	return 0;
10911da177e4SLinus Torvalds }
10921da177e4SLinus Torvalds 
109389bddce5SStephen Hemminger static int __sock_create(int family, int type, int protocol,
109489bddce5SStephen Hemminger 			 struct socket **res, int kern)
10951da177e4SLinus Torvalds {
10961da177e4SLinus Torvalds 	int err;
10971da177e4SLinus Torvalds 	struct socket *sock;
1098*55737fdaSStephen Hemminger 	const struct net_proto_family *pf;
10991da177e4SLinus Torvalds 
11001da177e4SLinus Torvalds 	/*
11011da177e4SLinus Torvalds 	 *      Check protocol is in range
11021da177e4SLinus Torvalds 	 */
11031da177e4SLinus Torvalds 	if (family < 0 || family >= NPROTO)
11041da177e4SLinus Torvalds 		return -EAFNOSUPPORT;
11051da177e4SLinus Torvalds 	if (type < 0 || type >= SOCK_MAX)
11061da177e4SLinus Torvalds 		return -EINVAL;
11071da177e4SLinus Torvalds 
11081da177e4SLinus Torvalds 	/* Compatibility.
11091da177e4SLinus Torvalds 
11101da177e4SLinus Torvalds 	   This uglymoron is moved from INET layer to here to avoid
11111da177e4SLinus Torvalds 	   deadlock in module load.
11121da177e4SLinus Torvalds 	 */
11131da177e4SLinus Torvalds 	if (family == PF_INET && type == SOCK_PACKET) {
11141da177e4SLinus Torvalds 		static int warned;
11151da177e4SLinus Torvalds 		if (!warned) {
11161da177e4SLinus Torvalds 			warned = 1;
111789bddce5SStephen Hemminger 			printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
111889bddce5SStephen Hemminger 			       current->comm);
11191da177e4SLinus Torvalds 		}
11201da177e4SLinus Torvalds 		family = PF_PACKET;
11211da177e4SLinus Torvalds 	}
11221da177e4SLinus Torvalds 
11231da177e4SLinus Torvalds 	err = security_socket_create(family, type, protocol, kern);
11241da177e4SLinus Torvalds 	if (err)
11251da177e4SLinus Torvalds 		return err;
11261da177e4SLinus Torvalds 
1127*55737fdaSStephen Hemminger 	/*
1128*55737fdaSStephen Hemminger 	 *	Allocate the socket and allow the family to set things up. if
1129*55737fdaSStephen Hemminger 	 *	the protocol is 0, the family is instructed to select an appropriate
1130*55737fdaSStephen Hemminger 	 *	default.
1131*55737fdaSStephen Hemminger 	 */
1132*55737fdaSStephen Hemminger 	sock = sock_alloc();
1133*55737fdaSStephen Hemminger 	if (!sock) {
1134*55737fdaSStephen Hemminger 		if (net_ratelimit())
1135*55737fdaSStephen Hemminger 			printk(KERN_WARNING "socket: no more sockets\n");
1136*55737fdaSStephen Hemminger 		return -ENFILE;	/* Not exactly a match, but its the
1137*55737fdaSStephen Hemminger 				   closest posix thing */
1138*55737fdaSStephen Hemminger 	}
1139*55737fdaSStephen Hemminger 
1140*55737fdaSStephen Hemminger 	sock->type = type;
1141*55737fdaSStephen Hemminger 
11421da177e4SLinus Torvalds #if defined(CONFIG_KMOD)
11431da177e4SLinus Torvalds 	/* Attempt to load a protocol module if the find failed.
11441da177e4SLinus Torvalds 	 *
11451da177e4SLinus Torvalds 	 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
11461da177e4SLinus Torvalds 	 * requested real, full-featured networking support upon configuration.
11471da177e4SLinus Torvalds 	 * Otherwise module support will break!
11481da177e4SLinus Torvalds 	 */
1149*55737fdaSStephen Hemminger 	if (net_families[family] == NULL)
11501da177e4SLinus Torvalds 		request_module("net-pf-%d", family);
11511da177e4SLinus Torvalds #endif
11521da177e4SLinus Torvalds 
1153*55737fdaSStephen Hemminger 	rcu_read_lock();
1154*55737fdaSStephen Hemminger 	pf = rcu_dereference(net_families[family]);
11551da177e4SLinus Torvalds 	err = -EAFNOSUPPORT;
1156*55737fdaSStephen Hemminger 	if (!pf)
1157*55737fdaSStephen Hemminger 		goto out_release;
11581da177e4SLinus Torvalds 
11591da177e4SLinus Torvalds 	/*
11601da177e4SLinus Torvalds 	 * We will call the ->create function, that possibly is in a loadable
11611da177e4SLinus Torvalds 	 * module, so we have to bump that loadable module refcnt first.
11621da177e4SLinus Torvalds 	 */
1163*55737fdaSStephen Hemminger 	if (!try_module_get(pf->owner))
11641da177e4SLinus Torvalds 		goto out_release;
11651da177e4SLinus Torvalds 
1166*55737fdaSStephen Hemminger 	/* Now protected by module ref count */
1167*55737fdaSStephen Hemminger 	rcu_read_unlock();
1168*55737fdaSStephen Hemminger 
1169*55737fdaSStephen Hemminger 	err = pf->create(sock, protocol);
1170*55737fdaSStephen Hemminger 	if (err < 0)
11711da177e4SLinus Torvalds 		goto out_module_put;
1172a79af59eSFrank Filz 
11731da177e4SLinus Torvalds 	/*
11741da177e4SLinus Torvalds 	 * Now to bump the refcnt of the [loadable] module that owns this
11751da177e4SLinus Torvalds 	 * socket at sock_release time we decrement its refcnt.
11761da177e4SLinus Torvalds 	 */
1177*55737fdaSStephen Hemminger 	if (!try_module_get(sock->ops->owner))
1178*55737fdaSStephen Hemminger 		goto out_module_busy;
1179*55737fdaSStephen Hemminger 
11801da177e4SLinus Torvalds 	/*
11811da177e4SLinus Torvalds 	 * Now that we're done with the ->create function, the [loadable]
11821da177e4SLinus Torvalds 	 * module can have its refcnt decremented
11831da177e4SLinus Torvalds 	 */
1184*55737fdaSStephen Hemminger 	module_put(pf->owner);
11857420ed23SVenkat Yekkirala 	err = security_socket_post_create(sock, family, type, protocol, kern);
11867420ed23SVenkat Yekkirala 	if (err)
11877420ed23SVenkat Yekkirala 		goto out_release;
1188*55737fdaSStephen Hemminger 	*res = sock;
11891da177e4SLinus Torvalds 
1190*55737fdaSStephen Hemminger 	return 0;
1191*55737fdaSStephen Hemminger 
1192*55737fdaSStephen Hemminger out_module_busy:
1193*55737fdaSStephen Hemminger 	err = -EAFNOSUPPORT;
11941da177e4SLinus Torvalds out_module_put:
1195*55737fdaSStephen Hemminger 	sock->ops = NULL;
1196*55737fdaSStephen Hemminger 	module_put(pf->owner);
1197*55737fdaSStephen Hemminger out_sock_release:
11981da177e4SLinus Torvalds 	sock_release(sock);
1199*55737fdaSStephen Hemminger 	return err;
1200*55737fdaSStephen Hemminger 
1201*55737fdaSStephen Hemminger out_release:
1202*55737fdaSStephen Hemminger 	rcu_read_unlock();
1203*55737fdaSStephen Hemminger 	goto out_sock_release;
12041da177e4SLinus Torvalds }
12051da177e4SLinus Torvalds 
/* Create a socket via __sock_create() with its kern argument set to 0. */
int sock_create(int family, int type, int protocol, struct socket **res)
{
	int err;

	err = __sock_create(family, type, protocol, res, 0);
	return err;
}
12101da177e4SLinus Torvalds 
/* Create a socket via __sock_create() with its kern argument set to 1. */
int sock_create_kern(int family, int type, int protocol, struct socket **res)
{
	int err;

	err = __sock_create(family, type, protocol, res, 1);
	return err;
}
12151da177e4SLinus Torvalds 
12161da177e4SLinus Torvalds asmlinkage long sys_socket(int family, int type, int protocol)
12171da177e4SLinus Torvalds {
12181da177e4SLinus Torvalds 	int retval;
12191da177e4SLinus Torvalds 	struct socket *sock;
12201da177e4SLinus Torvalds 
12211da177e4SLinus Torvalds 	retval = sock_create(family, type, protocol, &sock);
12221da177e4SLinus Torvalds 	if (retval < 0)
12231da177e4SLinus Torvalds 		goto out;
12241da177e4SLinus Torvalds 
12251da177e4SLinus Torvalds 	retval = sock_map_fd(sock);
12261da177e4SLinus Torvalds 	if (retval < 0)
12271da177e4SLinus Torvalds 		goto out_release;
12281da177e4SLinus Torvalds 
12291da177e4SLinus Torvalds out:
12301da177e4SLinus Torvalds 	/* It may be already another descriptor 8) Not kernel problem. */
12311da177e4SLinus Torvalds 	return retval;
12321da177e4SLinus Torvalds 
12331da177e4SLinus Torvalds out_release:
12341da177e4SLinus Torvalds 	sock_release(sock);
12351da177e4SLinus Torvalds 	return retval;
12361da177e4SLinus Torvalds }
12371da177e4SLinus Torvalds 
12381da177e4SLinus Torvalds /*
12391da177e4SLinus Torvalds  *	Create a pair of connected sockets.
12401da177e4SLinus Torvalds  */
12411da177e4SLinus Torvalds 
124289bddce5SStephen Hemminger asmlinkage long sys_socketpair(int family, int type, int protocol,
124389bddce5SStephen Hemminger 			       int __user *usockvec)
12441da177e4SLinus Torvalds {
12451da177e4SLinus Torvalds 	struct socket *sock1, *sock2;
12461da177e4SLinus Torvalds 	int fd1, fd2, err;
12471da177e4SLinus Torvalds 
12481da177e4SLinus Torvalds 	/*
12491da177e4SLinus Torvalds 	 * Obtain the first socket and check if the underlying protocol
12501da177e4SLinus Torvalds 	 * supports the socketpair call.
12511da177e4SLinus Torvalds 	 */
12521da177e4SLinus Torvalds 
12531da177e4SLinus Torvalds 	err = sock_create(family, type, protocol, &sock1);
12541da177e4SLinus Torvalds 	if (err < 0)
12551da177e4SLinus Torvalds 		goto out;
12561da177e4SLinus Torvalds 
12571da177e4SLinus Torvalds 	err = sock_create(family, type, protocol, &sock2);
12581da177e4SLinus Torvalds 	if (err < 0)
12591da177e4SLinus Torvalds 		goto out_release_1;
12601da177e4SLinus Torvalds 
12611da177e4SLinus Torvalds 	err = sock1->ops->socketpair(sock1, sock2);
12621da177e4SLinus Torvalds 	if (err < 0)
12631da177e4SLinus Torvalds 		goto out_release_both;
12641da177e4SLinus Torvalds 
12651da177e4SLinus Torvalds 	fd1 = fd2 = -1;
12661da177e4SLinus Torvalds 
12671da177e4SLinus Torvalds 	err = sock_map_fd(sock1);
12681da177e4SLinus Torvalds 	if (err < 0)
12691da177e4SLinus Torvalds 		goto out_release_both;
12701da177e4SLinus Torvalds 	fd1 = err;
12711da177e4SLinus Torvalds 
12721da177e4SLinus Torvalds 	err = sock_map_fd(sock2);
12731da177e4SLinus Torvalds 	if (err < 0)
12741da177e4SLinus Torvalds 		goto out_close_1;
12751da177e4SLinus Torvalds 	fd2 = err;
12761da177e4SLinus Torvalds 
12771da177e4SLinus Torvalds 	/* fd1 and fd2 may be already another descriptors.
12781da177e4SLinus Torvalds 	 * Not kernel problem.
12791da177e4SLinus Torvalds 	 */
12801da177e4SLinus Torvalds 
12811da177e4SLinus Torvalds 	err = put_user(fd1, &usockvec[0]);
12821da177e4SLinus Torvalds 	if (!err)
12831da177e4SLinus Torvalds 		err = put_user(fd2, &usockvec[1]);
12841da177e4SLinus Torvalds 	if (!err)
12851da177e4SLinus Torvalds 		return 0;
12861da177e4SLinus Torvalds 
12871da177e4SLinus Torvalds 	sys_close(fd2);
12881da177e4SLinus Torvalds 	sys_close(fd1);
12891da177e4SLinus Torvalds 	return err;
12901da177e4SLinus Torvalds 
12911da177e4SLinus Torvalds out_close_1:
12921da177e4SLinus Torvalds 	sock_release(sock2);
12931da177e4SLinus Torvalds 	sys_close(fd1);
12941da177e4SLinus Torvalds 	return err;
12951da177e4SLinus Torvalds 
12961da177e4SLinus Torvalds out_release_both:
12971da177e4SLinus Torvalds 	sock_release(sock2);
12981da177e4SLinus Torvalds out_release_1:
12991da177e4SLinus Torvalds 	sock_release(sock1);
13001da177e4SLinus Torvalds out:
13011da177e4SLinus Torvalds 	return err;
13021da177e4SLinus Torvalds }
13031da177e4SLinus Torvalds 
13041da177e4SLinus Torvalds /*
13051da177e4SLinus Torvalds  *	Bind a name to a socket. Nothing much to do here since it's
13061da177e4SLinus Torvalds  *	the protocol's responsibility to handle the local address.
13071da177e4SLinus Torvalds  *
13081da177e4SLinus Torvalds  *	We move the socket address to kernel space before we call
13091da177e4SLinus Torvalds  *	the protocol layer (having also checked the address is ok).
13101da177e4SLinus Torvalds  */
13111da177e4SLinus Torvalds 
13121da177e4SLinus Torvalds asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
13131da177e4SLinus Torvalds {
13141da177e4SLinus Torvalds 	struct socket *sock;
13151da177e4SLinus Torvalds 	char address[MAX_SOCK_ADDR];
13166cb153caSBenjamin LaHaise 	int err, fput_needed;
13171da177e4SLinus Torvalds 
131889bddce5SStephen Hemminger 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
131989bddce5SStephen Hemminger 	if(sock) {
132089bddce5SStephen Hemminger 		err = move_addr_to_kernel(umyaddr, addrlen, address);
132189bddce5SStephen Hemminger 		if (err >= 0) {
132289bddce5SStephen Hemminger 			err = security_socket_bind(sock,
132389bddce5SStephen Hemminger 						   (struct sockaddr *)address,
132489bddce5SStephen Hemminger 						   addrlen);
13256cb153caSBenjamin LaHaise 			if (!err)
13266cb153caSBenjamin LaHaise 				err = sock->ops->bind(sock,
132789bddce5SStephen Hemminger 						      (struct sockaddr *)
132889bddce5SStephen Hemminger 						      address, addrlen);
13291da177e4SLinus Torvalds 		}
13306cb153caSBenjamin LaHaise 		fput_light(sock->file, fput_needed);
13311da177e4SLinus Torvalds 	}
13321da177e4SLinus Torvalds 	return err;
13331da177e4SLinus Torvalds }
13341da177e4SLinus Torvalds 
13351da177e4SLinus Torvalds /*
13361da177e4SLinus Torvalds  *	Perform a listen. Basically, we allow the protocol to do anything
13371da177e4SLinus Torvalds  *	necessary for a listen, and if that works, we mark the socket as
13381da177e4SLinus Torvalds  *	ready for listening.
13391da177e4SLinus Torvalds  */
13401da177e4SLinus Torvalds 
13411da177e4SLinus Torvalds int sysctl_somaxconn = SOMAXCONN;
13421da177e4SLinus Torvalds 
13431da177e4SLinus Torvalds asmlinkage long sys_listen(int fd, int backlog)
13441da177e4SLinus Torvalds {
13451da177e4SLinus Torvalds 	struct socket *sock;
13466cb153caSBenjamin LaHaise 	int err, fput_needed;
13471da177e4SLinus Torvalds 
134889bddce5SStephen Hemminger 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
134989bddce5SStephen Hemminger 	if (sock) {
13501da177e4SLinus Torvalds 		if ((unsigned)backlog > sysctl_somaxconn)
13511da177e4SLinus Torvalds 			backlog = sysctl_somaxconn;
13521da177e4SLinus Torvalds 
13531da177e4SLinus Torvalds 		err = security_socket_listen(sock, backlog);
13546cb153caSBenjamin LaHaise 		if (!err)
13551da177e4SLinus Torvalds 			err = sock->ops->listen(sock, backlog);
13566cb153caSBenjamin LaHaise 
13576cb153caSBenjamin LaHaise 		fput_light(sock->file, fput_needed);
13581da177e4SLinus Torvalds 	}
13591da177e4SLinus Torvalds 	return err;
13601da177e4SLinus Torvalds }
13611da177e4SLinus Torvalds 
13621da177e4SLinus Torvalds /*
13631da177e4SLinus Torvalds  *	For accept, we attempt to create a new socket, set up the link
13641da177e4SLinus Torvalds  *	with the client, wake up the client, then return the new
13651da177e4SLinus Torvalds  *	connected fd. We collect the address of the connector in kernel
13661da177e4SLinus Torvalds  *	space and move it to user at the very end. This is unclean because
13671da177e4SLinus Torvalds  *	we open the socket then return an error.
13681da177e4SLinus Torvalds  *
13691da177e4SLinus Torvalds  *	1003.1g adds the ability to recvmsg() to query connection pending
13701da177e4SLinus Torvalds  *	status to recvmsg. We need to add that support in a way thats
13711da177e4SLinus Torvalds  *	clean when we restucture accept also.
13721da177e4SLinus Torvalds  */
13731da177e4SLinus Torvalds 
137489bddce5SStephen Hemminger asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr,
137589bddce5SStephen Hemminger 			   int __user *upeer_addrlen)
13761da177e4SLinus Torvalds {
13771da177e4SLinus Torvalds 	struct socket *sock, *newsock;
137839d8c1b6SDavid S. Miller 	struct file *newfile;
13796cb153caSBenjamin LaHaise 	int err, len, newfd, fput_needed;
13801da177e4SLinus Torvalds 	char address[MAX_SOCK_ADDR];
13811da177e4SLinus Torvalds 
13826cb153caSBenjamin LaHaise 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
13831da177e4SLinus Torvalds 	if (!sock)
13841da177e4SLinus Torvalds 		goto out;
13851da177e4SLinus Torvalds 
13861da177e4SLinus Torvalds 	err = -ENFILE;
13871da177e4SLinus Torvalds 	if (!(newsock = sock_alloc()))
13881da177e4SLinus Torvalds 		goto out_put;
13891da177e4SLinus Torvalds 
13901da177e4SLinus Torvalds 	newsock->type = sock->type;
13911da177e4SLinus Torvalds 	newsock->ops = sock->ops;
13921da177e4SLinus Torvalds 
13931da177e4SLinus Torvalds 	/*
13941da177e4SLinus Torvalds 	 * We don't need try_module_get here, as the listening socket (sock)
13951da177e4SLinus Torvalds 	 * has the protocol module (sock->ops->owner) held.
13961da177e4SLinus Torvalds 	 */
13971da177e4SLinus Torvalds 	__module_get(newsock->ops->owner);
13981da177e4SLinus Torvalds 
139939d8c1b6SDavid S. Miller 	newfd = sock_alloc_fd(&newfile);
140039d8c1b6SDavid S. Miller 	if (unlikely(newfd < 0)) {
140139d8c1b6SDavid S. Miller 		err = newfd;
14029a1875e6SDavid S. Miller 		sock_release(newsock);
14039a1875e6SDavid S. Miller 		goto out_put;
140439d8c1b6SDavid S. Miller 	}
140539d8c1b6SDavid S. Miller 
140639d8c1b6SDavid S. Miller 	err = sock_attach_fd(newsock, newfile);
140739d8c1b6SDavid S. Miller 	if (err < 0)
140839d8c1b6SDavid S. Miller 		goto out_fd;
140939d8c1b6SDavid S. Miller 
1410a79af59eSFrank Filz 	err = security_socket_accept(sock, newsock);
1411a79af59eSFrank Filz 	if (err)
141239d8c1b6SDavid S. Miller 		goto out_fd;
1413a79af59eSFrank Filz 
14141da177e4SLinus Torvalds 	err = sock->ops->accept(sock, newsock, sock->file->f_flags);
14151da177e4SLinus Torvalds 	if (err < 0)
141639d8c1b6SDavid S. Miller 		goto out_fd;
14171da177e4SLinus Torvalds 
14181da177e4SLinus Torvalds 	if (upeer_sockaddr) {
141989bddce5SStephen Hemminger 		if (newsock->ops->getname(newsock, (struct sockaddr *)address,
142089bddce5SStephen Hemminger 					  &len, 2) < 0) {
14211da177e4SLinus Torvalds 			err = -ECONNABORTED;
142239d8c1b6SDavid S. Miller 			goto out_fd;
14231da177e4SLinus Torvalds 		}
142489bddce5SStephen Hemminger 		err = move_addr_to_user(address, len, upeer_sockaddr,
142589bddce5SStephen Hemminger 					upeer_addrlen);
14261da177e4SLinus Torvalds 		if (err < 0)
142739d8c1b6SDavid S. Miller 			goto out_fd;
14281da177e4SLinus Torvalds 	}
14291da177e4SLinus Torvalds 
14301da177e4SLinus Torvalds 	/* File flags are not inherited via accept() unlike another OSes. */
14311da177e4SLinus Torvalds 
143239d8c1b6SDavid S. Miller 	fd_install(newfd, newfile);
143339d8c1b6SDavid S. Miller 	err = newfd;
14341da177e4SLinus Torvalds 
14351da177e4SLinus Torvalds 	security_socket_post_accept(sock, newsock);
14361da177e4SLinus Torvalds 
14371da177e4SLinus Torvalds out_put:
14386cb153caSBenjamin LaHaise 	fput_light(sock->file, fput_needed);
14391da177e4SLinus Torvalds out:
14401da177e4SLinus Torvalds 	return err;
144139d8c1b6SDavid S. Miller out_fd:
14429606a216SDavid S. Miller 	fput(newfile);
144339d8c1b6SDavid S. Miller 	put_unused_fd(newfd);
14441da177e4SLinus Torvalds 	goto out_put;
14451da177e4SLinus Torvalds }
14461da177e4SLinus Torvalds 
14471da177e4SLinus Torvalds /*
14481da177e4SLinus Torvalds  *	Attempt to connect to a socket with the server address.  The address
14491da177e4SLinus Torvalds  *	is in user space so we verify it is OK and move it to kernel space.
14501da177e4SLinus Torvalds  *
14511da177e4SLinus Torvalds  *	For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
14521da177e4SLinus Torvalds  *	break bindings
14531da177e4SLinus Torvalds  *
14541da177e4SLinus Torvalds  *	NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
14551da177e4SLinus Torvalds  *	other SEQPACKET protocols that take time to connect() as it doesn't
14561da177e4SLinus Torvalds  *	include the -EINPROGRESS status for such sockets.
14571da177e4SLinus Torvalds  */
14581da177e4SLinus Torvalds 
145989bddce5SStephen Hemminger asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr,
146089bddce5SStephen Hemminger 			    int addrlen)
14611da177e4SLinus Torvalds {
14621da177e4SLinus Torvalds 	struct socket *sock;
14631da177e4SLinus Torvalds 	char address[MAX_SOCK_ADDR];
14646cb153caSBenjamin LaHaise 	int err, fput_needed;
14651da177e4SLinus Torvalds 
14666cb153caSBenjamin LaHaise 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
14671da177e4SLinus Torvalds 	if (!sock)
14681da177e4SLinus Torvalds 		goto out;
14691da177e4SLinus Torvalds 	err = move_addr_to_kernel(uservaddr, addrlen, address);
14701da177e4SLinus Torvalds 	if (err < 0)
14711da177e4SLinus Torvalds 		goto out_put;
14721da177e4SLinus Torvalds 
147389bddce5SStephen Hemminger 	err =
147489bddce5SStephen Hemminger 	    security_socket_connect(sock, (struct sockaddr *)address, addrlen);
14751da177e4SLinus Torvalds 	if (err)
14761da177e4SLinus Torvalds 		goto out_put;
14771da177e4SLinus Torvalds 
14781da177e4SLinus Torvalds 	err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen,
14791da177e4SLinus Torvalds 				 sock->file->f_flags);
14801da177e4SLinus Torvalds out_put:
14816cb153caSBenjamin LaHaise 	fput_light(sock->file, fput_needed);
14821da177e4SLinus Torvalds out:
14831da177e4SLinus Torvalds 	return err;
14841da177e4SLinus Torvalds }
14851da177e4SLinus Torvalds 
14861da177e4SLinus Torvalds /*
14871da177e4SLinus Torvalds  *	Get the local address ('name') of a socket object. Move the obtained
14881da177e4SLinus Torvalds  *	name to user space.
14891da177e4SLinus Torvalds  */
14901da177e4SLinus Torvalds 
149189bddce5SStephen Hemminger asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr,
149289bddce5SStephen Hemminger 				int __user *usockaddr_len)
14931da177e4SLinus Torvalds {
14941da177e4SLinus Torvalds 	struct socket *sock;
14951da177e4SLinus Torvalds 	char address[MAX_SOCK_ADDR];
14966cb153caSBenjamin LaHaise 	int len, err, fput_needed;
14971da177e4SLinus Torvalds 
14986cb153caSBenjamin LaHaise 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
14991da177e4SLinus Torvalds 	if (!sock)
15001da177e4SLinus Torvalds 		goto out;
15011da177e4SLinus Torvalds 
15021da177e4SLinus Torvalds 	err = security_socket_getsockname(sock);
15031da177e4SLinus Torvalds 	if (err)
15041da177e4SLinus Torvalds 		goto out_put;
15051da177e4SLinus Torvalds 
15061da177e4SLinus Torvalds 	err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
15071da177e4SLinus Torvalds 	if (err)
15081da177e4SLinus Torvalds 		goto out_put;
15091da177e4SLinus Torvalds 	err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
15101da177e4SLinus Torvalds 
15111da177e4SLinus Torvalds out_put:
15126cb153caSBenjamin LaHaise 	fput_light(sock->file, fput_needed);
15131da177e4SLinus Torvalds out:
15141da177e4SLinus Torvalds 	return err;
15151da177e4SLinus Torvalds }
15161da177e4SLinus Torvalds 
15171da177e4SLinus Torvalds /*
15181da177e4SLinus Torvalds  *	Get the remote address ('name') of a socket object. Move the obtained
15191da177e4SLinus Torvalds  *	name to user space.
15201da177e4SLinus Torvalds  */
15211da177e4SLinus Torvalds 
152289bddce5SStephen Hemminger asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr,
152389bddce5SStephen Hemminger 				int __user *usockaddr_len)
15241da177e4SLinus Torvalds {
15251da177e4SLinus Torvalds 	struct socket *sock;
15261da177e4SLinus Torvalds 	char address[MAX_SOCK_ADDR];
15276cb153caSBenjamin LaHaise 	int len, err, fput_needed;
15281da177e4SLinus Torvalds 
152989bddce5SStephen Hemminger 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
153089bddce5SStephen Hemminger 	if (sock != NULL) {
15311da177e4SLinus Torvalds 		err = security_socket_getpeername(sock);
15321da177e4SLinus Torvalds 		if (err) {
15336cb153caSBenjamin LaHaise 			fput_light(sock->file, fput_needed);
15341da177e4SLinus Torvalds 			return err;
15351da177e4SLinus Torvalds 		}
15361da177e4SLinus Torvalds 
153789bddce5SStephen Hemminger 		err =
153889bddce5SStephen Hemminger 		    sock->ops->getname(sock, (struct sockaddr *)address, &len,
153989bddce5SStephen Hemminger 				       1);
15401da177e4SLinus Torvalds 		if (!err)
154189bddce5SStephen Hemminger 			err = move_addr_to_user(address, len, usockaddr,
154289bddce5SStephen Hemminger 						usockaddr_len);
15436cb153caSBenjamin LaHaise 		fput_light(sock->file, fput_needed);
15441da177e4SLinus Torvalds 	}
15451da177e4SLinus Torvalds 	return err;
15461da177e4SLinus Torvalds }
15471da177e4SLinus Torvalds 
15481da177e4SLinus Torvalds /*
15491da177e4SLinus Torvalds  *	Send a datagram to a given address. We move the address into kernel
15501da177e4SLinus Torvalds  *	space and check the user space data area is readable before invoking
15511da177e4SLinus Torvalds  *	the protocol.
15521da177e4SLinus Torvalds  */
15531da177e4SLinus Torvalds 
155489bddce5SStephen Hemminger asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
155589bddce5SStephen Hemminger 			   unsigned flags, struct sockaddr __user *addr,
155689bddce5SStephen Hemminger 			   int addr_len)
15571da177e4SLinus Torvalds {
15581da177e4SLinus Torvalds 	struct socket *sock;
15591da177e4SLinus Torvalds 	char address[MAX_SOCK_ADDR];
15601da177e4SLinus Torvalds 	int err;
15611da177e4SLinus Torvalds 	struct msghdr msg;
15621da177e4SLinus Torvalds 	struct iovec iov;
15636cb153caSBenjamin LaHaise 	int fput_needed;
15646cb153caSBenjamin LaHaise 	struct file *sock_file;
15651da177e4SLinus Torvalds 
15666cb153caSBenjamin LaHaise 	sock_file = fget_light(fd, &fput_needed);
15676cb153caSBenjamin LaHaise 	if (!sock_file)
15686cb153caSBenjamin LaHaise 		return -EBADF;
15696cb153caSBenjamin LaHaise 
15706cb153caSBenjamin LaHaise 	sock = sock_from_file(sock_file, &err);
15711da177e4SLinus Torvalds 	if (!sock)
15726cb153caSBenjamin LaHaise 		goto out_put;
15731da177e4SLinus Torvalds 	iov.iov_base = buff;
15741da177e4SLinus Torvalds 	iov.iov_len = len;
15751da177e4SLinus Torvalds 	msg.msg_name = NULL;
15761da177e4SLinus Torvalds 	msg.msg_iov = &iov;
15771da177e4SLinus Torvalds 	msg.msg_iovlen = 1;
15781da177e4SLinus Torvalds 	msg.msg_control = NULL;
15791da177e4SLinus Torvalds 	msg.msg_controllen = 0;
15801da177e4SLinus Torvalds 	msg.msg_namelen = 0;
15816cb153caSBenjamin LaHaise 	if (addr) {
15821da177e4SLinus Torvalds 		err = move_addr_to_kernel(addr, addr_len, address);
15831da177e4SLinus Torvalds 		if (err < 0)
15841da177e4SLinus Torvalds 			goto out_put;
15851da177e4SLinus Torvalds 		msg.msg_name = address;
15861da177e4SLinus Torvalds 		msg.msg_namelen = addr_len;
15871da177e4SLinus Torvalds 	}
15881da177e4SLinus Torvalds 	if (sock->file->f_flags & O_NONBLOCK)
15891da177e4SLinus Torvalds 		flags |= MSG_DONTWAIT;
15901da177e4SLinus Torvalds 	msg.msg_flags = flags;
15911da177e4SLinus Torvalds 	err = sock_sendmsg(sock, &msg, len);
15921da177e4SLinus Torvalds 
15931da177e4SLinus Torvalds out_put:
15946cb153caSBenjamin LaHaise 	fput_light(sock_file, fput_needed);
15951da177e4SLinus Torvalds 	return err;
15961da177e4SLinus Torvalds }
15971da177e4SLinus Torvalds 
15981da177e4SLinus Torvalds /*
15991da177e4SLinus Torvalds  *	Send a datagram down a socket.
16001da177e4SLinus Torvalds  */
16011da177e4SLinus Torvalds 
16021da177e4SLinus Torvalds asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags)
16031da177e4SLinus Torvalds {
16041da177e4SLinus Torvalds 	return sys_sendto(fd, buff, len, flags, NULL, 0);
16051da177e4SLinus Torvalds }
16061da177e4SLinus Torvalds 
16071da177e4SLinus Torvalds /*
16081da177e4SLinus Torvalds  *	Receive a frame from the socket and optionally record the address of the
16091da177e4SLinus Torvalds  *	sender. We verify the buffers are writable and if needed move the
16101da177e4SLinus Torvalds  *	sender address from kernel to user space.
16111da177e4SLinus Torvalds  */
16121da177e4SLinus Torvalds 
161389bddce5SStephen Hemminger asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size,
161489bddce5SStephen Hemminger 			     unsigned flags, struct sockaddr __user *addr,
161589bddce5SStephen Hemminger 			     int __user *addr_len)
16161da177e4SLinus Torvalds {
16171da177e4SLinus Torvalds 	struct socket *sock;
16181da177e4SLinus Torvalds 	struct iovec iov;
16191da177e4SLinus Torvalds 	struct msghdr msg;
16201da177e4SLinus Torvalds 	char address[MAX_SOCK_ADDR];
16211da177e4SLinus Torvalds 	int err, err2;
16226cb153caSBenjamin LaHaise 	struct file *sock_file;
16236cb153caSBenjamin LaHaise 	int fput_needed;
16241da177e4SLinus Torvalds 
16256cb153caSBenjamin LaHaise 	sock_file = fget_light(fd, &fput_needed);
16266cb153caSBenjamin LaHaise 	if (!sock_file)
16276cb153caSBenjamin LaHaise 		return -EBADF;
16286cb153caSBenjamin LaHaise 
16296cb153caSBenjamin LaHaise 	sock = sock_from_file(sock_file, &err);
16301da177e4SLinus Torvalds 	if (!sock)
16311da177e4SLinus Torvalds 		goto out;
16321da177e4SLinus Torvalds 
16331da177e4SLinus Torvalds 	msg.msg_control = NULL;
16341da177e4SLinus Torvalds 	msg.msg_controllen = 0;
16351da177e4SLinus Torvalds 	msg.msg_iovlen = 1;
16361da177e4SLinus Torvalds 	msg.msg_iov = &iov;
16371da177e4SLinus Torvalds 	iov.iov_len = size;
16381da177e4SLinus Torvalds 	iov.iov_base = ubuf;
16391da177e4SLinus Torvalds 	msg.msg_name = address;
16401da177e4SLinus Torvalds 	msg.msg_namelen = MAX_SOCK_ADDR;
16411da177e4SLinus Torvalds 	if (sock->file->f_flags & O_NONBLOCK)
16421da177e4SLinus Torvalds 		flags |= MSG_DONTWAIT;
16431da177e4SLinus Torvalds 	err = sock_recvmsg(sock, &msg, size, flags);
16441da177e4SLinus Torvalds 
164589bddce5SStephen Hemminger 	if (err >= 0 && addr != NULL) {
16461da177e4SLinus Torvalds 		err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
16471da177e4SLinus Torvalds 		if (err2 < 0)
16481da177e4SLinus Torvalds 			err = err2;
16491da177e4SLinus Torvalds 	}
16501da177e4SLinus Torvalds out:
16516cb153caSBenjamin LaHaise 	fput_light(sock_file, fput_needed);
16521da177e4SLinus Torvalds 	return err;
16531da177e4SLinus Torvalds }
16541da177e4SLinus Torvalds 
16551da177e4SLinus Torvalds /*
16561da177e4SLinus Torvalds  *	Receive a datagram from a socket.
16571da177e4SLinus Torvalds  */
16581da177e4SLinus Torvalds 
165989bddce5SStephen Hemminger asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
166089bddce5SStephen Hemminger 			 unsigned flags)
16611da177e4SLinus Torvalds {
16621da177e4SLinus Torvalds 	return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
16631da177e4SLinus Torvalds }
16641da177e4SLinus Torvalds 
16651da177e4SLinus Torvalds /*
16661da177e4SLinus Torvalds  *	Set a socket option. Because we don't know the option lengths we have
16671da177e4SLinus Torvalds  *	to pass the user mode parameter for the protocols to sort out.
16681da177e4SLinus Torvalds  */
16691da177e4SLinus Torvalds 
167089bddce5SStephen Hemminger asmlinkage long sys_setsockopt(int fd, int level, int optname,
167189bddce5SStephen Hemminger 			       char __user *optval, int optlen)
16721da177e4SLinus Torvalds {
16736cb153caSBenjamin LaHaise 	int err, fput_needed;
16741da177e4SLinus Torvalds 	struct socket *sock;
16751da177e4SLinus Torvalds 
16761da177e4SLinus Torvalds 	if (optlen < 0)
16771da177e4SLinus Torvalds 		return -EINVAL;
16781da177e4SLinus Torvalds 
167989bddce5SStephen Hemminger 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
168089bddce5SStephen Hemminger 	if (sock != NULL) {
16811da177e4SLinus Torvalds 		err = security_socket_setsockopt(sock, level, optname);
16826cb153caSBenjamin LaHaise 		if (err)
16836cb153caSBenjamin LaHaise 			goto out_put;
16841da177e4SLinus Torvalds 
16851da177e4SLinus Torvalds 		if (level == SOL_SOCKET)
168689bddce5SStephen Hemminger 			err =
168789bddce5SStephen Hemminger 			    sock_setsockopt(sock, level, optname, optval,
168889bddce5SStephen Hemminger 					    optlen);
16891da177e4SLinus Torvalds 		else
169089bddce5SStephen Hemminger 			err =
169189bddce5SStephen Hemminger 			    sock->ops->setsockopt(sock, level, optname, optval,
169289bddce5SStephen Hemminger 						  optlen);
16936cb153caSBenjamin LaHaise out_put:
16946cb153caSBenjamin LaHaise 		fput_light(sock->file, fput_needed);
16951da177e4SLinus Torvalds 	}
16961da177e4SLinus Torvalds 	return err;
16971da177e4SLinus Torvalds }
16981da177e4SLinus Torvalds 
16991da177e4SLinus Torvalds /*
17001da177e4SLinus Torvalds  *	Get a socket option. Because we don't know the option lengths we have
17011da177e4SLinus Torvalds  *	to pass a user mode parameter for the protocols to sort out.
17021da177e4SLinus Torvalds  */
17031da177e4SLinus Torvalds 
170489bddce5SStephen Hemminger asmlinkage long sys_getsockopt(int fd, int level, int optname,
170589bddce5SStephen Hemminger 			       char __user *optval, int __user *optlen)
17061da177e4SLinus Torvalds {
17076cb153caSBenjamin LaHaise 	int err, fput_needed;
17081da177e4SLinus Torvalds 	struct socket *sock;
17091da177e4SLinus Torvalds 
171089bddce5SStephen Hemminger 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
171189bddce5SStephen Hemminger 	if (sock != NULL) {
17126cb153caSBenjamin LaHaise 		err = security_socket_getsockopt(sock, level, optname);
17136cb153caSBenjamin LaHaise 		if (err)
17146cb153caSBenjamin LaHaise 			goto out_put;
17151da177e4SLinus Torvalds 
17161da177e4SLinus Torvalds 		if (level == SOL_SOCKET)
171789bddce5SStephen Hemminger 			err =
171889bddce5SStephen Hemminger 			    sock_getsockopt(sock, level, optname, optval,
171989bddce5SStephen Hemminger 					    optlen);
17201da177e4SLinus Torvalds 		else
172189bddce5SStephen Hemminger 			err =
172289bddce5SStephen Hemminger 			    sock->ops->getsockopt(sock, level, optname, optval,
172389bddce5SStephen Hemminger 						  optlen);
17246cb153caSBenjamin LaHaise out_put:
17256cb153caSBenjamin LaHaise 		fput_light(sock->file, fput_needed);
17261da177e4SLinus Torvalds 	}
17271da177e4SLinus Torvalds 	return err;
17281da177e4SLinus Torvalds }
17291da177e4SLinus Torvalds 
17301da177e4SLinus Torvalds /*
17311da177e4SLinus Torvalds  *	Shutdown a socket.
17321da177e4SLinus Torvalds  */
17331da177e4SLinus Torvalds 
17341da177e4SLinus Torvalds asmlinkage long sys_shutdown(int fd, int how)
17351da177e4SLinus Torvalds {
17366cb153caSBenjamin LaHaise 	int err, fput_needed;
17371da177e4SLinus Torvalds 	struct socket *sock;
17381da177e4SLinus Torvalds 
173989bddce5SStephen Hemminger 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
174089bddce5SStephen Hemminger 	if (sock != NULL) {
17411da177e4SLinus Torvalds 		err = security_socket_shutdown(sock, how);
17426cb153caSBenjamin LaHaise 		if (!err)
17431da177e4SLinus Torvalds 			err = sock->ops->shutdown(sock, how);
17446cb153caSBenjamin LaHaise 		fput_light(sock->file, fput_needed);
17451da177e4SLinus Torvalds 	}
17461da177e4SLinus Torvalds 	return err;
17471da177e4SLinus Torvalds }
17481da177e4SLinus Torvalds 
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * COMPAT_MSG() picks the field from the compat or the native header
 * depending on MSG_CMSG_COMPAT.  It expects a variable named 'flags' and a
 * pointer named '<msg>_compat' to be in scope where it is expanded.
 */
#define COMPAT_MSG(msg, member)					\
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member	\
				   : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
17551da177e4SLinus Torvalds 
17561da177e4SLinus Torvalds /*
17571da177e4SLinus Torvalds  *	BSD sendmsg interface
17581da177e4SLinus Torvalds  */
17591da177e4SLinus Torvalds 
17601da177e4SLinus Torvalds asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
17611da177e4SLinus Torvalds {
176289bddce5SStephen Hemminger 	struct compat_msghdr __user *msg_compat =
176389bddce5SStephen Hemminger 	    (struct compat_msghdr __user *)msg;
17641da177e4SLinus Torvalds 	struct socket *sock;
17651da177e4SLinus Torvalds 	char address[MAX_SOCK_ADDR];
17661da177e4SLinus Torvalds 	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
1767b9d717a7SAlex Williamson 	unsigned char ctl[sizeof(struct cmsghdr) + 20]
1768b9d717a7SAlex Williamson 	    __attribute__ ((aligned(sizeof(__kernel_size_t))));
1769b9d717a7SAlex Williamson 	/* 20 is size of ipv6_pktinfo */
17701da177e4SLinus Torvalds 	unsigned char *ctl_buf = ctl;
17711da177e4SLinus Torvalds 	struct msghdr msg_sys;
17721da177e4SLinus Torvalds 	int err, ctl_len, iov_size, total_len;
17736cb153caSBenjamin LaHaise 	int fput_needed;
17741da177e4SLinus Torvalds 
17751da177e4SLinus Torvalds 	err = -EFAULT;
17761da177e4SLinus Torvalds 	if (MSG_CMSG_COMPAT & flags) {
17771da177e4SLinus Torvalds 		if (get_compat_msghdr(&msg_sys, msg_compat))
17781da177e4SLinus Torvalds 			return -EFAULT;
177989bddce5SStephen Hemminger 	}
178089bddce5SStephen Hemminger 	else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
17811da177e4SLinus Torvalds 		return -EFAULT;
17821da177e4SLinus Torvalds 
17836cb153caSBenjamin LaHaise 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
17841da177e4SLinus Torvalds 	if (!sock)
17851da177e4SLinus Torvalds 		goto out;
17861da177e4SLinus Torvalds 
17871da177e4SLinus Torvalds 	/* do not move before msg_sys is valid */
17881da177e4SLinus Torvalds 	err = -EMSGSIZE;
17891da177e4SLinus Torvalds 	if (msg_sys.msg_iovlen > UIO_MAXIOV)
17901da177e4SLinus Torvalds 		goto out_put;
17911da177e4SLinus Torvalds 
17921da177e4SLinus Torvalds 	/* Check whether to allocate the iovec area */
17931da177e4SLinus Torvalds 	err = -ENOMEM;
17941da177e4SLinus Torvalds 	iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
17951da177e4SLinus Torvalds 	if (msg_sys.msg_iovlen > UIO_FASTIOV) {
17961da177e4SLinus Torvalds 		iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
17971da177e4SLinus Torvalds 		if (!iov)
17981da177e4SLinus Torvalds 			goto out_put;
17991da177e4SLinus Torvalds 	}
18001da177e4SLinus Torvalds 
18011da177e4SLinus Torvalds 	/* This will also move the address data into kernel space */
18021da177e4SLinus Torvalds 	if (MSG_CMSG_COMPAT & flags) {
18031da177e4SLinus Torvalds 		err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
18041da177e4SLinus Torvalds 	} else
18051da177e4SLinus Torvalds 		err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
18061da177e4SLinus Torvalds 	if (err < 0)
18071da177e4SLinus Torvalds 		goto out_freeiov;
18081da177e4SLinus Torvalds 	total_len = err;
18091da177e4SLinus Torvalds 
18101da177e4SLinus Torvalds 	err = -ENOBUFS;
18111da177e4SLinus Torvalds 
18121da177e4SLinus Torvalds 	if (msg_sys.msg_controllen > INT_MAX)
18131da177e4SLinus Torvalds 		goto out_freeiov;
18141da177e4SLinus Torvalds 	ctl_len = msg_sys.msg_controllen;
18151da177e4SLinus Torvalds 	if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
181689bddce5SStephen Hemminger 		err =
181789bddce5SStephen Hemminger 		    cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
181889bddce5SStephen Hemminger 						     sizeof(ctl));
18191da177e4SLinus Torvalds 		if (err)
18201da177e4SLinus Torvalds 			goto out_freeiov;
18211da177e4SLinus Torvalds 		ctl_buf = msg_sys.msg_control;
18228920e8f9SAl Viro 		ctl_len = msg_sys.msg_controllen;
18231da177e4SLinus Torvalds 	} else if (ctl_len) {
182489bddce5SStephen Hemminger 		if (ctl_len > sizeof(ctl)) {
18251da177e4SLinus Torvalds 			ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
18261da177e4SLinus Torvalds 			if (ctl_buf == NULL)
18271da177e4SLinus Torvalds 				goto out_freeiov;
18281da177e4SLinus Torvalds 		}
18291da177e4SLinus Torvalds 		err = -EFAULT;
18301da177e4SLinus Torvalds 		/*
18311da177e4SLinus Torvalds 		 * Careful! Before this, msg_sys.msg_control contains a user pointer.
18321da177e4SLinus Torvalds 		 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
18331da177e4SLinus Torvalds 		 * checking falls down on this.
18341da177e4SLinus Torvalds 		 */
183589bddce5SStephen Hemminger 		if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
183689bddce5SStephen Hemminger 				   ctl_len))
18371da177e4SLinus Torvalds 			goto out_freectl;
18381da177e4SLinus Torvalds 		msg_sys.msg_control = ctl_buf;
18391da177e4SLinus Torvalds 	}
18401da177e4SLinus Torvalds 	msg_sys.msg_flags = flags;
18411da177e4SLinus Torvalds 
18421da177e4SLinus Torvalds 	if (sock->file->f_flags & O_NONBLOCK)
18431da177e4SLinus Torvalds 		msg_sys.msg_flags |= MSG_DONTWAIT;
18441da177e4SLinus Torvalds 	err = sock_sendmsg(sock, &msg_sys, total_len);
18451da177e4SLinus Torvalds 
18461da177e4SLinus Torvalds out_freectl:
18471da177e4SLinus Torvalds 	if (ctl_buf != ctl)
18481da177e4SLinus Torvalds 		sock_kfree_s(sock->sk, ctl_buf, ctl_len);
18491da177e4SLinus Torvalds out_freeiov:
18501da177e4SLinus Torvalds 	if (iov != iovstack)
18511da177e4SLinus Torvalds 		sock_kfree_s(sock->sk, iov, iov_size);
18521da177e4SLinus Torvalds out_put:
18536cb153caSBenjamin LaHaise 	fput_light(sock->file, fput_needed);
18541da177e4SLinus Torvalds out:
18551da177e4SLinus Torvalds 	return err;
18561da177e4SLinus Torvalds }
18571da177e4SLinus Torvalds 
18581da177e4SLinus Torvalds /*
18591da177e4SLinus Torvalds  *	BSD recvmsg interface
18601da177e4SLinus Torvalds  */
18611da177e4SLinus Torvalds 
186289bddce5SStephen Hemminger asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg,
186389bddce5SStephen Hemminger 			    unsigned int flags)
18641da177e4SLinus Torvalds {
186589bddce5SStephen Hemminger 	struct compat_msghdr __user *msg_compat =
186689bddce5SStephen Hemminger 	    (struct compat_msghdr __user *)msg;
18671da177e4SLinus Torvalds 	struct socket *sock;
18681da177e4SLinus Torvalds 	struct iovec iovstack[UIO_FASTIOV];
18691da177e4SLinus Torvalds 	struct iovec *iov = iovstack;
18701da177e4SLinus Torvalds 	struct msghdr msg_sys;
18711da177e4SLinus Torvalds 	unsigned long cmsg_ptr;
18721da177e4SLinus Torvalds 	int err, iov_size, total_len, len;
18736cb153caSBenjamin LaHaise 	int fput_needed;
18741da177e4SLinus Torvalds 
18751da177e4SLinus Torvalds 	/* kernel mode address */
18761da177e4SLinus Torvalds 	char addr[MAX_SOCK_ADDR];
18771da177e4SLinus Torvalds 
18781da177e4SLinus Torvalds 	/* user mode address pointers */
18791da177e4SLinus Torvalds 	struct sockaddr __user *uaddr;
18801da177e4SLinus Torvalds 	int __user *uaddr_len;
18811da177e4SLinus Torvalds 
18821da177e4SLinus Torvalds 	if (MSG_CMSG_COMPAT & flags) {
18831da177e4SLinus Torvalds 		if (get_compat_msghdr(&msg_sys, msg_compat))
18841da177e4SLinus Torvalds 			return -EFAULT;
188589bddce5SStephen Hemminger 	}
188689bddce5SStephen Hemminger 	else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
18871da177e4SLinus Torvalds 		return -EFAULT;
18881da177e4SLinus Torvalds 
18896cb153caSBenjamin LaHaise 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
18901da177e4SLinus Torvalds 	if (!sock)
18911da177e4SLinus Torvalds 		goto out;
18921da177e4SLinus Torvalds 
18931da177e4SLinus Torvalds 	err = -EMSGSIZE;
18941da177e4SLinus Torvalds 	if (msg_sys.msg_iovlen > UIO_MAXIOV)
18951da177e4SLinus Torvalds 		goto out_put;
18961da177e4SLinus Torvalds 
18971da177e4SLinus Torvalds 	/* Check whether to allocate the iovec area */
18981da177e4SLinus Torvalds 	err = -ENOMEM;
18991da177e4SLinus Torvalds 	iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
19001da177e4SLinus Torvalds 	if (msg_sys.msg_iovlen > UIO_FASTIOV) {
19011da177e4SLinus Torvalds 		iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
19021da177e4SLinus Torvalds 		if (!iov)
19031da177e4SLinus Torvalds 			goto out_put;
19041da177e4SLinus Torvalds 	}
19051da177e4SLinus Torvalds 
19061da177e4SLinus Torvalds 	/*
19071da177e4SLinus Torvalds 	 *      Save the user-mode address (verify_iovec will change the
19081da177e4SLinus Torvalds 	 *      kernel msghdr to use the kernel address space)
19091da177e4SLinus Torvalds 	 */
19101da177e4SLinus Torvalds 
19111da177e4SLinus Torvalds 	uaddr = (void __user *)msg_sys.msg_name;
19121da177e4SLinus Torvalds 	uaddr_len = COMPAT_NAMELEN(msg);
19131da177e4SLinus Torvalds 	if (MSG_CMSG_COMPAT & flags) {
19141da177e4SLinus Torvalds 		err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
19151da177e4SLinus Torvalds 	} else
19161da177e4SLinus Torvalds 		err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
19171da177e4SLinus Torvalds 	if (err < 0)
19181da177e4SLinus Torvalds 		goto out_freeiov;
19191da177e4SLinus Torvalds 	total_len = err;
19201da177e4SLinus Torvalds 
19211da177e4SLinus Torvalds 	cmsg_ptr = (unsigned long)msg_sys.msg_control;
19221da177e4SLinus Torvalds 	msg_sys.msg_flags = 0;
19231da177e4SLinus Torvalds 	if (MSG_CMSG_COMPAT & flags)
19241da177e4SLinus Torvalds 		msg_sys.msg_flags = MSG_CMSG_COMPAT;
19251da177e4SLinus Torvalds 
19261da177e4SLinus Torvalds 	if (sock->file->f_flags & O_NONBLOCK)
19271da177e4SLinus Torvalds 		flags |= MSG_DONTWAIT;
19281da177e4SLinus Torvalds 	err = sock_recvmsg(sock, &msg_sys, total_len, flags);
19291da177e4SLinus Torvalds 	if (err < 0)
19301da177e4SLinus Torvalds 		goto out_freeiov;
19311da177e4SLinus Torvalds 	len = err;
19321da177e4SLinus Torvalds 
19331da177e4SLinus Torvalds 	if (uaddr != NULL) {
193489bddce5SStephen Hemminger 		err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr,
193589bddce5SStephen Hemminger 					uaddr_len);
19361da177e4SLinus Torvalds 		if (err < 0)
19371da177e4SLinus Torvalds 			goto out_freeiov;
19381da177e4SLinus Torvalds 	}
193937f7f421SDavid S. Miller 	err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
194037f7f421SDavid S. Miller 			 COMPAT_FLAGS(msg));
19411da177e4SLinus Torvalds 	if (err)
19421da177e4SLinus Torvalds 		goto out_freeiov;
19431da177e4SLinus Torvalds 	if (MSG_CMSG_COMPAT & flags)
19441da177e4SLinus Torvalds 		err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
19451da177e4SLinus Torvalds 				 &msg_compat->msg_controllen);
19461da177e4SLinus Torvalds 	else
19471da177e4SLinus Torvalds 		err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr,
19481da177e4SLinus Torvalds 				 &msg->msg_controllen);
19491da177e4SLinus Torvalds 	if (err)
19501da177e4SLinus Torvalds 		goto out_freeiov;
19511da177e4SLinus Torvalds 	err = len;
19521da177e4SLinus Torvalds 
19531da177e4SLinus Torvalds out_freeiov:
19541da177e4SLinus Torvalds 	if (iov != iovstack)
19551da177e4SLinus Torvalds 		sock_kfree_s(sock->sk, iov, iov_size);
19561da177e4SLinus Torvalds out_put:
19576cb153caSBenjamin LaHaise 	fput_light(sock->file, fput_needed);
19581da177e4SLinus Torvalds out:
19591da177e4SLinus Torvalds 	return err;
19601da177e4SLinus Torvalds }
19611da177e4SLinus Torvalds 
19621da177e4SLinus Torvalds #ifdef __ARCH_WANT_SYS_SOCKETCALL
19631da177e4SLinus Torvalds 
/*
 * Argument list sizes for sys_socketcall, in bytes.  sys_socketcall indexes
 * this table with the call number to learn how much of the user argument
 * block to copy in.  Slot 0 is a zero-length entry.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[18] = {
	AL(0), AL(3), AL(3),
	AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4),
	AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5),
	AL(5), AL(3), AL(3)
};

#undef AL
19731da177e4SLinus Torvalds 
19741da177e4SLinus Torvalds /*
19751da177e4SLinus Torvalds  *	System call vectors.
19761da177e4SLinus Torvalds  *
19771da177e4SLinus Torvalds  *	Argument checking cleaned up. Saved 20% in size.
19781da177e4SLinus Torvalds  *  This function doesn't need to set the kernel lock because
19791da177e4SLinus Torvalds  *  it is set by the callees.
19801da177e4SLinus Torvalds  */
19811da177e4SLinus Torvalds 
19821da177e4SLinus Torvalds asmlinkage long sys_socketcall(int call, unsigned long __user *args)
19831da177e4SLinus Torvalds {
19841da177e4SLinus Torvalds 	unsigned long a[6];
19851da177e4SLinus Torvalds 	unsigned long a0, a1;
19861da177e4SLinus Torvalds 	int err;
19871da177e4SLinus Torvalds 
19881da177e4SLinus Torvalds 	if (call < 1 || call > SYS_RECVMSG)
19891da177e4SLinus Torvalds 		return -EINVAL;
19901da177e4SLinus Torvalds 
19911da177e4SLinus Torvalds 	/* copy_from_user should be SMP safe. */
19921da177e4SLinus Torvalds 	if (copy_from_user(a, args, nargs[call]))
19931da177e4SLinus Torvalds 		return -EFAULT;
19941da177e4SLinus Torvalds 
19954bcff1b3SDavid Woodhouse 	err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
19963ec3b2fbSDavid Woodhouse 	if (err)
19973ec3b2fbSDavid Woodhouse 		return err;
19983ec3b2fbSDavid Woodhouse 
19991da177e4SLinus Torvalds 	a0 = a[0];
20001da177e4SLinus Torvalds 	a1 = a[1];
20011da177e4SLinus Torvalds 
200289bddce5SStephen Hemminger 	switch (call) {
20031da177e4SLinus Torvalds 	case SYS_SOCKET:
20041da177e4SLinus Torvalds 		err = sys_socket(a0, a1, a[2]);
20051da177e4SLinus Torvalds 		break;
20061da177e4SLinus Torvalds 	case SYS_BIND:
20071da177e4SLinus Torvalds 		err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
20081da177e4SLinus Torvalds 		break;
20091da177e4SLinus Torvalds 	case SYS_CONNECT:
20101da177e4SLinus Torvalds 		err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
20111da177e4SLinus Torvalds 		break;
20121da177e4SLinus Torvalds 	case SYS_LISTEN:
20131da177e4SLinus Torvalds 		err = sys_listen(a0, a1);
20141da177e4SLinus Torvalds 		break;
20151da177e4SLinus Torvalds 	case SYS_ACCEPT:
201689bddce5SStephen Hemminger 		err =
201789bddce5SStephen Hemminger 		    sys_accept(a0, (struct sockaddr __user *)a1,
201889bddce5SStephen Hemminger 			       (int __user *)a[2]);
20191da177e4SLinus Torvalds 		break;
20201da177e4SLinus Torvalds 	case SYS_GETSOCKNAME:
202189bddce5SStephen Hemminger 		err =
202289bddce5SStephen Hemminger 		    sys_getsockname(a0, (struct sockaddr __user *)a1,
202389bddce5SStephen Hemminger 				    (int __user *)a[2]);
20241da177e4SLinus Torvalds 		break;
20251da177e4SLinus Torvalds 	case SYS_GETPEERNAME:
202689bddce5SStephen Hemminger 		err =
202789bddce5SStephen Hemminger 		    sys_getpeername(a0, (struct sockaddr __user *)a1,
202889bddce5SStephen Hemminger 				    (int __user *)a[2]);
20291da177e4SLinus Torvalds 		break;
20301da177e4SLinus Torvalds 	case SYS_SOCKETPAIR:
20311da177e4SLinus Torvalds 		err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
20321da177e4SLinus Torvalds 		break;
20331da177e4SLinus Torvalds 	case SYS_SEND:
20341da177e4SLinus Torvalds 		err = sys_send(a0, (void __user *)a1, a[2], a[3]);
20351da177e4SLinus Torvalds 		break;
20361da177e4SLinus Torvalds 	case SYS_SENDTO:
20371da177e4SLinus Torvalds 		err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
20381da177e4SLinus Torvalds 				 (struct sockaddr __user *)a[4], a[5]);
20391da177e4SLinus Torvalds 		break;
20401da177e4SLinus Torvalds 	case SYS_RECV:
20411da177e4SLinus Torvalds 		err = sys_recv(a0, (void __user *)a1, a[2], a[3]);
20421da177e4SLinus Torvalds 		break;
20431da177e4SLinus Torvalds 	case SYS_RECVFROM:
20441da177e4SLinus Torvalds 		err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
204589bddce5SStephen Hemminger 				   (struct sockaddr __user *)a[4],
204689bddce5SStephen Hemminger 				   (int __user *)a[5]);
20471da177e4SLinus Torvalds 		break;
20481da177e4SLinus Torvalds 	case SYS_SHUTDOWN:
20491da177e4SLinus Torvalds 		err = sys_shutdown(a0, a1);
20501da177e4SLinus Torvalds 		break;
20511da177e4SLinus Torvalds 	case SYS_SETSOCKOPT:
20521da177e4SLinus Torvalds 		err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
20531da177e4SLinus Torvalds 		break;
20541da177e4SLinus Torvalds 	case SYS_GETSOCKOPT:
205589bddce5SStephen Hemminger 		err =
205689bddce5SStephen Hemminger 		    sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
205789bddce5SStephen Hemminger 				   (int __user *)a[4]);
20581da177e4SLinus Torvalds 		break;
20591da177e4SLinus Torvalds 	case SYS_SENDMSG:
20601da177e4SLinus Torvalds 		err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
20611da177e4SLinus Torvalds 		break;
20621da177e4SLinus Torvalds 	case SYS_RECVMSG:
20631da177e4SLinus Torvalds 		err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
20641da177e4SLinus Torvalds 		break;
20651da177e4SLinus Torvalds 	default:
20661da177e4SLinus Torvalds 		err = -EINVAL;
20671da177e4SLinus Torvalds 		break;
20681da177e4SLinus Torvalds 	}
20691da177e4SLinus Torvalds 	return err;
20701da177e4SLinus Torvalds }
20711da177e4SLinus Torvalds 
20721da177e4SLinus Torvalds #endif				/* __ARCH_WANT_SYS_SOCKETCALL */
20731da177e4SLinus Torvalds 
2074*55737fdaSStephen Hemminger /**
2075*55737fdaSStephen Hemminger  *	sock_register - add a socket protocol handler
2076*55737fdaSStephen Hemminger  *	@ops: description of protocol
2077*55737fdaSStephen Hemminger  *
20781da177e4SLinus Torvalds  *	This function is called by a protocol handler that wants to
20791da177e4SLinus Torvalds  *	advertise its address family, and have it linked into the
2080*55737fdaSStephen Hemminger  *	socket interface. The value ops->family coresponds to the
2081*55737fdaSStephen Hemminger  *	socket system call protocol family.
20821da177e4SLinus Torvalds  */
20831da177e4SLinus Torvalds int sock_register(struct net_proto_family *ops)
20841da177e4SLinus Torvalds {
20851da177e4SLinus Torvalds 	int err;
20861da177e4SLinus Torvalds 
20871da177e4SLinus Torvalds 	if (ops->family >= NPROTO) {
208889bddce5SStephen Hemminger 		printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
208989bddce5SStephen Hemminger 		       NPROTO);
20901da177e4SLinus Torvalds 		return -ENOBUFS;
20911da177e4SLinus Torvalds 	}
2092*55737fdaSStephen Hemminger 
2093*55737fdaSStephen Hemminger 	spin_lock(&net_family_lock);
2094*55737fdaSStephen Hemminger 	if (net_families[ops->family])
20951da177e4SLinus Torvalds 		err = -EEXIST;
2096*55737fdaSStephen Hemminger 	else {
20971da177e4SLinus Torvalds 		net_families[ops->family] = ops;
20981da177e4SLinus Torvalds 		err = 0;
20991da177e4SLinus Torvalds 	}
2100*55737fdaSStephen Hemminger 	spin_unlock(&net_family_lock);
2101*55737fdaSStephen Hemminger 
210289bddce5SStephen Hemminger 	printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
21031da177e4SLinus Torvalds 	return err;
21041da177e4SLinus Torvalds }
21051da177e4SLinus Torvalds 
2106*55737fdaSStephen Hemminger /**
2107*55737fdaSStephen Hemminger  *	sock_unregister - remove a protocol handler
2108*55737fdaSStephen Hemminger  *	@family: protocol family to remove
2109*55737fdaSStephen Hemminger  *
21101da177e4SLinus Torvalds  *	This function is called by a protocol handler that wants to
21111da177e4SLinus Torvalds  *	remove its address family, and have it unlinked from the
2112*55737fdaSStephen Hemminger  *	new socket creation.
2113*55737fdaSStephen Hemminger  *
2114*55737fdaSStephen Hemminger  *	If protocol handler is a module, then it can use module reference
2115*55737fdaSStephen Hemminger  *	counts to protect against new references. If protocol handler is not
2116*55737fdaSStephen Hemminger  *	a module then it needs to provide its own protection in
2117*55737fdaSStephen Hemminger  *	the ops->create routine.
21181da177e4SLinus Torvalds  */
21191da177e4SLinus Torvalds int sock_unregister(int family)
21201da177e4SLinus Torvalds {
21211da177e4SLinus Torvalds 	if (family < 0 || family >= NPROTO)
2122*55737fdaSStephen Hemminger 		return -EINVAL;
21231da177e4SLinus Torvalds 
2124*55737fdaSStephen Hemminger 	spin_lock(&net_family_lock);
21251da177e4SLinus Torvalds 	net_families[family] = NULL;
2126*55737fdaSStephen Hemminger 	spin_unlock(&net_family_lock);
2127*55737fdaSStephen Hemminger 
2128*55737fdaSStephen Hemminger 	synchronize_rcu();
2129*55737fdaSStephen Hemminger 
213089bddce5SStephen Hemminger 	printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
21311da177e4SLinus Torvalds 	return 0;
21321da177e4SLinus Torvalds }
21331da177e4SLinus Torvalds 
213477d76ea3SAndi Kleen static int __init sock_init(void)
21351da177e4SLinus Torvalds {
21361da177e4SLinus Torvalds 	/*
21371da177e4SLinus Torvalds 	 *      Initialize sock SLAB cache.
21381da177e4SLinus Torvalds 	 */
21391da177e4SLinus Torvalds 
21401da177e4SLinus Torvalds 	sk_init();
21411da177e4SLinus Torvalds 
21421da177e4SLinus Torvalds 	/*
21431da177e4SLinus Torvalds 	 *      Initialize skbuff SLAB cache
21441da177e4SLinus Torvalds 	 */
21451da177e4SLinus Torvalds 	skb_init();
21461da177e4SLinus Torvalds 
21471da177e4SLinus Torvalds 	/*
21481da177e4SLinus Torvalds 	 *      Initialize the protocols module.
21491da177e4SLinus Torvalds 	 */
21501da177e4SLinus Torvalds 
21511da177e4SLinus Torvalds 	init_inodecache();
21521da177e4SLinus Torvalds 	register_filesystem(&sock_fs_type);
21531da177e4SLinus Torvalds 	sock_mnt = kern_mount(&sock_fs_type);
215477d76ea3SAndi Kleen 
215577d76ea3SAndi Kleen 	/* The real protocol initialization is performed in later initcalls.
21561da177e4SLinus Torvalds 	 */
21571da177e4SLinus Torvalds 
21581da177e4SLinus Torvalds #ifdef CONFIG_NETFILTER
21591da177e4SLinus Torvalds 	netfilter_init();
21601da177e4SLinus Torvalds #endif
2161cbeb321aSDavid S. Miller 
2162cbeb321aSDavid S. Miller 	return 0;
21631da177e4SLinus Torvalds }
21641da177e4SLinus Torvalds 
216577d76ea3SAndi Kleen core_initcall(sock_init);	/* early initcall */
216677d76ea3SAndi Kleen 
21671da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
21681da177e4SLinus Torvalds void socket_seq_show(struct seq_file *seq)
21691da177e4SLinus Torvalds {
21701da177e4SLinus Torvalds 	int cpu;
21711da177e4SLinus Torvalds 	int counter = 0;
21721da177e4SLinus Torvalds 
21736f912042SKAMEZAWA Hiroyuki 	for_each_possible_cpu(cpu)
21741da177e4SLinus Torvalds 	    counter += per_cpu(sockets_in_use, cpu);
21751da177e4SLinus Torvalds 
21761da177e4SLinus Torvalds 	/* It can be negative, by the way. 8) */
21771da177e4SLinus Torvalds 	if (counter < 0)
21781da177e4SLinus Torvalds 		counter = 0;
21791da177e4SLinus Torvalds 
21801da177e4SLinus Torvalds 	seq_printf(seq, "sockets: used %d\n", counter);
21811da177e4SLinus Torvalds }
21821da177e4SLinus Torvalds #endif				/* CONFIG_PROC_FS */
21831da177e4SLinus Torvalds 
218489bbfc95SShaun Pereira #ifdef CONFIG_COMPAT
218589bbfc95SShaun Pereira static long compat_sock_ioctl(struct file *file, unsigned cmd,
218689bbfc95SShaun Pereira 			      unsigned long arg)
218789bbfc95SShaun Pereira {
218889bbfc95SShaun Pereira 	struct socket *sock = file->private_data;
218989bbfc95SShaun Pereira 	int ret = -ENOIOCTLCMD;
219089bbfc95SShaun Pereira 
219189bbfc95SShaun Pereira 	if (sock->ops->compat_ioctl)
219289bbfc95SShaun Pereira 		ret = sock->ops->compat_ioctl(sock, cmd, arg);
219389bbfc95SShaun Pereira 
219489bbfc95SShaun Pereira 	return ret;
219589bbfc95SShaun Pereira }
219689bbfc95SShaun Pereira #endif
219789bbfc95SShaun Pereira 
2198ac5a488eSSridhar Samudrala int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
2199ac5a488eSSridhar Samudrala {
2200ac5a488eSSridhar Samudrala 	return sock->ops->bind(sock, addr, addrlen);
2201ac5a488eSSridhar Samudrala }
2202ac5a488eSSridhar Samudrala 
2203ac5a488eSSridhar Samudrala int kernel_listen(struct socket *sock, int backlog)
2204ac5a488eSSridhar Samudrala {
2205ac5a488eSSridhar Samudrala 	return sock->ops->listen(sock, backlog);
2206ac5a488eSSridhar Samudrala }
2207ac5a488eSSridhar Samudrala 
2208ac5a488eSSridhar Samudrala int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
2209ac5a488eSSridhar Samudrala {
2210ac5a488eSSridhar Samudrala 	struct sock *sk = sock->sk;
2211ac5a488eSSridhar Samudrala 	int err;
2212ac5a488eSSridhar Samudrala 
2213ac5a488eSSridhar Samudrala 	err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
2214ac5a488eSSridhar Samudrala 			       newsock);
2215ac5a488eSSridhar Samudrala 	if (err < 0)
2216ac5a488eSSridhar Samudrala 		goto done;
2217ac5a488eSSridhar Samudrala 
2218ac5a488eSSridhar Samudrala 	err = sock->ops->accept(sock, *newsock, flags);
2219ac5a488eSSridhar Samudrala 	if (err < 0) {
2220ac5a488eSSridhar Samudrala 		sock_release(*newsock);
2221ac5a488eSSridhar Samudrala 		goto done;
2222ac5a488eSSridhar Samudrala 	}
2223ac5a488eSSridhar Samudrala 
2224ac5a488eSSridhar Samudrala 	(*newsock)->ops = sock->ops;
2225ac5a488eSSridhar Samudrala 
2226ac5a488eSSridhar Samudrala done:
2227ac5a488eSSridhar Samudrala 	return err;
2228ac5a488eSSridhar Samudrala }
2229ac5a488eSSridhar Samudrala 
2230ac5a488eSSridhar Samudrala int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
2231ac5a488eSSridhar Samudrala                    int flags)
2232ac5a488eSSridhar Samudrala {
2233ac5a488eSSridhar Samudrala 	return sock->ops->connect(sock, addr, addrlen, flags);
2234ac5a488eSSridhar Samudrala }
2235ac5a488eSSridhar Samudrala 
2236ac5a488eSSridhar Samudrala int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
2237ac5a488eSSridhar Samudrala 			 int *addrlen)
2238ac5a488eSSridhar Samudrala {
2239ac5a488eSSridhar Samudrala 	return sock->ops->getname(sock, addr, addrlen, 0);
2240ac5a488eSSridhar Samudrala }
2241ac5a488eSSridhar Samudrala 
2242ac5a488eSSridhar Samudrala int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
2243ac5a488eSSridhar Samudrala 			 int *addrlen)
2244ac5a488eSSridhar Samudrala {
2245ac5a488eSSridhar Samudrala 	return sock->ops->getname(sock, addr, addrlen, 1);
2246ac5a488eSSridhar Samudrala }
2247ac5a488eSSridhar Samudrala 
2248ac5a488eSSridhar Samudrala int kernel_getsockopt(struct socket *sock, int level, int optname,
2249ac5a488eSSridhar Samudrala 			char *optval, int *optlen)
2250ac5a488eSSridhar Samudrala {
2251ac5a488eSSridhar Samudrala 	mm_segment_t oldfs = get_fs();
2252ac5a488eSSridhar Samudrala 	int err;
2253ac5a488eSSridhar Samudrala 
2254ac5a488eSSridhar Samudrala 	set_fs(KERNEL_DS);
2255ac5a488eSSridhar Samudrala 	if (level == SOL_SOCKET)
2256ac5a488eSSridhar Samudrala 		err = sock_getsockopt(sock, level, optname, optval, optlen);
2257ac5a488eSSridhar Samudrala 	else
2258ac5a488eSSridhar Samudrala 		err = sock->ops->getsockopt(sock, level, optname, optval,
2259ac5a488eSSridhar Samudrala 					    optlen);
2260ac5a488eSSridhar Samudrala 	set_fs(oldfs);
2261ac5a488eSSridhar Samudrala 	return err;
2262ac5a488eSSridhar Samudrala }
2263ac5a488eSSridhar Samudrala 
2264ac5a488eSSridhar Samudrala int kernel_setsockopt(struct socket *sock, int level, int optname,
2265ac5a488eSSridhar Samudrala 			char *optval, int optlen)
2266ac5a488eSSridhar Samudrala {
2267ac5a488eSSridhar Samudrala 	mm_segment_t oldfs = get_fs();
2268ac5a488eSSridhar Samudrala 	int err;
2269ac5a488eSSridhar Samudrala 
2270ac5a488eSSridhar Samudrala 	set_fs(KERNEL_DS);
2271ac5a488eSSridhar Samudrala 	if (level == SOL_SOCKET)
2272ac5a488eSSridhar Samudrala 		err = sock_setsockopt(sock, level, optname, optval, optlen);
2273ac5a488eSSridhar Samudrala 	else
2274ac5a488eSSridhar Samudrala 		err = sock->ops->setsockopt(sock, level, optname, optval,
2275ac5a488eSSridhar Samudrala 					    optlen);
2276ac5a488eSSridhar Samudrala 	set_fs(oldfs);
2277ac5a488eSSridhar Samudrala 	return err;
2278ac5a488eSSridhar Samudrala }
2279ac5a488eSSridhar Samudrala 
2280ac5a488eSSridhar Samudrala int kernel_sendpage(struct socket *sock, struct page *page, int offset,
2281ac5a488eSSridhar Samudrala 		    size_t size, int flags)
2282ac5a488eSSridhar Samudrala {
2283ac5a488eSSridhar Samudrala 	if (sock->ops->sendpage)
2284ac5a488eSSridhar Samudrala 		return sock->ops->sendpage(sock, page, offset, size, flags);
2285ac5a488eSSridhar Samudrala 
2286ac5a488eSSridhar Samudrala 	return sock_no_sendpage(sock, page, offset, size, flags);
2287ac5a488eSSridhar Samudrala }
2288ac5a488eSSridhar Samudrala 
2289ac5a488eSSridhar Samudrala int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
2290ac5a488eSSridhar Samudrala {
2291ac5a488eSSridhar Samudrala 	mm_segment_t oldfs = get_fs();
2292ac5a488eSSridhar Samudrala 	int err;
2293ac5a488eSSridhar Samudrala 
2294ac5a488eSSridhar Samudrala 	set_fs(KERNEL_DS);
2295ac5a488eSSridhar Samudrala 	err = sock->ops->ioctl(sock, cmd, arg);
2296ac5a488eSSridhar Samudrala 	set_fs(oldfs);
2297ac5a488eSSridhar Samudrala 
2298ac5a488eSSridhar Samudrala 	return err;
2299ac5a488eSSridhar Samudrala }
2300ac5a488eSSridhar Samudrala 
/* ABI emulation layers need these two */
EXPORT_SYMBOL(move_addr_to_kernel);
EXPORT_SYMBOL(move_addr_to_user);

/* Socket creation, registration, lookup and message transfer */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);

/* kernel_*() socket helpers */
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);
EXPORT_SYMBOL(kernel_bind);
EXPORT_SYMBOL(kernel_listen);
EXPORT_SYMBOL(kernel_accept);
EXPORT_SYMBOL(kernel_connect);
EXPORT_SYMBOL(kernel_getsockname);
EXPORT_SYMBOL(kernel_getpeername);
EXPORT_SYMBOL(kernel_getsockopt);
EXPORT_SYMBOL(kernel_setsockopt);
EXPORT_SYMBOL(kernel_sendpage);
EXPORT_SYMBOL(kernel_sock_ioctl);
2327