11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * NET An implementation of the SOCKET network access protocol. 31da177e4SLinus Torvalds * 41da177e4SLinus Torvalds * Version: @(#)socket.c 1.1.93 18/02/95 51da177e4SLinus Torvalds * 61da177e4SLinus Torvalds * Authors: Orest Zborowski, <obz@Kodak.COM> 702c30a84SJesper Juhl * Ross Biro 81da177e4SLinus Torvalds * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 91da177e4SLinus Torvalds * 101da177e4SLinus Torvalds * Fixes: 111da177e4SLinus Torvalds * Anonymous : NOTSOCK/BADF cleanup. Error fix in 121da177e4SLinus Torvalds * shutdown() 131da177e4SLinus Torvalds * Alan Cox : verify_area() fixes 141da177e4SLinus Torvalds * Alan Cox : Removed DDI 151da177e4SLinus Torvalds * Jonathan Kamens : SOCK_DGRAM reconnect bug 161da177e4SLinus Torvalds * Alan Cox : Moved a load of checks to the very 171da177e4SLinus Torvalds * top level. 181da177e4SLinus Torvalds * Alan Cox : Move address structures to/from user 191da177e4SLinus Torvalds * mode above the protocol layers. 201da177e4SLinus Torvalds * Rob Janssen : Allow 0 length sends. 211da177e4SLinus Torvalds * Alan Cox : Asynchronous I/O support (cribbed from the 221da177e4SLinus Torvalds * tty drivers). 231da177e4SLinus Torvalds * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style) 241da177e4SLinus Torvalds * Jeff Uphoff : Made max number of sockets command-line 251da177e4SLinus Torvalds * configurable. 261da177e4SLinus Torvalds * Matti Aarnio : Made the number of sockets dynamic, 271da177e4SLinus Torvalds * to be allocated when needed, and mr. 281da177e4SLinus Torvalds * Uphoff's max is used as max to be 291da177e4SLinus Torvalds * allowed to allocate. 301da177e4SLinus Torvalds * Linus : Argh. removed all the socket allocation 311da177e4SLinus Torvalds * altogether: it's in the inode now. 321da177e4SLinus Torvalds * Alan Cox : Made sock_alloc()/sock_release() public 331da177e4SLinus Torvalds * for NetROM and future kernel nfsd type 341da177e4SLinus Torvalds * stuff. 351da177e4SLinus Torvalds * Alan Cox : sendmsg/recvmsg basics. 361da177e4SLinus Torvalds * Tom Dyas : Export net symbols. 371da177e4SLinus Torvalds * Marcin Dalecki : Fixed problems with CONFIG_NET="n". 381da177e4SLinus Torvalds * Alan Cox : Added thread locking to sys_* calls 391da177e4SLinus Torvalds * for sockets. May have errors at the 401da177e4SLinus Torvalds * moment. 411da177e4SLinus Torvalds * Kevin Buhr : Fixed the dumb errors in the above. 421da177e4SLinus Torvalds * Andi Kleen : Some small cleanups, optimizations, 431da177e4SLinus Torvalds * and fixed a copy_from_user() bug. 441da177e4SLinus Torvalds * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0) 451da177e4SLinus Torvalds * Tigran Aivazian : Made listen(2) backlog sanity checks 461da177e4SLinus Torvalds * protocol-independent 471da177e4SLinus Torvalds * 481da177e4SLinus Torvalds * 491da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 501da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 511da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 521da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 531da177e4SLinus Torvalds * 541da177e4SLinus Torvalds * 551da177e4SLinus Torvalds * This module is effectively the top level interface to the BSD socket 561da177e4SLinus Torvalds * paradigm. 571da177e4SLinus Torvalds * 581da177e4SLinus Torvalds * Based upon Swansea University Computer Society NET3.039 591da177e4SLinus Torvalds */ 601da177e4SLinus Torvalds 611da177e4SLinus Torvalds #include <linux/mm.h> 621da177e4SLinus Torvalds #include <linux/socket.h> 631da177e4SLinus Torvalds #include <linux/file.h> 641da177e4SLinus Torvalds #include <linux/net.h> 651da177e4SLinus Torvalds #include <linux/interrupt.h> 6655737fdaSStephen Hemminger #include <linux/rcupdate.h> 671da177e4SLinus Torvalds #include <linux/netdevice.h> 681da177e4SLinus Torvalds #include <linux/proc_fs.h> 691da177e4SLinus Torvalds #include <linux/seq_file.h> 704a3e2f71SArjan van de Ven #include <linux/mutex.h> 711da177e4SLinus Torvalds #include <linux/wanrouter.h> 721da177e4SLinus Torvalds #include <linux/if_bridge.h> 7320380731SArnaldo Carvalho de Melo #include <linux/if_frad.h> 7420380731SArnaldo Carvalho de Melo #include <linux/if_vlan.h> 751da177e4SLinus Torvalds #include <linux/init.h> 761da177e4SLinus Torvalds #include <linux/poll.h> 771da177e4SLinus Torvalds #include <linux/cache.h> 781da177e4SLinus Torvalds #include <linux/module.h> 791da177e4SLinus Torvalds #include <linux/highmem.h> 801da177e4SLinus Torvalds #include <linux/mount.h> 811da177e4SLinus Torvalds #include <linux/security.h> 821da177e4SLinus Torvalds #include <linux/syscalls.h> 831da177e4SLinus Torvalds #include <linux/compat.h> 841da177e4SLinus Torvalds #include <linux/kmod.h> 853ec3b2fbSDavid Woodhouse #include <linux/audit.h> 86d86b5e0eSAdrian Bunk #include <linux/wireless.h> 871da177e4SLinus Torvalds 881da177e4SLinus Torvalds #include <asm/uaccess.h> 891da177e4SLinus Torvalds #include <asm/unistd.h> 901da177e4SLinus Torvalds 911da177e4SLinus Torvalds #include <net/compat.h> 921da177e4SLinus Torvalds 931da177e4SLinus Torvalds #include <net/sock.h> 941da177e4SLinus Torvalds #include <linux/netfilter.h> 951da177e4SLinus Torvalds 961da177e4SLinus Torvalds static int sock_no_open(struct inode *irrelevant, struct file *dontcare); 97027445c3SBadari Pulavarty static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, 98027445c3SBadari Pulavarty unsigned long nr_segs, loff_t pos); 99027445c3SBadari Pulavarty static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, 100027445c3SBadari Pulavarty unsigned long nr_segs, loff_t pos); 1011da177e4SLinus Torvalds static int sock_mmap(struct file *file, struct vm_area_struct *vma); 1021da177e4SLinus Torvalds 1031da177e4SLinus Torvalds static int sock_close(struct inode *inode, struct file *file); 1041da177e4SLinus Torvalds static unsigned int sock_poll(struct file *file, 1051da177e4SLinus Torvalds struct poll_table_struct *wait); 10689bddce5SStephen Hemminger static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 10789bbfc95SShaun Pereira #ifdef CONFIG_COMPAT 10889bbfc95SShaun Pereira static long compat_sock_ioctl(struct file *file, 10989bbfc95SShaun Pereira unsigned int cmd, unsigned long arg); 11089bbfc95SShaun Pereira #endif 1111da177e4SLinus Torvalds static int sock_fasync(int fd, struct file *filp, int on); 1121da177e4SLinus Torvalds static ssize_t sock_sendpage(struct file *file, struct page *page, 1131da177e4SLinus Torvalds int offset, size_t size, loff_t *ppos, int more); 1141da177e4SLinus Torvalds 1151da177e4SLinus Torvalds /* 1161da177e4SLinus Torvalds * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear 1171da177e4SLinus Torvalds * in the operation structures but are done directly via the socketcall() multiplexor. 1181da177e4SLinus Torvalds */ 1191da177e4SLinus Torvalds 1201da177e4SLinus Torvalds static struct file_operations socket_file_ops = { 1211da177e4SLinus Torvalds .owner = THIS_MODULE, 1221da177e4SLinus Torvalds .llseek = no_llseek, 1231da177e4SLinus Torvalds .aio_read = sock_aio_read, 1241da177e4SLinus Torvalds .aio_write = sock_aio_write, 1251da177e4SLinus Torvalds .poll = sock_poll, 1261da177e4SLinus Torvalds .unlocked_ioctl = sock_ioctl, 12789bbfc95SShaun Pereira #ifdef CONFIG_COMPAT 12889bbfc95SShaun Pereira .compat_ioctl = compat_sock_ioctl, 12989bbfc95SShaun Pereira #endif 1301da177e4SLinus Torvalds .mmap = sock_mmap, 1311da177e4SLinus Torvalds .open = sock_no_open, /* special open code to disallow open via /proc */ 1321da177e4SLinus Torvalds .release = sock_close, 1331da177e4SLinus Torvalds .fasync = sock_fasync, 1345274f052SJens Axboe .sendpage = sock_sendpage, 1355274f052SJens Axboe .splice_write = generic_splice_sendpage, 1361da177e4SLinus Torvalds }; 1371da177e4SLinus Torvalds 1381da177e4SLinus Torvalds /* 1391da177e4SLinus Torvalds * The protocol list. Each protocol is registered in here. 1401da177e4SLinus Torvalds */ 1411da177e4SLinus Torvalds 1421da177e4SLinus Torvalds static DEFINE_SPINLOCK(net_family_lock); 143f0fd27d4SStephen Hemminger static const struct net_proto_family *net_families[NPROTO] __read_mostly; 1441da177e4SLinus Torvalds 1451da177e4SLinus Torvalds /* 1461da177e4SLinus Torvalds * Statistics counters of the socket lists 1471da177e4SLinus Torvalds */ 1481da177e4SLinus Torvalds 1491da177e4SLinus Torvalds static DEFINE_PER_CPU(int, sockets_in_use) = 0; 1501da177e4SLinus Torvalds 1511da177e4SLinus Torvalds /* 15289bddce5SStephen Hemminger * Support routines. 15389bddce5SStephen Hemminger * Move socket addresses back and forth across the kernel/user 1541da177e4SLinus Torvalds * divide and look after the messy bits. 1551da177e4SLinus Torvalds */ 1561da177e4SLinus Torvalds 1571da177e4SLinus Torvalds #define MAX_SOCK_ADDR 128 /* 108 for Unix domain - 1581da177e4SLinus Torvalds 16 for IP, 16 for IPX, 1591da177e4SLinus Torvalds 24 for IPv6, 1601da177e4SLinus Torvalds about 80 for AX.25 1611da177e4SLinus Torvalds must be at least one bigger than 1621da177e4SLinus Torvalds the AF_UNIX size (see net/unix/af_unix.c 1631da177e4SLinus Torvalds :unix_mkname()). 1641da177e4SLinus Torvalds */ 1651da177e4SLinus Torvalds 1661da177e4SLinus Torvalds /** 1671da177e4SLinus Torvalds * move_addr_to_kernel - copy a socket address into kernel space 1681da177e4SLinus Torvalds * @uaddr: Address in user space 1691da177e4SLinus Torvalds * @kaddr: Address in kernel space 1701da177e4SLinus Torvalds * @ulen: Length in user space 1711da177e4SLinus Torvalds * 1721da177e4SLinus Torvalds * The address is copied into kernel space. If the provided address is 1731da177e4SLinus Torvalds * too long an error code of -EINVAL is returned. If the copy gives 1741da177e4SLinus Torvalds * invalid addresses -EFAULT is returned. On a success 0 is returned. 1751da177e4SLinus Torvalds */ 1761da177e4SLinus Torvalds 1771da177e4SLinus Torvalds int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr) 1781da177e4SLinus Torvalds { 1791da177e4SLinus Torvalds if (ulen < 0 || ulen > MAX_SOCK_ADDR) 1801da177e4SLinus Torvalds return -EINVAL; 1811da177e4SLinus Torvalds if (ulen == 0) 1821da177e4SLinus Torvalds return 0; 1831da177e4SLinus Torvalds if (copy_from_user(kaddr, uaddr, ulen)) 1841da177e4SLinus Torvalds return -EFAULT; 1853ec3b2fbSDavid Woodhouse return audit_sockaddr(ulen, kaddr); 1861da177e4SLinus Torvalds } 1871da177e4SLinus Torvalds 1881da177e4SLinus Torvalds /** 1891da177e4SLinus Torvalds * move_addr_to_user - copy an address to user space 1901da177e4SLinus Torvalds * @kaddr: kernel space address 1911da177e4SLinus Torvalds * @klen: length of address in kernel 1921da177e4SLinus Torvalds * @uaddr: user space address 1931da177e4SLinus Torvalds * @ulen: pointer to user length field 1941da177e4SLinus Torvalds * 1951da177e4SLinus Torvalds * The value pointed to by ulen on entry is the buffer length available. 1961da177e4SLinus Torvalds * This is overwritten with the buffer space used. -EINVAL is returned 1971da177e4SLinus Torvalds * if an overlong buffer is specified or a negative buffer size. -EFAULT 1981da177e4SLinus Torvalds * is returned if either the buffer or the length field are not 1991da177e4SLinus Torvalds * accessible. 2001da177e4SLinus Torvalds * After copying the data up to the limit the user specifies, the true 2011da177e4SLinus Torvalds * length of the data is written over the length limit the user 2021da177e4SLinus Torvalds * specified. Zero is returned for a success. 2031da177e4SLinus Torvalds */ 2041da177e4SLinus Torvalds 20589bddce5SStephen Hemminger int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, 20689bddce5SStephen Hemminger int __user *ulen) 2071da177e4SLinus Torvalds { 2081da177e4SLinus Torvalds int err; 2091da177e4SLinus Torvalds int len; 2101da177e4SLinus Torvalds 21189bddce5SStephen Hemminger err = get_user(len, ulen); 21289bddce5SStephen Hemminger if (err) 2131da177e4SLinus Torvalds return err; 2141da177e4SLinus Torvalds if (len > klen) 2151da177e4SLinus Torvalds len = klen; 2161da177e4SLinus Torvalds if (len < 0 || len > MAX_SOCK_ADDR) 2171da177e4SLinus Torvalds return -EINVAL; 21889bddce5SStephen Hemminger if (len) { 219d6fe3945SSteve Grubb if (audit_sockaddr(klen, kaddr)) 220d6fe3945SSteve Grubb return -ENOMEM; 2211da177e4SLinus Torvalds if (copy_to_user(uaddr, kaddr, len)) 2221da177e4SLinus Torvalds return -EFAULT; 2231da177e4SLinus Torvalds } 2241da177e4SLinus Torvalds /* 2251da177e4SLinus Torvalds * "fromlen shall refer to the value before truncation.." 2261da177e4SLinus Torvalds * 1003.1g 2271da177e4SLinus Torvalds */ 2281da177e4SLinus Torvalds return __put_user(klen, ulen); 2291da177e4SLinus Torvalds } 2301da177e4SLinus Torvalds 2311da177e4SLinus Torvalds #define SOCKFS_MAGIC 0x534F434B 2321da177e4SLinus Torvalds 233ba89966cSEric Dumazet static kmem_cache_t *sock_inode_cachep __read_mostly; 2341da177e4SLinus Torvalds 2351da177e4SLinus Torvalds static struct inode *sock_alloc_inode(struct super_block *sb) 2361da177e4SLinus Torvalds { 2371da177e4SLinus Torvalds struct socket_alloc *ei; 23889bddce5SStephen Hemminger 239e94b1766SChristoph Lameter ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); 2401da177e4SLinus Torvalds if (!ei) 2411da177e4SLinus Torvalds return NULL; 2421da177e4SLinus Torvalds init_waitqueue_head(&ei->socket.wait); 2431da177e4SLinus Torvalds 2441da177e4SLinus Torvalds ei->socket.fasync_list = NULL; 2451da177e4SLinus Torvalds ei->socket.state = SS_UNCONNECTED; 2461da177e4SLinus Torvalds ei->socket.flags = 0; 2471da177e4SLinus Torvalds ei->socket.ops = NULL; 2481da177e4SLinus Torvalds ei->socket.sk = NULL; 2491da177e4SLinus Torvalds ei->socket.file = NULL; 2501da177e4SLinus Torvalds 2511da177e4SLinus Torvalds return &ei->vfs_inode; 2521da177e4SLinus Torvalds } 2531da177e4SLinus Torvalds 2541da177e4SLinus Torvalds static void sock_destroy_inode(struct inode *inode) 2551da177e4SLinus Torvalds { 2561da177e4SLinus Torvalds kmem_cache_free(sock_inode_cachep, 2571da177e4SLinus Torvalds container_of(inode, struct socket_alloc, vfs_inode)); 2581da177e4SLinus Torvalds } 2591da177e4SLinus Torvalds 2601da177e4SLinus Torvalds static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) 2611da177e4SLinus Torvalds { 2621da177e4SLinus Torvalds struct socket_alloc *ei = (struct socket_alloc *)foo; 2631da177e4SLinus Torvalds 26489bddce5SStephen Hemminger if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) 26589bddce5SStephen Hemminger == SLAB_CTOR_CONSTRUCTOR) 2661da177e4SLinus Torvalds inode_init_once(&ei->vfs_inode); 2671da177e4SLinus Torvalds } 2681da177e4SLinus Torvalds 2691da177e4SLinus Torvalds static int init_inodecache(void) 2701da177e4SLinus Torvalds { 2711da177e4SLinus Torvalds sock_inode_cachep = kmem_cache_create("sock_inode_cache", 2721da177e4SLinus Torvalds sizeof(struct socket_alloc), 27389bddce5SStephen Hemminger 0, 27489bddce5SStephen Hemminger (SLAB_HWCACHE_ALIGN | 27589bddce5SStephen Hemminger SLAB_RECLAIM_ACCOUNT | 276fffb60f9SPaul Jackson SLAB_MEM_SPREAD), 27789bddce5SStephen Hemminger init_once, 27889bddce5SStephen Hemminger NULL); 2791da177e4SLinus Torvalds if (sock_inode_cachep == NULL) 2801da177e4SLinus Torvalds return -ENOMEM; 2811da177e4SLinus Torvalds return 0; 2821da177e4SLinus Torvalds } 2831da177e4SLinus Torvalds 2841da177e4SLinus Torvalds static struct super_operations sockfs_ops = { 2851da177e4SLinus Torvalds .alloc_inode = sock_alloc_inode, 2861da177e4SLinus Torvalds .destroy_inode =sock_destroy_inode, 2871da177e4SLinus Torvalds .statfs = simple_statfs, 2881da177e4SLinus Torvalds }; 2891da177e4SLinus Torvalds 290454e2398SDavid Howells static int sockfs_get_sb(struct file_system_type *fs_type, 29189bddce5SStephen Hemminger int flags, const char *dev_name, void *data, 29289bddce5SStephen Hemminger struct vfsmount *mnt) 2931da177e4SLinus Torvalds { 294454e2398SDavid Howells return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, 295454e2398SDavid Howells mnt); 2961da177e4SLinus Torvalds } 2971da177e4SLinus Torvalds 298ba89966cSEric Dumazet static struct vfsmount *sock_mnt __read_mostly; 2991da177e4SLinus Torvalds 3001da177e4SLinus Torvalds static struct file_system_type sock_fs_type = { 3011da177e4SLinus Torvalds .name = "sockfs", 3021da177e4SLinus Torvalds .get_sb = sockfs_get_sb, 3031da177e4SLinus Torvalds .kill_sb = kill_anon_super, 3041da177e4SLinus Torvalds }; 30589bddce5SStephen Hemminger 3061da177e4SLinus Torvalds static int sockfs_delete_dentry(struct dentry *dentry) 3071da177e4SLinus Torvalds { 3081da177e4SLinus Torvalds return 1; 3091da177e4SLinus Torvalds } 3101da177e4SLinus Torvalds static struct dentry_operations sockfs_dentry_operations = { 3111da177e4SLinus Torvalds .d_delete = sockfs_delete_dentry, 3121da177e4SLinus Torvalds }; 3131da177e4SLinus Torvalds 3141da177e4SLinus Torvalds /* 3151da177e4SLinus Torvalds * Obtains the first available file descriptor and sets it up for use. 3161da177e4SLinus Torvalds * 31739d8c1b6SDavid S. Miller * These functions create file structures and maps them to fd space 31839d8c1b6SDavid S. Miller * of the current process. On success it returns file descriptor 3191da177e4SLinus Torvalds * and file struct implicitly stored in sock->file. 3201da177e4SLinus Torvalds * Note that another thread may close file descriptor before we return 3211da177e4SLinus Torvalds * from this function. We use the fact that now we do not refer 3221da177e4SLinus Torvalds * to socket after mapping. If one day we will need it, this 3231da177e4SLinus Torvalds * function will increment ref. count on file by 1. 3241da177e4SLinus Torvalds * 3251da177e4SLinus Torvalds * In any case returned fd MAY BE not valid! 3261da177e4SLinus Torvalds * This race condition is unavoidable 3271da177e4SLinus Torvalds * with shared fd spaces, we cannot solve it inside kernel, 3281da177e4SLinus Torvalds * but we take care of internal coherence yet. 3291da177e4SLinus Torvalds */ 3301da177e4SLinus Torvalds 33139d8c1b6SDavid S. Miller static int sock_alloc_fd(struct file **filep) 3321da177e4SLinus Torvalds { 3331da177e4SLinus Torvalds int fd; 3341da177e4SLinus Torvalds 3351da177e4SLinus Torvalds fd = get_unused_fd(); 33639d8c1b6SDavid S. Miller if (likely(fd >= 0)) { 3371da177e4SLinus Torvalds struct file *file = get_empty_filp(); 3381da177e4SLinus Torvalds 33939d8c1b6SDavid S. Miller *filep = file; 34039d8c1b6SDavid S. Miller if (unlikely(!file)) { 3411da177e4SLinus Torvalds put_unused_fd(fd); 34239d8c1b6SDavid S. Miller return -ENFILE; 3431da177e4SLinus Torvalds } 34439d8c1b6SDavid S. Miller } else 34539d8c1b6SDavid S. Miller *filep = NULL; 34639d8c1b6SDavid S. Miller return fd; 34739d8c1b6SDavid S. Miller } 34839d8c1b6SDavid S. Miller 34939d8c1b6SDavid S. Miller static int sock_attach_fd(struct socket *sock, struct file *file) 35039d8c1b6SDavid S. Miller { 35139d8c1b6SDavid S. Miller struct qstr this; 35239d8c1b6SDavid S. Miller char name[32]; 3531da177e4SLinus Torvalds 354f31f5f05SEric Dumazet this.len = sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino); 3551da177e4SLinus Torvalds this.name = name; 3561da177e4SLinus Torvalds this.hash = SOCK_INODE(sock)->i_ino; 3571da177e4SLinus Torvalds 3581da177e4SLinus Torvalds file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this); 35939d8c1b6SDavid S. Miller if (unlikely(!file->f_dentry)) 36039d8c1b6SDavid S. Miller return -ENOMEM; 36139d8c1b6SDavid S. Miller 3621da177e4SLinus Torvalds file->f_dentry->d_op = &sockfs_dentry_operations; 3631da177e4SLinus Torvalds d_add(file->f_dentry, SOCK_INODE(sock)); 3641da177e4SLinus Torvalds file->f_vfsmnt = mntget(sock_mnt); 3651da177e4SLinus Torvalds file->f_mapping = file->f_dentry->d_inode->i_mapping; 3661da177e4SLinus Torvalds 3671da177e4SLinus Torvalds sock->file = file; 3681da177e4SLinus Torvalds file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops; 3691da177e4SLinus Torvalds file->f_mode = FMODE_READ | FMODE_WRITE; 3701da177e4SLinus Torvalds file->f_flags = O_RDWR; 3711da177e4SLinus Torvalds file->f_pos = 0; 37207dc3f07SBenjamin LaHaise file->private_data = sock; 37339d8c1b6SDavid S. Miller 37439d8c1b6SDavid S. Miller return 0; 3751da177e4SLinus Torvalds } 3761da177e4SLinus Torvalds 37739d8c1b6SDavid S. Miller int sock_map_fd(struct socket *sock) 37839d8c1b6SDavid S. Miller { 37939d8c1b6SDavid S. Miller struct file *newfile; 38039d8c1b6SDavid S. Miller int fd = sock_alloc_fd(&newfile); 38139d8c1b6SDavid S. Miller 38239d8c1b6SDavid S. Miller if (likely(fd >= 0)) { 38339d8c1b6SDavid S. Miller int err = sock_attach_fd(sock, newfile); 38439d8c1b6SDavid S. Miller 38539d8c1b6SDavid S. Miller if (unlikely(err < 0)) { 38639d8c1b6SDavid S. Miller put_filp(newfile); 38739d8c1b6SDavid S. Miller put_unused_fd(fd); 38839d8c1b6SDavid S. Miller return err; 38939d8c1b6SDavid S. Miller } 39039d8c1b6SDavid S. Miller fd_install(fd, newfile); 39139d8c1b6SDavid S. Miller } 3921da177e4SLinus Torvalds return fd; 3931da177e4SLinus Torvalds } 3941da177e4SLinus Torvalds 3956cb153caSBenjamin LaHaise static struct socket *sock_from_file(struct file *file, int *err) 3966cb153caSBenjamin LaHaise { 3976cb153caSBenjamin LaHaise struct inode *inode; 3986cb153caSBenjamin LaHaise struct socket *sock; 3996cb153caSBenjamin LaHaise 4006cb153caSBenjamin LaHaise if (file->f_op == &socket_file_ops) 4016cb153caSBenjamin LaHaise return file->private_data; /* set in sock_map_fd */ 4026cb153caSBenjamin LaHaise 4036cb153caSBenjamin LaHaise inode = file->f_dentry->d_inode; 4046cb153caSBenjamin LaHaise if (!S_ISSOCK(inode->i_mode)) { 4056cb153caSBenjamin LaHaise *err = -ENOTSOCK; 4066cb153caSBenjamin LaHaise return NULL; 4076cb153caSBenjamin LaHaise } 4086cb153caSBenjamin LaHaise 4096cb153caSBenjamin LaHaise sock = SOCKET_I(inode); 4106cb153caSBenjamin LaHaise if (sock->file != file) { 4116cb153caSBenjamin LaHaise printk(KERN_ERR "socki_lookup: socket file changed!\n"); 4126cb153caSBenjamin LaHaise sock->file = file; 4136cb153caSBenjamin LaHaise } 4146cb153caSBenjamin LaHaise return sock; 4156cb153caSBenjamin LaHaise } 4166cb153caSBenjamin LaHaise 4171da177e4SLinus Torvalds /** 4181da177e4SLinus Torvalds * sockfd_lookup - Go from a file number to its socket slot 4191da177e4SLinus Torvalds * @fd: file handle 4201da177e4SLinus Torvalds * @err: pointer to an error code return 4211da177e4SLinus Torvalds * 4221da177e4SLinus Torvalds * The file handle passed in is locked and the socket it is bound 4231da177e4SLinus Torvalds * too is returned. If an error occurs the err pointer is overwritten 4241da177e4SLinus Torvalds * with a negative errno code and NULL is returned. The function checks 4251da177e4SLinus Torvalds * for both invalid handles and passing a handle which is not a socket. 4261da177e4SLinus Torvalds * 4271da177e4SLinus Torvalds * On a success the socket object pointer is returned. 4281da177e4SLinus Torvalds */ 4291da177e4SLinus Torvalds 4301da177e4SLinus Torvalds struct socket *sockfd_lookup(int fd, int *err) 4311da177e4SLinus Torvalds { 4321da177e4SLinus Torvalds struct file *file; 4331da177e4SLinus Torvalds struct socket *sock; 4341da177e4SLinus Torvalds 43589bddce5SStephen Hemminger file = fget(fd); 43689bddce5SStephen Hemminger if (!file) { 4371da177e4SLinus Torvalds *err = -EBADF; 4381da177e4SLinus Torvalds return NULL; 4391da177e4SLinus Torvalds } 44089bddce5SStephen Hemminger 4416cb153caSBenjamin LaHaise sock = sock_from_file(file, err); 4426cb153caSBenjamin LaHaise if (!sock) 4431da177e4SLinus Torvalds fput(file); 4446cb153caSBenjamin LaHaise return sock; 4451da177e4SLinus Torvalds } 4461da177e4SLinus Torvalds 4476cb153caSBenjamin LaHaise static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) 4486cb153caSBenjamin LaHaise { 4496cb153caSBenjamin LaHaise struct file *file; 4506cb153caSBenjamin LaHaise struct socket *sock; 4516cb153caSBenjamin LaHaise 4523672558cSHua Zhong *err = -EBADF; 4536cb153caSBenjamin LaHaise file = fget_light(fd, fput_needed); 4546cb153caSBenjamin LaHaise if (file) { 4556cb153caSBenjamin LaHaise sock = sock_from_file(file, err); 4566cb153caSBenjamin LaHaise if (sock) 4571da177e4SLinus Torvalds return sock; 4586cb153caSBenjamin LaHaise fput_light(file, *fput_needed); 4596cb153caSBenjamin LaHaise } 4606cb153caSBenjamin LaHaise return NULL; 4611da177e4SLinus Torvalds } 4621da177e4SLinus Torvalds 4631da177e4SLinus Torvalds /** 4641da177e4SLinus Torvalds * sock_alloc - allocate a socket 4651da177e4SLinus Torvalds * 4661da177e4SLinus Torvalds * Allocate a new inode and socket object. The two are bound together 4671da177e4SLinus Torvalds * and initialised. The socket is then returned. If we are out of inodes 4681da177e4SLinus Torvalds * NULL is returned. 4691da177e4SLinus Torvalds */ 4701da177e4SLinus Torvalds 4711da177e4SLinus Torvalds static struct socket *sock_alloc(void) 4721da177e4SLinus Torvalds { 4731da177e4SLinus Torvalds struct inode *inode; 4741da177e4SLinus Torvalds struct socket *sock; 4751da177e4SLinus Torvalds 4761da177e4SLinus Torvalds inode = new_inode(sock_mnt->mnt_sb); 4771da177e4SLinus Torvalds if (!inode) 4781da177e4SLinus Torvalds return NULL; 4791da177e4SLinus Torvalds 4801da177e4SLinus Torvalds sock = SOCKET_I(inode); 4811da177e4SLinus Torvalds 4821da177e4SLinus Torvalds inode->i_mode = S_IFSOCK | S_IRWXUGO; 4831da177e4SLinus Torvalds inode->i_uid = current->fsuid; 4841da177e4SLinus Torvalds inode->i_gid = current->fsgid; 4851da177e4SLinus Torvalds 4861da177e4SLinus Torvalds get_cpu_var(sockets_in_use)++; 4871da177e4SLinus Torvalds put_cpu_var(sockets_in_use); 4881da177e4SLinus Torvalds return sock; 4891da177e4SLinus Torvalds } 4901da177e4SLinus Torvalds 4911da177e4SLinus Torvalds /* 4921da177e4SLinus Torvalds * In theory you can't get an open on this inode, but /proc provides 4931da177e4SLinus Torvalds * a back door. Remember to keep it shut otherwise you'll let the 4941da177e4SLinus Torvalds * creepy crawlies in. 4951da177e4SLinus Torvalds */ 4961da177e4SLinus Torvalds 4971da177e4SLinus Torvalds static int sock_no_open(struct inode *irrelevant, struct file *dontcare) 4981da177e4SLinus Torvalds { 4991da177e4SLinus Torvalds return -ENXIO; 5001da177e4SLinus Torvalds } 5011da177e4SLinus Torvalds 5024b6f5d20SArjan van de Ven const struct file_operations bad_sock_fops = { 5031da177e4SLinus Torvalds .owner = THIS_MODULE, 5041da177e4SLinus Torvalds .open = sock_no_open, 5051da177e4SLinus Torvalds }; 5061da177e4SLinus Torvalds 5071da177e4SLinus Torvalds /** 5081da177e4SLinus Torvalds * sock_release - close a socket 5091da177e4SLinus Torvalds * @sock: socket to close 5101da177e4SLinus Torvalds * 5111da177e4SLinus Torvalds * The socket is released from the protocol stack if it has a release 5121da177e4SLinus Torvalds * callback, and the inode is then released if the socket is bound to 5131da177e4SLinus Torvalds * an inode not a file. 5141da177e4SLinus Torvalds */ 5151da177e4SLinus Torvalds 5161da177e4SLinus Torvalds void sock_release(struct socket *sock) 5171da177e4SLinus Torvalds { 5181da177e4SLinus Torvalds if (sock->ops) { 5191da177e4SLinus Torvalds struct module *owner = sock->ops->owner; 5201da177e4SLinus Torvalds 5211da177e4SLinus Torvalds sock->ops->release(sock); 5221da177e4SLinus Torvalds sock->ops = NULL; 5231da177e4SLinus Torvalds module_put(owner); 5241da177e4SLinus Torvalds } 5251da177e4SLinus Torvalds 5261da177e4SLinus Torvalds if (sock->fasync_list) 5271da177e4SLinus Torvalds printk(KERN_ERR "sock_release: fasync list not empty!\n"); 5281da177e4SLinus Torvalds 5291da177e4SLinus Torvalds get_cpu_var(sockets_in_use)--; 5301da177e4SLinus Torvalds put_cpu_var(sockets_in_use); 5311da177e4SLinus Torvalds if (!sock->file) { 5321da177e4SLinus Torvalds iput(SOCK_INODE(sock)); 5331da177e4SLinus Torvalds return; 5341da177e4SLinus Torvalds } 5351da177e4SLinus Torvalds sock->file = NULL; 5361da177e4SLinus Torvalds } 5371da177e4SLinus Torvalds 5381da177e4SLinus Torvalds static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, 5391da177e4SLinus Torvalds struct msghdr *msg, size_t size) 5401da177e4SLinus Torvalds { 5411da177e4SLinus Torvalds struct sock_iocb *si = kiocb_to_siocb(iocb); 5421da177e4SLinus Torvalds int err; 5431da177e4SLinus Torvalds 5441da177e4SLinus Torvalds si->sock = sock; 5451da177e4SLinus Torvalds si->scm = NULL; 5461da177e4SLinus Torvalds si->msg = msg; 5471da177e4SLinus Torvalds si->size = size; 5481da177e4SLinus Torvalds 5491da177e4SLinus Torvalds err = security_socket_sendmsg(sock, msg, size); 5501da177e4SLinus Torvalds if (err) 5511da177e4SLinus Torvalds return err; 5521da177e4SLinus Torvalds 5531da177e4SLinus Torvalds return sock->ops->sendmsg(iocb, sock, msg, size); 5541da177e4SLinus Torvalds } 5551da177e4SLinus Torvalds 5561da177e4SLinus Torvalds int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) 5571da177e4SLinus Torvalds { 5581da177e4SLinus Torvalds struct kiocb iocb; 5591da177e4SLinus Torvalds struct sock_iocb siocb; 5601da177e4SLinus Torvalds int ret; 5611da177e4SLinus Torvalds 5621da177e4SLinus Torvalds init_sync_kiocb(&iocb, NULL); 5631da177e4SLinus Torvalds iocb.private = &siocb; 5641da177e4SLinus Torvalds ret = __sock_sendmsg(&iocb, sock, msg, size); 5651da177e4SLinus Torvalds if (-EIOCBQUEUED == ret) 5661da177e4SLinus Torvalds ret = wait_on_sync_kiocb(&iocb); 5671da177e4SLinus Torvalds return ret; 5681da177e4SLinus Torvalds } 5691da177e4SLinus Torvalds 5701da177e4SLinus Torvalds int kernel_sendmsg(struct socket *sock, struct msghdr *msg, 5711da177e4SLinus Torvalds struct kvec *vec, size_t num, size_t size) 5721da177e4SLinus Torvalds { 5731da177e4SLinus Torvalds mm_segment_t oldfs = get_fs(); 5741da177e4SLinus Torvalds int result; 5751da177e4SLinus Torvalds 5761da177e4SLinus Torvalds set_fs(KERNEL_DS); 5771da177e4SLinus Torvalds /* 5781da177e4SLinus Torvalds * the following is safe, since for compiler definitions of kvec and 5791da177e4SLinus Torvalds * iovec are identical, yielding the same in-core layout and alignment 5801da177e4SLinus Torvalds */ 58189bddce5SStephen Hemminger msg->msg_iov = (struct iovec *)vec; 5821da177e4SLinus Torvalds msg->msg_iovlen = num; 5831da177e4SLinus Torvalds result = sock_sendmsg(sock, msg, size); 5841da177e4SLinus Torvalds set_fs(oldfs); 5851da177e4SLinus Torvalds return result; 5861da177e4SLinus Torvalds } 5871da177e4SLinus Torvalds 5881da177e4SLinus Torvalds static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, 5891da177e4SLinus Torvalds struct msghdr *msg, size_t size, int flags) 5901da177e4SLinus Torvalds { 5911da177e4SLinus Torvalds int err; 5921da177e4SLinus Torvalds struct sock_iocb *si = kiocb_to_siocb(iocb); 5931da177e4SLinus Torvalds 5941da177e4SLinus Torvalds si->sock = sock; 5951da177e4SLinus Torvalds si->scm = NULL; 5961da177e4SLinus Torvalds si->msg = msg; 5971da177e4SLinus Torvalds si->size = size; 5981da177e4SLinus Torvalds si->flags = flags; 5991da177e4SLinus Torvalds 6001da177e4SLinus Torvalds err = security_socket_recvmsg(sock, msg, size, flags); 6011da177e4SLinus Torvalds if (err) 6021da177e4SLinus Torvalds return err; 6031da177e4SLinus Torvalds 6041da177e4SLinus Torvalds return sock->ops->recvmsg(iocb, sock, msg, size, flags); 6051da177e4SLinus Torvalds } 6061da177e4SLinus Torvalds 6071da177e4SLinus Torvalds int sock_recvmsg(struct socket *sock, struct msghdr *msg, 6081da177e4SLinus Torvalds size_t size, int flags) 6091da177e4SLinus Torvalds { 6101da177e4SLinus Torvalds struct kiocb iocb; 6111da177e4SLinus Torvalds struct sock_iocb siocb; 6121da177e4SLinus Torvalds int ret; 6131da177e4SLinus Torvalds 6141da177e4SLinus Torvalds init_sync_kiocb(&iocb, NULL); 6151da177e4SLinus Torvalds iocb.private = &siocb; 6161da177e4SLinus Torvalds ret = __sock_recvmsg(&iocb, sock, msg, size, flags); 6171da177e4SLinus Torvalds if (-EIOCBQUEUED == ret) 6181da177e4SLinus Torvalds ret = wait_on_sync_kiocb(&iocb); 6191da177e4SLinus Torvalds return ret; 6201da177e4SLinus Torvalds } 6211da177e4SLinus Torvalds 6221da177e4SLinus Torvalds int kernel_recvmsg(struct socket *sock, struct msghdr *msg, 62389bddce5SStephen Hemminger struct kvec *vec, size_t num, size_t size, int flags) 6241da177e4SLinus Torvalds { 6251da177e4SLinus Torvalds mm_segment_t oldfs = get_fs(); 6261da177e4SLinus Torvalds int result; 6271da177e4SLinus Torvalds 6281da177e4SLinus Torvalds set_fs(KERNEL_DS); 6291da177e4SLinus Torvalds /* 6301da177e4SLinus Torvalds * the following is safe, since for compiler definitions of kvec and 6311da177e4SLinus Torvalds * iovec are identical, yielding the same in-core layout and alignment 6321da177e4SLinus Torvalds */ 63389bddce5SStephen Hemminger msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num; 6341da177e4SLinus Torvalds result = sock_recvmsg(sock, msg, size, flags); 6351da177e4SLinus Torvalds set_fs(oldfs); 6361da177e4SLinus Torvalds return result; 6371da177e4SLinus Torvalds } 6381da177e4SLinus Torvalds 6391da177e4SLinus Torvalds static void sock_aio_dtor(struct kiocb *iocb) 6401da177e4SLinus Torvalds { 6411da177e4SLinus Torvalds kfree(iocb->private); 6421da177e4SLinus Torvalds } 6431da177e4SLinus Torvalds 64420380731SArnaldo Carvalho de Melo static ssize_t sock_sendpage(struct file *file, struct page *page, 6451da177e4SLinus Torvalds int offset, size_t size, loff_t *ppos, int more) 6461da177e4SLinus Torvalds { 6471da177e4SLinus Torvalds struct socket *sock; 6481da177e4SLinus Torvalds int flags; 6491da177e4SLinus Torvalds 650b69aee04SEric Dumazet sock = file->private_data; 6511da177e4SLinus Torvalds 6521da177e4SLinus Torvalds flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; 6531da177e4SLinus Torvalds if (more) 6541da177e4SLinus Torvalds flags |= MSG_MORE; 6551da177e4SLinus Torvalds 6561da177e4SLinus Torvalds return sock->ops->sendpage(sock, page, offset, size, flags); 6571da177e4SLinus Torvalds } 6581da177e4SLinus Torvalds 659ce1d4d3eSChristoph Hellwig static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, 66089bddce5SStephen Hemminger struct sock_iocb *siocb) 661ce1d4d3eSChristoph Hellwig { 662ce1d4d3eSChristoph Hellwig if (!is_sync_kiocb(iocb)) { 663ce1d4d3eSChristoph Hellwig siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); 664ce1d4d3eSChristoph Hellwig if (!siocb) 665ce1d4d3eSChristoph Hellwig return NULL; 666ce1d4d3eSChristoph Hellwig iocb->ki_dtor = sock_aio_dtor; 667ce1d4d3eSChristoph Hellwig } 668ce1d4d3eSChristoph Hellwig 669ce1d4d3eSChristoph Hellwig siocb->kiocb = iocb; 670ce1d4d3eSChristoph Hellwig iocb->private = siocb; 671ce1d4d3eSChristoph Hellwig return siocb; 672ce1d4d3eSChristoph Hellwig } 673ce1d4d3eSChristoph Hellwig 674ce1d4d3eSChristoph Hellwig static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, 675027445c3SBadari Pulavarty struct file *file, const struct iovec *iov, 67689bddce5SStephen Hemminger unsigned long nr_segs) 677ce1d4d3eSChristoph Hellwig { 678ce1d4d3eSChristoph Hellwig struct socket *sock = file->private_data; 679ce1d4d3eSChristoph Hellwig size_t size = 0; 680ce1d4d3eSChristoph Hellwig int i; 681ce1d4d3eSChristoph Hellwig 682ce1d4d3eSChristoph Hellwig for (i = 0; i < nr_segs; i++) 683ce1d4d3eSChristoph Hellwig size += iov[i].iov_len; 684ce1d4d3eSChristoph Hellwig 685ce1d4d3eSChristoph Hellwig msg->msg_name = NULL; 686ce1d4d3eSChristoph Hellwig msg->msg_namelen = 0; 687ce1d4d3eSChristoph Hellwig msg->msg_control = NULL; 688ce1d4d3eSChristoph Hellwig msg->msg_controllen = 0; 689ce1d4d3eSChristoph Hellwig msg->msg_iov = (struct iovec *)iov; 690ce1d4d3eSChristoph Hellwig msg->msg_iovlen = nr_segs; 691ce1d4d3eSChristoph Hellwig msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; 692ce1d4d3eSChristoph Hellwig 693ce1d4d3eSChristoph Hellwig return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags); 694ce1d4d3eSChristoph Hellwig } 695ce1d4d3eSChristoph Hellwig 696027445c3SBadari Pulavarty static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, 697027445c3SBadari Pulavarty unsigned long nr_segs, loff_t pos) 698ce1d4d3eSChristoph Hellwig { 699ce1d4d3eSChristoph Hellwig struct sock_iocb siocb, *x; 700ce1d4d3eSChristoph Hellwig 701ce1d4d3eSChristoph Hellwig if (pos != 0) 702ce1d4d3eSChristoph Hellwig return -ESPIPE; 703027445c3SBadari Pulavarty 704027445c3SBadari Pulavarty if (iocb->ki_left == 0) /* Match SYS5 behaviour */ 705ce1d4d3eSChristoph Hellwig return 0; 706ce1d4d3eSChristoph Hellwig 707027445c3SBadari Pulavarty 708027445c3SBadari Pulavarty x = alloc_sock_iocb(iocb, &siocb); 709ce1d4d3eSChristoph Hellwig if (!x) 710ce1d4d3eSChristoph Hellwig return -ENOMEM; 711027445c3SBadari Pulavarty return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); 712ce1d4d3eSChristoph Hellwig } 713ce1d4d3eSChristoph Hellwig 714ce1d4d3eSChristoph Hellwig static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, 715027445c3SBadari Pulavarty struct file *file, const struct iovec *iov, 71689bddce5SStephen Hemminger unsigned long nr_segs) 717ce1d4d3eSChristoph Hellwig { 718ce1d4d3eSChristoph Hellwig struct socket *sock = file->private_data; 719ce1d4d3eSChristoph Hellwig size_t size = 0; 720ce1d4d3eSChristoph Hellwig int i; 721ce1d4d3eSChristoph Hellwig 722ce1d4d3eSChristoph Hellwig for (i = 0; i < nr_segs; i++) 723ce1d4d3eSChristoph Hellwig size += iov[i].iov_len; 724ce1d4d3eSChristoph Hellwig 725ce1d4d3eSChristoph Hellwig msg->msg_name = NULL; 726ce1d4d3eSChristoph Hellwig msg->msg_namelen = 0; 727ce1d4d3eSChristoph Hellwig msg->msg_control = NULL; 728ce1d4d3eSChristoph Hellwig msg->msg_controllen = 0; 729ce1d4d3eSChristoph Hellwig msg->msg_iov = (struct iovec *)iov; 730ce1d4d3eSChristoph Hellwig msg->msg_iovlen = nr_segs; 731ce1d4d3eSChristoph Hellwig msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; 732ce1d4d3eSChristoph Hellwig if (sock->type == SOCK_SEQPACKET) 733ce1d4d3eSChristoph Hellwig msg->msg_flags |= MSG_EOR; 734ce1d4d3eSChristoph Hellwig 735ce1d4d3eSChristoph Hellwig return __sock_sendmsg(iocb, sock, msg, size); 736ce1d4d3eSChristoph Hellwig } 737ce1d4d3eSChristoph Hellwig 738027445c3SBadari Pulavarty static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, 739027445c3SBadari Pulavarty unsigned long nr_segs, loff_t pos) 7401da177e4SLinus Torvalds { 741ce1d4d3eSChristoph Hellwig struct sock_iocb siocb, *x; 7421da177e4SLinus Torvalds 743ce1d4d3eSChristoph Hellwig if (pos != 0) 744ce1d4d3eSChristoph Hellwig return -ESPIPE; 745027445c3SBadari Pulavarty 746027445c3SBadari Pulavarty if (iocb->ki_left == 0) /* Match SYS5 behaviour */ 747ce1d4d3eSChristoph Hellwig return 0; 748ce1d4d3eSChristoph Hellwig 749027445c3SBadari Pulavarty x = alloc_sock_iocb(iocb, &siocb); 750ce1d4d3eSChristoph Hellwig if (!x) 751ce1d4d3eSChristoph Hellwig return -ENOMEM; 752ce1d4d3eSChristoph Hellwig 753027445c3SBadari Pulavarty return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); 7541da177e4SLinus Torvalds } 7551da177e4SLinus Torvalds 7561da177e4SLinus Torvalds /* 7571da177e4SLinus Torvalds * Atomic setting of ioctl hooks to avoid race 7581da177e4SLinus Torvalds * with module unload. 7591da177e4SLinus Torvalds */ 7601da177e4SLinus Torvalds 7614a3e2f71SArjan van de Ven static DEFINE_MUTEX(br_ioctl_mutex); 7621da177e4SLinus Torvalds static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL; 7631da177e4SLinus Torvalds 7641da177e4SLinus Torvalds void brioctl_set(int (*hook) (unsigned int, void __user *)) 7651da177e4SLinus Torvalds { 7664a3e2f71SArjan van de Ven mutex_lock(&br_ioctl_mutex); 7671da177e4SLinus Torvalds br_ioctl_hook = hook; 7684a3e2f71SArjan van de Ven mutex_unlock(&br_ioctl_mutex); 7691da177e4SLinus Torvalds } 77089bddce5SStephen Hemminger 7711da177e4SLinus Torvalds EXPORT_SYMBOL(brioctl_set); 7721da177e4SLinus Torvalds 7734a3e2f71SArjan van de Ven static DEFINE_MUTEX(vlan_ioctl_mutex); 7741da177e4SLinus Torvalds static int (*vlan_ioctl_hook) (void __user *arg); 7751da177e4SLinus Torvalds 7761da177e4SLinus Torvalds void vlan_ioctl_set(int (*hook) (void __user *)) 7771da177e4SLinus Torvalds { 7784a3e2f71SArjan van de Ven mutex_lock(&vlan_ioctl_mutex); 7791da177e4SLinus Torvalds vlan_ioctl_hook = hook; 7804a3e2f71SArjan van de Ven mutex_unlock(&vlan_ioctl_mutex); 7811da177e4SLinus Torvalds } 78289bddce5SStephen Hemminger 7831da177e4SLinus Torvalds EXPORT_SYMBOL(vlan_ioctl_set); 7841da177e4SLinus Torvalds 7854a3e2f71SArjan van de Ven static DEFINE_MUTEX(dlci_ioctl_mutex); 7861da177e4SLinus Torvalds static int (*dlci_ioctl_hook) (unsigned int, void __user *); 7871da177e4SLinus Torvalds 7881da177e4SLinus Torvalds void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) 7891da177e4SLinus Torvalds { 7904a3e2f71SArjan van de Ven mutex_lock(&dlci_ioctl_mutex); 7911da177e4SLinus Torvalds dlci_ioctl_hook = hook; 7924a3e2f71SArjan van de Ven mutex_unlock(&dlci_ioctl_mutex); 7931da177e4SLinus Torvalds } 79489bddce5SStephen Hemminger 7951da177e4SLinus Torvalds EXPORT_SYMBOL(dlci_ioctl_set); 7961da177e4SLinus Torvalds 7971da177e4SLinus Torvalds /* 7981da177e4SLinus Torvalds * With an ioctl, arg may well be a user mode pointer, but we don't know 7991da177e4SLinus Torvalds * what to do with it - that's up to the protocol still. 8001da177e4SLinus Torvalds */ 8011da177e4SLinus Torvalds 8021da177e4SLinus Torvalds static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) 8031da177e4SLinus Torvalds { 8041da177e4SLinus Torvalds struct socket *sock; 8051da177e4SLinus Torvalds void __user *argp = (void __user *)arg; 8061da177e4SLinus Torvalds int pid, err; 8071da177e4SLinus Torvalds 808b69aee04SEric Dumazet sock = file->private_data; 8091da177e4SLinus Torvalds if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { 8101da177e4SLinus Torvalds err = dev_ioctl(cmd, argp); 8111da177e4SLinus Torvalds } else 812d86b5e0eSAdrian Bunk #ifdef CONFIG_WIRELESS_EXT 8131da177e4SLinus Torvalds if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { 8141da177e4SLinus Torvalds err = dev_ioctl(cmd, argp); 8151da177e4SLinus Torvalds } else 816d86b5e0eSAdrian Bunk #endif /* CONFIG_WIRELESS_EXT */ 8171da177e4SLinus Torvalds switch (cmd) { 8181da177e4SLinus Torvalds case FIOSETOWN: 8191da177e4SLinus Torvalds case SIOCSPGRP: 8201da177e4SLinus Torvalds err = -EFAULT; 8211da177e4SLinus Torvalds if (get_user(pid, (int __user *)argp)) 8221da177e4SLinus Torvalds break; 8231da177e4SLinus Torvalds err = f_setown(sock->file, pid, 1); 8241da177e4SLinus Torvalds break; 8251da177e4SLinus Torvalds case FIOGETOWN: 8261da177e4SLinus Torvalds case SIOCGPGRP: 827609d7fa9SEric W. Biederman err = put_user(f_getown(sock->file), 82889bddce5SStephen Hemminger (int __user *)argp); 8291da177e4SLinus Torvalds break; 8301da177e4SLinus Torvalds case SIOCGIFBR: 8311da177e4SLinus Torvalds case SIOCSIFBR: 8321da177e4SLinus Torvalds case SIOCBRADDBR: 8331da177e4SLinus Torvalds case SIOCBRDELBR: 8341da177e4SLinus Torvalds err = -ENOPKG; 8351da177e4SLinus Torvalds if (!br_ioctl_hook) 8361da177e4SLinus Torvalds request_module("bridge"); 8371da177e4SLinus Torvalds 8384a3e2f71SArjan van de Ven mutex_lock(&br_ioctl_mutex); 8391da177e4SLinus Torvalds if (br_ioctl_hook) 8401da177e4SLinus Torvalds err = br_ioctl_hook(cmd, argp); 8414a3e2f71SArjan van de Ven mutex_unlock(&br_ioctl_mutex); 8421da177e4SLinus Torvalds break; 8431da177e4SLinus Torvalds case SIOCGIFVLAN: 8441da177e4SLinus Torvalds case SIOCSIFVLAN: 8451da177e4SLinus Torvalds err = -ENOPKG; 8461da177e4SLinus Torvalds if (!vlan_ioctl_hook) 8471da177e4SLinus Torvalds request_module("8021q"); 8481da177e4SLinus Torvalds 8494a3e2f71SArjan van de Ven mutex_lock(&vlan_ioctl_mutex); 8501da177e4SLinus Torvalds if (vlan_ioctl_hook) 8511da177e4SLinus Torvalds err = vlan_ioctl_hook(argp); 8524a3e2f71SArjan van de Ven mutex_unlock(&vlan_ioctl_mutex); 8531da177e4SLinus Torvalds break; 8541da177e4SLinus Torvalds case SIOCADDDLCI: 8551da177e4SLinus Torvalds case SIOCDELDLCI: 8561da177e4SLinus Torvalds err = -ENOPKG; 8571da177e4SLinus Torvalds if (!dlci_ioctl_hook) 8581da177e4SLinus Torvalds request_module("dlci"); 8591da177e4SLinus Torvalds 8601da177e4SLinus Torvalds if (dlci_ioctl_hook) { 8614a3e2f71SArjan van de Ven mutex_lock(&dlci_ioctl_mutex); 8621da177e4SLinus Torvalds err = dlci_ioctl_hook(cmd, argp); 8634a3e2f71SArjan van de Ven mutex_unlock(&dlci_ioctl_mutex); 8641da177e4SLinus Torvalds } 8651da177e4SLinus Torvalds break; 8661da177e4SLinus Torvalds default: 8671da177e4SLinus Torvalds err = sock->ops->ioctl(sock, cmd, arg); 868b5e5fa5eSChristoph Hellwig 869b5e5fa5eSChristoph Hellwig /* 870b5e5fa5eSChristoph Hellwig * If this ioctl is unknown try to hand it down 871b5e5fa5eSChristoph Hellwig * to the NIC driver. 872b5e5fa5eSChristoph Hellwig */ 873b5e5fa5eSChristoph Hellwig if (err == -ENOIOCTLCMD) 874b5e5fa5eSChristoph Hellwig err = dev_ioctl(cmd, argp); 8751da177e4SLinus Torvalds break; 8761da177e4SLinus Torvalds } 8771da177e4SLinus Torvalds return err; 8781da177e4SLinus Torvalds } 8791da177e4SLinus Torvalds 8801da177e4SLinus Torvalds int sock_create_lite(int family, int type, int protocol, struct socket **res) 8811da177e4SLinus Torvalds { 8821da177e4SLinus Torvalds int err; 8831da177e4SLinus Torvalds struct socket *sock = NULL; 8841da177e4SLinus Torvalds 8851da177e4SLinus Torvalds err = security_socket_create(family, type, protocol, 1); 8861da177e4SLinus Torvalds if (err) 8871da177e4SLinus Torvalds goto out; 8881da177e4SLinus Torvalds 8891da177e4SLinus Torvalds sock = sock_alloc(); 8901da177e4SLinus Torvalds if (!sock) { 8911da177e4SLinus Torvalds err = -ENOMEM; 8921da177e4SLinus Torvalds goto out; 8931da177e4SLinus Torvalds } 8941da177e4SLinus Torvalds 8951da177e4SLinus Torvalds sock->type = type; 8967420ed23SVenkat Yekkirala err = security_socket_post_create(sock, family, type, protocol, 1); 8977420ed23SVenkat Yekkirala if (err) 8987420ed23SVenkat Yekkirala goto out_release; 8997420ed23SVenkat Yekkirala 9001da177e4SLinus Torvalds out: 9011da177e4SLinus Torvalds *res = sock; 9021da177e4SLinus Torvalds return err; 9037420ed23SVenkat Yekkirala out_release: 9047420ed23SVenkat Yekkirala sock_release(sock); 9057420ed23SVenkat Yekkirala sock = NULL; 9067420ed23SVenkat Yekkirala goto out; 9071da177e4SLinus Torvalds } 9081da177e4SLinus Torvalds 9091da177e4SLinus Torvalds /* No kernel lock held - perfect */ 9101da177e4SLinus Torvalds static unsigned int sock_poll(struct file *file, poll_table *wait) 9111da177e4SLinus Torvalds { 9121da177e4SLinus Torvalds struct socket *sock; 9131da177e4SLinus Torvalds 9141da177e4SLinus Torvalds /* 9151da177e4SLinus Torvalds * We can't return errors to poll, so it's either yes or no. 9161da177e4SLinus Torvalds */ 917b69aee04SEric Dumazet sock = file->private_data; 9181da177e4SLinus Torvalds return sock->ops->poll(file, sock, wait); 9191da177e4SLinus Torvalds } 9201da177e4SLinus Torvalds 9211da177e4SLinus Torvalds static int sock_mmap(struct file *file, struct vm_area_struct *vma) 9221da177e4SLinus Torvalds { 923b69aee04SEric Dumazet struct socket *sock = file->private_data; 9241da177e4SLinus Torvalds 9251da177e4SLinus Torvalds return sock->ops->mmap(file, sock, vma); 9261da177e4SLinus Torvalds } 9271da177e4SLinus Torvalds 92820380731SArnaldo Carvalho de Melo static int sock_close(struct inode *inode, struct file *filp) 9291da177e4SLinus Torvalds { 9301da177e4SLinus Torvalds /* 9311da177e4SLinus Torvalds * It was possible the inode is NULL we were 9321da177e4SLinus Torvalds * closing an unfinished socket. 9331da177e4SLinus Torvalds */ 9341da177e4SLinus Torvalds 93589bddce5SStephen Hemminger if (!inode) { 9361da177e4SLinus Torvalds printk(KERN_DEBUG "sock_close: NULL inode\n"); 9371da177e4SLinus Torvalds return 0; 9381da177e4SLinus Torvalds } 9391da177e4SLinus Torvalds sock_fasync(-1, filp, 0); 9401da177e4SLinus Torvalds sock_release(SOCKET_I(inode)); 9411da177e4SLinus Torvalds return 0; 9421da177e4SLinus Torvalds } 9431da177e4SLinus Torvalds 9441da177e4SLinus Torvalds /* 9451da177e4SLinus Torvalds * Update the socket async list 9461da177e4SLinus Torvalds * 9471da177e4SLinus Torvalds * Fasync_list locking strategy. 9481da177e4SLinus Torvalds * 9491da177e4SLinus Torvalds * 1. fasync_list is modified only under process context socket lock 9501da177e4SLinus Torvalds * i.e. under semaphore. 9511da177e4SLinus Torvalds * 2. fasync_list is used under read_lock(&sk->sk_callback_lock) 9521da177e4SLinus Torvalds * or under socket lock. 9531da177e4SLinus Torvalds * 3. fasync_list can be used from softirq context, so that 9541da177e4SLinus Torvalds * modification under socket lock have to be enhanced with 9551da177e4SLinus Torvalds * write_lock_bh(&sk->sk_callback_lock). 9561da177e4SLinus Torvalds * --ANK (990710) 9571da177e4SLinus Torvalds */ 9581da177e4SLinus Torvalds 9591da177e4SLinus Torvalds static int sock_fasync(int fd, struct file *filp, int on) 9601da177e4SLinus Torvalds { 9611da177e4SLinus Torvalds struct fasync_struct *fa, *fna = NULL, **prev; 9621da177e4SLinus Torvalds struct socket *sock; 9631da177e4SLinus Torvalds struct sock *sk; 9641da177e4SLinus Torvalds 96589bddce5SStephen Hemminger if (on) { 9668b3a7005SKris Katterjohn fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL); 9671da177e4SLinus Torvalds if (fna == NULL) 9681da177e4SLinus Torvalds return -ENOMEM; 9691da177e4SLinus Torvalds } 9701da177e4SLinus Torvalds 971b69aee04SEric Dumazet sock = filp->private_data; 9721da177e4SLinus Torvalds 97389bddce5SStephen Hemminger sk = sock->sk; 97489bddce5SStephen Hemminger if (sk == NULL) { 9751da177e4SLinus Torvalds kfree(fna); 9761da177e4SLinus Torvalds return -EINVAL; 9771da177e4SLinus Torvalds } 9781da177e4SLinus Torvalds 9791da177e4SLinus Torvalds lock_sock(sk); 9801da177e4SLinus Torvalds 9811da177e4SLinus Torvalds prev = &(sock->fasync_list); 9821da177e4SLinus Torvalds 9831da177e4SLinus Torvalds for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) 9841da177e4SLinus Torvalds if (fa->fa_file == filp) 9851da177e4SLinus Torvalds break; 9861da177e4SLinus Torvalds 98789bddce5SStephen Hemminger if (on) { 98889bddce5SStephen Hemminger if (fa != NULL) { 9891da177e4SLinus Torvalds write_lock_bh(&sk->sk_callback_lock); 9901da177e4SLinus Torvalds fa->fa_fd = fd; 9911da177e4SLinus Torvalds write_unlock_bh(&sk->sk_callback_lock); 9921da177e4SLinus Torvalds 9931da177e4SLinus Torvalds kfree(fna); 9941da177e4SLinus Torvalds goto out; 9951da177e4SLinus Torvalds } 9961da177e4SLinus Torvalds fna->fa_file = filp; 9971da177e4SLinus Torvalds fna->fa_fd = fd; 9981da177e4SLinus Torvalds fna->magic = FASYNC_MAGIC; 9991da177e4SLinus Torvalds fna->fa_next = sock->fasync_list; 10001da177e4SLinus Torvalds write_lock_bh(&sk->sk_callback_lock); 10011da177e4SLinus Torvalds sock->fasync_list = fna; 10021da177e4SLinus Torvalds write_unlock_bh(&sk->sk_callback_lock); 100389bddce5SStephen Hemminger } else { 100489bddce5SStephen Hemminger if (fa != NULL) { 10051da177e4SLinus Torvalds write_lock_bh(&sk->sk_callback_lock); 10061da177e4SLinus Torvalds *prev = fa->fa_next; 10071da177e4SLinus Torvalds write_unlock_bh(&sk->sk_callback_lock); 10081da177e4SLinus Torvalds kfree(fa); 10091da177e4SLinus Torvalds } 10101da177e4SLinus Torvalds } 10111da177e4SLinus Torvalds 10121da177e4SLinus Torvalds out: 10131da177e4SLinus Torvalds release_sock(sock->sk); 10141da177e4SLinus Torvalds return 0; 10151da177e4SLinus Torvalds } 10161da177e4SLinus Torvalds 10171da177e4SLinus Torvalds /* This function may be called only under socket lock or callback_lock */ 10181da177e4SLinus Torvalds 10191da177e4SLinus Torvalds int sock_wake_async(struct socket *sock, int how, int band) 10201da177e4SLinus Torvalds { 10211da177e4SLinus Torvalds if (!sock || !sock->fasync_list) 10221da177e4SLinus Torvalds return -1; 102389bddce5SStephen Hemminger switch (how) { 10241da177e4SLinus Torvalds case 1: 10251da177e4SLinus Torvalds 10261da177e4SLinus Torvalds if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) 10271da177e4SLinus Torvalds break; 10281da177e4SLinus Torvalds goto call_kill; 10291da177e4SLinus Torvalds case 2: 10301da177e4SLinus Torvalds if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags)) 10311da177e4SLinus Torvalds break; 10321da177e4SLinus Torvalds /* fall through */ 10331da177e4SLinus Torvalds case 0: 10341da177e4SLinus Torvalds call_kill: 10351da177e4SLinus Torvalds __kill_fasync(sock->fasync_list, SIGIO, band); 10361da177e4SLinus Torvalds break; 10371da177e4SLinus Torvalds case 3: 10381da177e4SLinus Torvalds __kill_fasync(sock->fasync_list, SIGURG, band); 10391da177e4SLinus Torvalds } 10401da177e4SLinus Torvalds return 0; 10411da177e4SLinus Torvalds } 10421da177e4SLinus Torvalds 104389bddce5SStephen Hemminger static int __sock_create(int family, int type, int protocol, 104489bddce5SStephen Hemminger struct socket **res, int kern) 10451da177e4SLinus Torvalds { 10461da177e4SLinus Torvalds int err; 10471da177e4SLinus Torvalds struct socket *sock; 104855737fdaSStephen Hemminger const struct net_proto_family *pf; 10491da177e4SLinus Torvalds 10501da177e4SLinus Torvalds /* 10511da177e4SLinus Torvalds * Check protocol is in range 10521da177e4SLinus Torvalds */ 10531da177e4SLinus Torvalds if (family < 0 || family >= NPROTO) 10541da177e4SLinus Torvalds return -EAFNOSUPPORT; 10551da177e4SLinus Torvalds if (type < 0 || type >= SOCK_MAX) 10561da177e4SLinus Torvalds return -EINVAL; 10571da177e4SLinus Torvalds 10581da177e4SLinus Torvalds /* Compatibility. 10591da177e4SLinus Torvalds 10601da177e4SLinus Torvalds This uglymoron is moved from INET layer to here to avoid 10611da177e4SLinus Torvalds deadlock in module load. 10621da177e4SLinus Torvalds */ 10631da177e4SLinus Torvalds if (family == PF_INET && type == SOCK_PACKET) { 10641da177e4SLinus Torvalds static int warned; 10651da177e4SLinus Torvalds if (!warned) { 10661da177e4SLinus Torvalds warned = 1; 106789bddce5SStephen Hemminger printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", 106889bddce5SStephen Hemminger current->comm); 10691da177e4SLinus Torvalds } 10701da177e4SLinus Torvalds family = PF_PACKET; 10711da177e4SLinus Torvalds } 10721da177e4SLinus Torvalds 10731da177e4SLinus Torvalds err = security_socket_create(family, type, protocol, kern); 10741da177e4SLinus Torvalds if (err) 10751da177e4SLinus Torvalds return err; 10761da177e4SLinus Torvalds 107755737fdaSStephen Hemminger /* 107855737fdaSStephen Hemminger * Allocate the socket and allow the family to set things up. if 107955737fdaSStephen Hemminger * the protocol is 0, the family is instructed to select an appropriate 108055737fdaSStephen Hemminger * default. 108155737fdaSStephen Hemminger */ 108255737fdaSStephen Hemminger sock = sock_alloc(); 108355737fdaSStephen Hemminger if (!sock) { 108455737fdaSStephen Hemminger if (net_ratelimit()) 108555737fdaSStephen Hemminger printk(KERN_WARNING "socket: no more sockets\n"); 108655737fdaSStephen Hemminger return -ENFILE; /* Not exactly a match, but its the 108755737fdaSStephen Hemminger closest posix thing */ 108855737fdaSStephen Hemminger } 108955737fdaSStephen Hemminger 109055737fdaSStephen Hemminger sock->type = type; 109155737fdaSStephen Hemminger 10921da177e4SLinus Torvalds #if defined(CONFIG_KMOD) 10931da177e4SLinus Torvalds /* Attempt to load a protocol module if the find failed. 10941da177e4SLinus Torvalds * 10951da177e4SLinus Torvalds * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user 10961da177e4SLinus Torvalds * requested real, full-featured networking support upon configuration. 10971da177e4SLinus Torvalds * Otherwise module support will break! 10981da177e4SLinus Torvalds */ 109955737fdaSStephen Hemminger if (net_families[family] == NULL) 11001da177e4SLinus Torvalds request_module("net-pf-%d", family); 11011da177e4SLinus Torvalds #endif 11021da177e4SLinus Torvalds 110355737fdaSStephen Hemminger rcu_read_lock(); 110455737fdaSStephen Hemminger pf = rcu_dereference(net_families[family]); 11051da177e4SLinus Torvalds err = -EAFNOSUPPORT; 110655737fdaSStephen Hemminger if (!pf) 110755737fdaSStephen Hemminger goto out_release; 11081da177e4SLinus Torvalds 11091da177e4SLinus Torvalds /* 11101da177e4SLinus Torvalds * We will call the ->create function, that possibly is in a loadable 11111da177e4SLinus Torvalds * module, so we have to bump that loadable module refcnt first. 11121da177e4SLinus Torvalds */ 111355737fdaSStephen Hemminger if (!try_module_get(pf->owner)) 11141da177e4SLinus Torvalds goto out_release; 11151da177e4SLinus Torvalds 111655737fdaSStephen Hemminger /* Now protected by module ref count */ 111755737fdaSStephen Hemminger rcu_read_unlock(); 111855737fdaSStephen Hemminger 111955737fdaSStephen Hemminger err = pf->create(sock, protocol); 112055737fdaSStephen Hemminger if (err < 0) 11211da177e4SLinus Torvalds goto out_module_put; 1122a79af59eSFrank Filz 11231da177e4SLinus Torvalds /* 11241da177e4SLinus Torvalds * Now to bump the refcnt of the [loadable] module that owns this 11251da177e4SLinus Torvalds * socket at sock_release time we decrement its refcnt. 11261da177e4SLinus Torvalds */ 112755737fdaSStephen Hemminger if (!try_module_get(sock->ops->owner)) 112855737fdaSStephen Hemminger goto out_module_busy; 112955737fdaSStephen Hemminger 11301da177e4SLinus Torvalds /* 11311da177e4SLinus Torvalds * Now that we're done with the ->create function, the [loadable] 11321da177e4SLinus Torvalds * module can have its refcnt decremented 11331da177e4SLinus Torvalds */ 113455737fdaSStephen Hemminger module_put(pf->owner); 11357420ed23SVenkat Yekkirala err = security_socket_post_create(sock, family, type, protocol, kern); 11367420ed23SVenkat Yekkirala if (err) 11377420ed23SVenkat Yekkirala goto out_release; 113855737fdaSStephen Hemminger *res = sock; 11391da177e4SLinus Torvalds 114055737fdaSStephen Hemminger return 0; 114155737fdaSStephen Hemminger 114255737fdaSStephen Hemminger out_module_busy: 114355737fdaSStephen Hemminger err = -EAFNOSUPPORT; 11441da177e4SLinus Torvalds out_module_put: 114555737fdaSStephen Hemminger sock->ops = NULL; 114655737fdaSStephen Hemminger module_put(pf->owner); 114755737fdaSStephen Hemminger out_sock_release: 11481da177e4SLinus Torvalds sock_release(sock); 114955737fdaSStephen Hemminger return err; 115055737fdaSStephen Hemminger 115155737fdaSStephen Hemminger out_release: 115255737fdaSStephen Hemminger rcu_read_unlock(); 115355737fdaSStephen Hemminger goto out_sock_release; 11541da177e4SLinus Torvalds } 11551da177e4SLinus Torvalds 11561da177e4SLinus Torvalds int sock_create(int family, int type, int protocol, struct socket **res) 11571da177e4SLinus Torvalds { 11581da177e4SLinus Torvalds return __sock_create(family, type, protocol, res, 0); 11591da177e4SLinus Torvalds } 11601da177e4SLinus Torvalds 11611da177e4SLinus Torvalds int sock_create_kern(int family, int type, int protocol, struct socket **res) 11621da177e4SLinus Torvalds { 11631da177e4SLinus Torvalds return __sock_create(family, type, protocol, res, 1); 11641da177e4SLinus Torvalds } 11651da177e4SLinus Torvalds 11661da177e4SLinus Torvalds asmlinkage long sys_socket(int family, int type, int protocol) 11671da177e4SLinus Torvalds { 11681da177e4SLinus Torvalds int retval; 11691da177e4SLinus Torvalds struct socket *sock; 11701da177e4SLinus Torvalds 11711da177e4SLinus Torvalds retval = sock_create(family, type, protocol, &sock); 11721da177e4SLinus Torvalds if (retval < 0) 11731da177e4SLinus Torvalds goto out; 11741da177e4SLinus Torvalds 11751da177e4SLinus Torvalds retval = sock_map_fd(sock); 11761da177e4SLinus Torvalds if (retval < 0) 11771da177e4SLinus Torvalds goto out_release; 11781da177e4SLinus Torvalds 11791da177e4SLinus Torvalds out: 11801da177e4SLinus Torvalds /* It may be already another descriptor 8) Not kernel problem. */ 11811da177e4SLinus Torvalds return retval; 11821da177e4SLinus Torvalds 11831da177e4SLinus Torvalds out_release: 11841da177e4SLinus Torvalds sock_release(sock); 11851da177e4SLinus Torvalds return retval; 11861da177e4SLinus Torvalds } 11871da177e4SLinus Torvalds 11881da177e4SLinus Torvalds /* 11891da177e4SLinus Torvalds * Create a pair of connected sockets. 11901da177e4SLinus Torvalds */ 11911da177e4SLinus Torvalds 119289bddce5SStephen Hemminger asmlinkage long sys_socketpair(int family, int type, int protocol, 119389bddce5SStephen Hemminger int __user *usockvec) 11941da177e4SLinus Torvalds { 11951da177e4SLinus Torvalds struct socket *sock1, *sock2; 11961da177e4SLinus Torvalds int fd1, fd2, err; 11971da177e4SLinus Torvalds 11981da177e4SLinus Torvalds /* 11991da177e4SLinus Torvalds * Obtain the first socket and check if the underlying protocol 12001da177e4SLinus Torvalds * supports the socketpair call. 12011da177e4SLinus Torvalds */ 12021da177e4SLinus Torvalds 12031da177e4SLinus Torvalds err = sock_create(family, type, protocol, &sock1); 12041da177e4SLinus Torvalds if (err < 0) 12051da177e4SLinus Torvalds goto out; 12061da177e4SLinus Torvalds 12071da177e4SLinus Torvalds err = sock_create(family, type, protocol, &sock2); 12081da177e4SLinus Torvalds if (err < 0) 12091da177e4SLinus Torvalds goto out_release_1; 12101da177e4SLinus Torvalds 12111da177e4SLinus Torvalds err = sock1->ops->socketpair(sock1, sock2); 12121da177e4SLinus Torvalds if (err < 0) 12131da177e4SLinus Torvalds goto out_release_both; 12141da177e4SLinus Torvalds 12151da177e4SLinus Torvalds fd1 = fd2 = -1; 12161da177e4SLinus Torvalds 12171da177e4SLinus Torvalds err = sock_map_fd(sock1); 12181da177e4SLinus Torvalds if (err < 0) 12191da177e4SLinus Torvalds goto out_release_both; 12201da177e4SLinus Torvalds fd1 = err; 12211da177e4SLinus Torvalds 12221da177e4SLinus Torvalds err = sock_map_fd(sock2); 12231da177e4SLinus Torvalds if (err < 0) 12241da177e4SLinus Torvalds goto out_close_1; 12251da177e4SLinus Torvalds fd2 = err; 12261da177e4SLinus Torvalds 12271da177e4SLinus Torvalds /* fd1 and fd2 may be already another descriptors. 12281da177e4SLinus Torvalds * Not kernel problem. 12291da177e4SLinus Torvalds */ 12301da177e4SLinus Torvalds 12311da177e4SLinus Torvalds err = put_user(fd1, &usockvec[0]); 12321da177e4SLinus Torvalds if (!err) 12331da177e4SLinus Torvalds err = put_user(fd2, &usockvec[1]); 12341da177e4SLinus Torvalds if (!err) 12351da177e4SLinus Torvalds return 0; 12361da177e4SLinus Torvalds 12371da177e4SLinus Torvalds sys_close(fd2); 12381da177e4SLinus Torvalds sys_close(fd1); 12391da177e4SLinus Torvalds return err; 12401da177e4SLinus Torvalds 12411da177e4SLinus Torvalds out_close_1: 12421da177e4SLinus Torvalds sock_release(sock2); 12431da177e4SLinus Torvalds sys_close(fd1); 12441da177e4SLinus Torvalds return err; 12451da177e4SLinus Torvalds 12461da177e4SLinus Torvalds out_release_both: 12471da177e4SLinus Torvalds sock_release(sock2); 12481da177e4SLinus Torvalds out_release_1: 12491da177e4SLinus Torvalds sock_release(sock1); 12501da177e4SLinus Torvalds out: 12511da177e4SLinus Torvalds return err; 12521da177e4SLinus Torvalds } 12531da177e4SLinus Torvalds 12541da177e4SLinus Torvalds /* 12551da177e4SLinus Torvalds * Bind a name to a socket. Nothing much to do here since it's 12561da177e4SLinus Torvalds * the protocol's responsibility to handle the local address. 12571da177e4SLinus Torvalds * 12581da177e4SLinus Torvalds * We move the socket address to kernel space before we call 12591da177e4SLinus Torvalds * the protocol layer (having also checked the address is ok). 12601da177e4SLinus Torvalds */ 12611da177e4SLinus Torvalds 12621da177e4SLinus Torvalds asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen) 12631da177e4SLinus Torvalds { 12641da177e4SLinus Torvalds struct socket *sock; 12651da177e4SLinus Torvalds char address[MAX_SOCK_ADDR]; 12666cb153caSBenjamin LaHaise int err, fput_needed; 12671da177e4SLinus Torvalds 126889bddce5SStephen Hemminger sock = sockfd_lookup_light(fd, &err, &fput_needed); 126989bddce5SStephen Hemminger if(sock) { 127089bddce5SStephen Hemminger err = move_addr_to_kernel(umyaddr, addrlen, address); 127189bddce5SStephen Hemminger if (err >= 0) { 127289bddce5SStephen Hemminger err = security_socket_bind(sock, 127389bddce5SStephen Hemminger (struct sockaddr *)address, 127489bddce5SStephen Hemminger addrlen); 12756cb153caSBenjamin LaHaise if (!err) 12766cb153caSBenjamin LaHaise err = sock->ops->bind(sock, 127789bddce5SStephen Hemminger (struct sockaddr *) 127889bddce5SStephen Hemminger address, addrlen); 12791da177e4SLinus Torvalds } 12806cb153caSBenjamin LaHaise fput_light(sock->file, fput_needed); 12811da177e4SLinus Torvalds } 12821da177e4SLinus Torvalds return err; 12831da177e4SLinus Torvalds } 12841da177e4SLinus Torvalds 12851da177e4SLinus Torvalds /* 12861da177e4SLinus Torvalds * Perform a listen. Basically, we allow the protocol to do anything 12871da177e4SLinus Torvalds * necessary for a listen, and if that works, we mark the socket as 12881da177e4SLinus Torvalds * ready for listening. 12891da177e4SLinus Torvalds */ 12901da177e4SLinus Torvalds 12917a42c217SBrian Haley int sysctl_somaxconn __read_mostly = SOMAXCONN; 12921da177e4SLinus Torvalds 12931da177e4SLinus Torvalds asmlinkage long sys_listen(int fd, int backlog) 12941da177e4SLinus Torvalds { 12951da177e4SLinus Torvalds struct socket *sock; 12966cb153caSBenjamin LaHaise int err, fput_needed; 12971da177e4SLinus Torvalds 129889bddce5SStephen Hemminger sock = sockfd_lookup_light(fd, &err, &fput_needed); 129989bddce5SStephen Hemminger if (sock) { 13001da177e4SLinus Torvalds if ((unsigned)backlog > sysctl_somaxconn) 13011da177e4SLinus Torvalds backlog = sysctl_somaxconn; 13021da177e4SLinus Torvalds 13031da177e4SLinus Torvalds err = security_socket_listen(sock, backlog); 13046cb153caSBenjamin LaHaise if (!err) 13051da177e4SLinus Torvalds err = sock->ops->listen(sock, backlog); 13066cb153caSBenjamin LaHaise 13076cb153caSBenjamin LaHaise fput_light(sock->file, fput_needed); 13081da177e4SLinus Torvalds } 13091da177e4SLinus Torvalds return err; 13101da177e4SLinus Torvalds } 13111da177e4SLinus Torvalds 13121da177e4SLinus Torvalds /* 13131da177e4SLinus Torvalds * For accept, we attempt to create a new socket, set up the link 13141da177e4SLinus Torvalds * with the client, wake up the client, then return the new 13151da177e4SLinus Torvalds * connected fd. We collect the address of the connector in kernel 13161da177e4SLinus Torvalds * space and move it to user at the very end. This is unclean because 13171da177e4SLinus Torvalds * we open the socket then return an error. 13181da177e4SLinus Torvalds * 13191da177e4SLinus Torvalds * 1003.1g adds the ability to recvmsg() to query connection pending 13201da177e4SLinus Torvalds * status to recvmsg. We need to add that support in a way thats 13211da177e4SLinus Torvalds * clean when we restucture accept also. 13221da177e4SLinus Torvalds */ 13231da177e4SLinus Torvalds 132489bddce5SStephen Hemminger asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, 132589bddce5SStephen Hemminger int __user *upeer_addrlen) 13261da177e4SLinus Torvalds { 13271da177e4SLinus Torvalds struct socket *sock, *newsock; 132839d8c1b6SDavid S. Miller struct file *newfile; 13296cb153caSBenjamin LaHaise int err, len, newfd, fput_needed; 13301da177e4SLinus Torvalds char address[MAX_SOCK_ADDR]; 13311da177e4SLinus Torvalds 13326cb153caSBenjamin LaHaise sock = sockfd_lookup_light(fd, &err, &fput_needed); 13331da177e4SLinus Torvalds if (!sock) 13341da177e4SLinus Torvalds goto out; 13351da177e4SLinus Torvalds 13361da177e4SLinus Torvalds err = -ENFILE; 13371da177e4SLinus Torvalds if (!(newsock = sock_alloc())) 13381da177e4SLinus Torvalds goto out_put; 13391da177e4SLinus Torvalds 13401da177e4SLinus Torvalds newsock->type = sock->type; 13411da177e4SLinus Torvalds newsock->ops = sock->ops; 13421da177e4SLinus Torvalds 13431da177e4SLinus Torvalds /* 13441da177e4SLinus Torvalds * We don't need try_module_get here, as the listening socket (sock) 13451da177e4SLinus Torvalds * has the protocol module (sock->ops->owner) held. 13461da177e4SLinus Torvalds */ 13471da177e4SLinus Torvalds __module_get(newsock->ops->owner); 13481da177e4SLinus Torvalds 134939d8c1b6SDavid S. Miller newfd = sock_alloc_fd(&newfile); 135039d8c1b6SDavid S. Miller if (unlikely(newfd < 0)) { 135139d8c1b6SDavid S. Miller err = newfd; 13529a1875e6SDavid S. Miller sock_release(newsock); 13539a1875e6SDavid S. Miller goto out_put; 135439d8c1b6SDavid S. Miller } 135539d8c1b6SDavid S. Miller 135639d8c1b6SDavid S. Miller err = sock_attach_fd(newsock, newfile); 135739d8c1b6SDavid S. Miller if (err < 0) 135839d8c1b6SDavid S. Miller goto out_fd; 135939d8c1b6SDavid S. Miller 1360a79af59eSFrank Filz err = security_socket_accept(sock, newsock); 1361a79af59eSFrank Filz if (err) 136239d8c1b6SDavid S. Miller goto out_fd; 1363a79af59eSFrank Filz 13641da177e4SLinus Torvalds err = sock->ops->accept(sock, newsock, sock->file->f_flags); 13651da177e4SLinus Torvalds if (err < 0) 136639d8c1b6SDavid S. Miller goto out_fd; 13671da177e4SLinus Torvalds 13681da177e4SLinus Torvalds if (upeer_sockaddr) { 136989bddce5SStephen Hemminger if (newsock->ops->getname(newsock, (struct sockaddr *)address, 137089bddce5SStephen Hemminger &len, 2) < 0) { 13711da177e4SLinus Torvalds err = -ECONNABORTED; 137239d8c1b6SDavid S. Miller goto out_fd; 13731da177e4SLinus Torvalds } 137489bddce5SStephen Hemminger err = move_addr_to_user(address, len, upeer_sockaddr, 137589bddce5SStephen Hemminger upeer_addrlen); 13761da177e4SLinus Torvalds if (err < 0) 137739d8c1b6SDavid S. Miller goto out_fd; 13781da177e4SLinus Torvalds } 13791da177e4SLinus Torvalds 13801da177e4SLinus Torvalds /* File flags are not inherited via accept() unlike another OSes. */ 13811da177e4SLinus Torvalds 138239d8c1b6SDavid S. Miller fd_install(newfd, newfile); 138339d8c1b6SDavid S. Miller err = newfd; 13841da177e4SLinus Torvalds 13851da177e4SLinus Torvalds security_socket_post_accept(sock, newsock); 13861da177e4SLinus Torvalds 13871da177e4SLinus Torvalds out_put: 13886cb153caSBenjamin LaHaise fput_light(sock->file, fput_needed); 13891da177e4SLinus Torvalds out: 13901da177e4SLinus Torvalds return err; 139139d8c1b6SDavid S. Miller out_fd: 13929606a216SDavid S. Miller fput(newfile); 139339d8c1b6SDavid S. Miller put_unused_fd(newfd); 13941da177e4SLinus Torvalds goto out_put; 13951da177e4SLinus Torvalds } 13961da177e4SLinus Torvalds 13971da177e4SLinus Torvalds /* 13981da177e4SLinus Torvalds * Attempt to connect to a socket with the server address. The address 13991da177e4SLinus Torvalds * is in user space so we verify it is OK and move it to kernel space. 14001da177e4SLinus Torvalds * 14011da177e4SLinus Torvalds * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to 14021da177e4SLinus Torvalds * break bindings 14031da177e4SLinus Torvalds * 14041da177e4SLinus Torvalds * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and 14051da177e4SLinus Torvalds * other SEQPACKET protocols that take time to connect() as it doesn't 14061da177e4SLinus Torvalds * include the -EINPROGRESS status for such sockets. 14071da177e4SLinus Torvalds */ 14081da177e4SLinus Torvalds 140989bddce5SStephen Hemminger asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, 141089bddce5SStephen Hemminger int addrlen) 14111da177e4SLinus Torvalds { 14121da177e4SLinus Torvalds struct socket *sock; 14131da177e4SLinus Torvalds char address[MAX_SOCK_ADDR]; 14146cb153caSBenjamin LaHaise int err, fput_needed; 14151da177e4SLinus Torvalds 14166cb153caSBenjamin LaHaise sock = sockfd_lookup_light(fd, &err, &fput_needed); 14171da177e4SLinus Torvalds if (!sock) 14181da177e4SLinus Torvalds goto out; 14191da177e4SLinus Torvalds err = move_addr_to_kernel(uservaddr, addrlen, address); 14201da177e4SLinus Torvalds if (err < 0) 14211da177e4SLinus Torvalds goto out_put; 14221da177e4SLinus Torvalds 142389bddce5SStephen Hemminger err = 142489bddce5SStephen Hemminger security_socket_connect(sock, (struct sockaddr *)address, addrlen); 14251da177e4SLinus Torvalds if (err) 14261da177e4SLinus Torvalds goto out_put; 14271da177e4SLinus Torvalds 14281da177e4SLinus Torvalds err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen, 14291da177e4SLinus Torvalds sock->file->f_flags); 14301da177e4SLinus Torvalds out_put: 14316cb153caSBenjamin LaHaise fput_light(sock->file, fput_needed); 14321da177e4SLinus Torvalds out: 14331da177e4SLinus Torvalds return err; 14341da177e4SLinus Torvalds } 14351da177e4SLinus Torvalds 14361da177e4SLinus Torvalds /* 14371da177e4SLinus Torvalds * Get the local address ('name') of a socket object. Move the obtained 14381da177e4SLinus Torvalds * name to user space. 14391da177e4SLinus Torvalds */ 14401da177e4SLinus Torvalds 144189bddce5SStephen Hemminger asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, 144289bddce5SStephen Hemminger int __user *usockaddr_len) 14431da177e4SLinus Torvalds { 14441da177e4SLinus Torvalds struct socket *sock; 14451da177e4SLinus Torvalds char address[MAX_SOCK_ADDR]; 14466cb153caSBenjamin LaHaise int len, err, fput_needed; 14471da177e4SLinus Torvalds 14486cb153caSBenjamin LaHaise sock = sockfd_lookup_light(fd, &err, &fput_needed); 14491da177e4SLinus Torvalds if (!sock) 14501da177e4SLinus Torvalds goto out; 14511da177e4SLinus Torvalds 14521da177e4SLinus Torvalds err = security_socket_getsockname(sock); 14531da177e4SLinus Torvalds if (err) 14541da177e4SLinus Torvalds goto out_put; 14551da177e4SLinus Torvalds 14561da177e4SLinus Torvalds err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0); 14571da177e4SLinus Torvalds if (err) 14581da177e4SLinus Torvalds goto out_put; 14591da177e4SLinus Torvalds err = move_addr_to_user(address, len, usockaddr, usockaddr_len); 14601da177e4SLinus Torvalds 14611da177e4SLinus Torvalds out_put: 14626cb153caSBenjamin LaHaise fput_light(sock->file, fput_needed); 14631da177e4SLinus Torvalds out: 14641da177e4SLinus Torvalds return err; 14651da177e4SLinus Torvalds } 14661da177e4SLinus Torvalds 14671da177e4SLinus Torvalds /* 14681da177e4SLinus Torvalds * Get the remote address ('name') of a socket object. Move the obtained 14691da177e4SLinus Torvalds * name to user space. 14701da177e4SLinus Torvalds */ 14711da177e4SLinus Torvalds 147289bddce5SStephen Hemminger asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, 147389bddce5SStephen Hemminger int __user *usockaddr_len) 14741da177e4SLinus Torvalds { 14751da177e4SLinus Torvalds struct socket *sock; 14761da177e4SLinus Torvalds char address[MAX_SOCK_ADDR]; 14776cb153caSBenjamin LaHaise int len, err, fput_needed; 14781da177e4SLinus Torvalds 147989bddce5SStephen Hemminger sock = sockfd_lookup_light(fd, &err, &fput_needed); 148089bddce5SStephen Hemminger if (sock != NULL) { 14811da177e4SLinus Torvalds err = security_socket_getpeername(sock); 14821da177e4SLinus Torvalds if (err) { 14836cb153caSBenjamin LaHaise fput_light(sock->file, fput_needed); 14841da177e4SLinus Torvalds return err; 14851da177e4SLinus Torvalds } 14861da177e4SLinus Torvalds 148789bddce5SStephen Hemminger err = 148889bddce5SStephen Hemminger sock->ops->getname(sock, (struct sockaddr *)address, &len, 148989bddce5SStephen Hemminger 1); 14901da177e4SLinus Torvalds if (!err) 149189bddce5SStephen Hemminger err = move_addr_to_user(address, len, usockaddr, 149289bddce5SStephen Hemminger usockaddr_len); 14936cb153caSBenjamin LaHaise fput_light(sock->file, fput_needed); 14941da177e4SLinus Torvalds } 14951da177e4SLinus Torvalds return err; 14961da177e4SLinus Torvalds } 14971da177e4SLinus Torvalds 14981da177e4SLinus Torvalds /* 14991da177e4SLinus Torvalds * Send a datagram to a given address. We move the address into kernel 15001da177e4SLinus Torvalds * space and check the user space data area is readable before invoking 15011da177e4SLinus Torvalds * the protocol. 15021da177e4SLinus Torvalds */ 15031da177e4SLinus Torvalds 150489bddce5SStephen Hemminger asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, 150589bddce5SStephen Hemminger unsigned flags, struct sockaddr __user *addr, 150689bddce5SStephen Hemminger int addr_len) 15071da177e4SLinus Torvalds { 15081da177e4SLinus Torvalds struct socket *sock; 15091da177e4SLinus Torvalds char address[MAX_SOCK_ADDR]; 15101da177e4SLinus Torvalds int err; 15111da177e4SLinus Torvalds struct msghdr msg; 15121da177e4SLinus Torvalds struct iovec iov; 15136cb153caSBenjamin LaHaise int fput_needed; 15146cb153caSBenjamin LaHaise struct file *sock_file; 15151da177e4SLinus Torvalds 15166cb153caSBenjamin LaHaise sock_file = fget_light(fd, &fput_needed); 15176cb153caSBenjamin LaHaise if (!sock_file) 15186cb153caSBenjamin LaHaise return -EBADF; 15196cb153caSBenjamin LaHaise 15206cb153caSBenjamin LaHaise sock = sock_from_file(sock_file, &err); 15211da177e4SLinus Torvalds if (!sock) 15226cb153caSBenjamin LaHaise goto out_put; 15231da177e4SLinus Torvalds iov.iov_base = buff; 15241da177e4SLinus Torvalds iov.iov_len = len; 15251da177e4SLinus Torvalds msg.msg_name = NULL; 15261da177e4SLinus Torvalds msg.msg_iov = &iov; 15271da177e4SLinus Torvalds msg.msg_iovlen = 1; 15281da177e4SLinus Torvalds msg.msg_control = NULL; 15291da177e4SLinus Torvalds msg.msg_controllen = 0; 15301da177e4SLinus Torvalds msg.msg_namelen = 0; 15316cb153caSBenjamin LaHaise if (addr) { 15321da177e4SLinus Torvalds err = move_addr_to_kernel(addr, addr_len, address); 15331da177e4SLinus Torvalds if (err < 0) 15341da177e4SLinus Torvalds goto out_put; 15351da177e4SLinus Torvalds msg.msg_name = address; 15361da177e4SLinus Torvalds msg.msg_namelen = addr_len; 15371da177e4SLinus Torvalds } 15381da177e4SLinus Torvalds if (sock->file->f_flags & O_NONBLOCK) 15391da177e4SLinus Torvalds flags |= MSG_DONTWAIT; 15401da177e4SLinus Torvalds msg.msg_flags = flags; 15411da177e4SLinus Torvalds err = sock_sendmsg(sock, &msg, len); 15421da177e4SLinus Torvalds 15431da177e4SLinus Torvalds out_put: 15446cb153caSBenjamin LaHaise fput_light(sock_file, fput_needed); 15451da177e4SLinus Torvalds return err; 15461da177e4SLinus Torvalds } 15471da177e4SLinus Torvalds 15481da177e4SLinus Torvalds /* 15491da177e4SLinus Torvalds * Send a datagram down a socket. 15501da177e4SLinus Torvalds */ 15511da177e4SLinus Torvalds 15521da177e4SLinus Torvalds asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags) 15531da177e4SLinus Torvalds { 15541da177e4SLinus Torvalds return sys_sendto(fd, buff, len, flags, NULL, 0); 15551da177e4SLinus Torvalds } 15561da177e4SLinus Torvalds 15571da177e4SLinus Torvalds /* 15581da177e4SLinus Torvalds * Receive a frame from the socket and optionally record the address of the 15591da177e4SLinus Torvalds * sender. We verify the buffers are writable and if needed move the 15601da177e4SLinus Torvalds * sender address from kernel to user space. 15611da177e4SLinus Torvalds */ 15621da177e4SLinus Torvalds 156389bddce5SStephen Hemminger asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, 156489bddce5SStephen Hemminger unsigned flags, struct sockaddr __user *addr, 156589bddce5SStephen Hemminger int __user *addr_len) 15661da177e4SLinus Torvalds { 15671da177e4SLinus Torvalds struct socket *sock; 15681da177e4SLinus Torvalds struct iovec iov; 15691da177e4SLinus Torvalds struct msghdr msg; 15701da177e4SLinus Torvalds char address[MAX_SOCK_ADDR]; 15711da177e4SLinus Torvalds int err, err2; 15726cb153caSBenjamin LaHaise struct file *sock_file; 15736cb153caSBenjamin LaHaise int fput_needed; 15741da177e4SLinus Torvalds 15756cb153caSBenjamin LaHaise sock_file = fget_light(fd, &fput_needed); 15766cb153caSBenjamin LaHaise if (!sock_file) 15776cb153caSBenjamin LaHaise return -EBADF; 15786cb153caSBenjamin LaHaise 15796cb153caSBenjamin LaHaise sock = sock_from_file(sock_file, &err); 15801da177e4SLinus Torvalds if (!sock) 15811da177e4SLinus Torvalds goto out; 15821da177e4SLinus Torvalds 15831da177e4SLinus Torvalds msg.msg_control = NULL; 15841da177e4SLinus Torvalds msg.msg_controllen = 0; 15851da177e4SLinus Torvalds msg.msg_iovlen = 1; 15861da177e4SLinus Torvalds msg.msg_iov = &iov; 15871da177e4SLinus Torvalds iov.iov_len = size; 15881da177e4SLinus Torvalds iov.iov_base = ubuf; 15891da177e4SLinus Torvalds msg.msg_name = address; 15901da177e4SLinus Torvalds msg.msg_namelen = MAX_SOCK_ADDR; 15911da177e4SLinus Torvalds if (sock->file->f_flags & O_NONBLOCK) 15921da177e4SLinus Torvalds flags |= MSG_DONTWAIT; 15931da177e4SLinus Torvalds err = sock_recvmsg(sock, &msg, size, flags); 15941da177e4SLinus Torvalds 159589bddce5SStephen Hemminger if (err >= 0 && addr != NULL) { 15961da177e4SLinus Torvalds err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len); 15971da177e4SLinus Torvalds if (err2 < 0) 15981da177e4SLinus Torvalds err = err2; 15991da177e4SLinus Torvalds } 16001da177e4SLinus Torvalds out: 16016cb153caSBenjamin LaHaise fput_light(sock_file, fput_needed); 16021da177e4SLinus Torvalds return err; 16031da177e4SLinus Torvalds } 16041da177e4SLinus Torvalds 16051da177e4SLinus Torvalds /* 16061da177e4SLinus Torvalds * Receive a datagram from a socket. 16071da177e4SLinus Torvalds */ 16081da177e4SLinus Torvalds 160989bddce5SStephen Hemminger asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, 161089bddce5SStephen Hemminger unsigned flags) 16111da177e4SLinus Torvalds { 16121da177e4SLinus Torvalds return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); 16131da177e4SLinus Torvalds } 16141da177e4SLinus Torvalds 16151da177e4SLinus Torvalds /* 16161da177e4SLinus Torvalds * Set a socket option. Because we don't know the option lengths we have 16171da177e4SLinus Torvalds * to pass the user mode parameter for the protocols to sort out. 16181da177e4SLinus Torvalds */ 16191da177e4SLinus Torvalds 162089bddce5SStephen Hemminger asmlinkage long sys_setsockopt(int fd, int level, int optname, 162189bddce5SStephen Hemminger char __user *optval, int optlen) 16221da177e4SLinus Torvalds { 16236cb153caSBenjamin LaHaise int err, fput_needed; 16241da177e4SLinus Torvalds struct socket *sock; 16251da177e4SLinus Torvalds 16261da177e4SLinus Torvalds if (optlen < 0) 16271da177e4SLinus Torvalds return -EINVAL; 16281da177e4SLinus Torvalds 162989bddce5SStephen Hemminger sock = sockfd_lookup_light(fd, &err, &fput_needed); 163089bddce5SStephen Hemminger if (sock != NULL) { 16311da177e4SLinus Torvalds err = security_socket_setsockopt(sock, level, optname); 16326cb153caSBenjamin LaHaise if (err) 16336cb153caSBenjamin LaHaise goto out_put; 16341da177e4SLinus Torvalds 16351da177e4SLinus Torvalds if (level == SOL_SOCKET) 163689bddce5SStephen Hemminger err = 163789bddce5SStephen Hemminger sock_setsockopt(sock, level, optname, optval, 163889bddce5SStephen Hemminger optlen); 16391da177e4SLinus Torvalds else 164089bddce5SStephen Hemminger err = 164189bddce5SStephen Hemminger sock->ops->setsockopt(sock, level, optname, optval, 164289bddce5SStephen Hemminger optlen); 16436cb153caSBenjamin LaHaise out_put: 16446cb153caSBenjamin LaHaise fput_light(sock->file, fput_needed); 16451da177e4SLinus Torvalds } 16461da177e4SLinus Torvalds return err; 16471da177e4SLinus Torvalds } 16481da177e4SLinus Torvalds 16491da177e4SLinus Torvalds /* 16501da177e4SLinus Torvalds * Get a socket option. Because we don't know the option lengths we have 16511da177e4SLinus Torvalds * to pass a user mode parameter for the protocols to sort out. 16521da177e4SLinus Torvalds */ 16531da177e4SLinus Torvalds 165489bddce5SStephen Hemminger asmlinkage long sys_getsockopt(int fd, int level, int optname, 165589bddce5SStephen Hemminger char __user *optval, int __user *optlen) 16561da177e4SLinus Torvalds { 16576cb153caSBenjamin LaHaise int err, fput_needed; 16581da177e4SLinus Torvalds struct socket *sock; 16591da177e4SLinus Torvalds 166089bddce5SStephen Hemminger sock = sockfd_lookup_light(fd, &err, &fput_needed); 166189bddce5SStephen Hemminger if (sock != NULL) { 16626cb153caSBenjamin LaHaise err = security_socket_getsockopt(sock, level, optname); 16636cb153caSBenjamin LaHaise if (err) 16646cb153caSBenjamin LaHaise goto out_put; 16651da177e4SLinus Torvalds 16661da177e4SLinus Torvalds if (level == SOL_SOCKET) 166789bddce5SStephen Hemminger err = 166889bddce5SStephen Hemminger sock_getsockopt(sock, level, optname, optval, 166989bddce5SStephen Hemminger optlen); 16701da177e4SLinus Torvalds else 167189bddce5SStephen Hemminger err = 167289bddce5SStephen Hemminger sock->ops->getsockopt(sock, level, optname, optval, 167389bddce5SStephen Hemminger optlen); 16746cb153caSBenjamin LaHaise out_put: 16756cb153caSBenjamin LaHaise fput_light(sock->file, fput_needed); 16761da177e4SLinus Torvalds } 16771da177e4SLinus Torvalds return err; 16781da177e4SLinus Torvalds } 16791da177e4SLinus Torvalds 16801da177e4SLinus Torvalds /* 16811da177e4SLinus Torvalds * Shutdown a socket. 16821da177e4SLinus Torvalds */ 16831da177e4SLinus Torvalds 16841da177e4SLinus Torvalds asmlinkage long sys_shutdown(int fd, int how) 16851da177e4SLinus Torvalds { 16866cb153caSBenjamin LaHaise int err, fput_needed; 16871da177e4SLinus Torvalds struct socket *sock; 16881da177e4SLinus Torvalds 168989bddce5SStephen Hemminger sock = sockfd_lookup_light(fd, &err, &fput_needed); 169089bddce5SStephen Hemminger if (sock != NULL) { 16911da177e4SLinus Torvalds err = security_socket_shutdown(sock, how); 16926cb153caSBenjamin LaHaise if (!err) 16931da177e4SLinus Torvalds err = sock->ops->shutdown(sock, how); 16946cb153caSBenjamin LaHaise fput_light(sock->file, fput_needed); 16951da177e4SLinus Torvalds } 16961da177e4SLinus Torvalds return err; 16971da177e4SLinus Torvalds } 16981da177e4SLinus Torvalds 16991da177e4SLinus Torvalds /* A couple of helpful macros for getting the address of the 32/64 bit 17001da177e4SLinus Torvalds * fields which are the same type (int / unsigned) on our platforms. 17011da177e4SLinus Torvalds */ 17021da177e4SLinus Torvalds #define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member) 17031da177e4SLinus Torvalds #define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) 17041da177e4SLinus Torvalds #define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) 17051da177e4SLinus Torvalds 17061da177e4SLinus Torvalds /* 17071da177e4SLinus Torvalds * BSD sendmsg interface 17081da177e4SLinus Torvalds */ 17091da177e4SLinus Torvalds 17101da177e4SLinus Torvalds asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) 17111da177e4SLinus Torvalds { 171289bddce5SStephen Hemminger struct compat_msghdr __user *msg_compat = 171389bddce5SStephen Hemminger (struct compat_msghdr __user *)msg; 17141da177e4SLinus Torvalds struct socket *sock; 17151da177e4SLinus Torvalds char address[MAX_SOCK_ADDR]; 17161da177e4SLinus Torvalds struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; 1717b9d717a7SAlex Williamson unsigned char ctl[sizeof(struct cmsghdr) + 20] 1718b9d717a7SAlex Williamson __attribute__ ((aligned(sizeof(__kernel_size_t)))); 1719b9d717a7SAlex Williamson /* 20 is size of ipv6_pktinfo */ 17201da177e4SLinus Torvalds unsigned char *ctl_buf = ctl; 17211da177e4SLinus Torvalds struct msghdr msg_sys; 17221da177e4SLinus Torvalds int err, ctl_len, iov_size, total_len; 17236cb153caSBenjamin LaHaise int fput_needed; 17241da177e4SLinus Torvalds 17251da177e4SLinus Torvalds err = -EFAULT; 17261da177e4SLinus Torvalds if (MSG_CMSG_COMPAT & flags) { 17271da177e4SLinus Torvalds if (get_compat_msghdr(&msg_sys, msg_compat)) 17281da177e4SLinus Torvalds return -EFAULT; 172989bddce5SStephen Hemminger } 173089bddce5SStephen Hemminger else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) 17311da177e4SLinus Torvalds return -EFAULT; 17321da177e4SLinus Torvalds 17336cb153caSBenjamin LaHaise sock = sockfd_lookup_light(fd, &err, &fput_needed); 17341da177e4SLinus Torvalds if (!sock) 17351da177e4SLinus Torvalds goto out; 17361da177e4SLinus Torvalds 17371da177e4SLinus Torvalds /* do not move before msg_sys is valid */ 17381da177e4SLinus Torvalds err = -EMSGSIZE; 17391da177e4SLinus Torvalds if (msg_sys.msg_iovlen > UIO_MAXIOV) 17401da177e4SLinus Torvalds goto out_put; 17411da177e4SLinus Torvalds 17421da177e4SLinus Torvalds /* Check whether to allocate the iovec area */ 17431da177e4SLinus Torvalds err = -ENOMEM; 17441da177e4SLinus Torvalds iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); 17451da177e4SLinus Torvalds if (msg_sys.msg_iovlen > UIO_FASTIOV) { 17461da177e4SLinus Torvalds iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); 17471da177e4SLinus Torvalds if (!iov) 17481da177e4SLinus Torvalds goto out_put; 17491da177e4SLinus Torvalds } 17501da177e4SLinus Torvalds 17511da177e4SLinus Torvalds /* This will also move the address data into kernel space */ 17521da177e4SLinus Torvalds if (MSG_CMSG_COMPAT & flags) { 17531da177e4SLinus Torvalds err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ); 17541da177e4SLinus Torvalds } else 17551da177e4SLinus Torvalds err = verify_iovec(&msg_sys, iov, address, VERIFY_READ); 17561da177e4SLinus Torvalds if (err < 0) 17571da177e4SLinus Torvalds goto out_freeiov; 17581da177e4SLinus Torvalds total_len = err; 17591da177e4SLinus Torvalds 17601da177e4SLinus Torvalds err = -ENOBUFS; 17611da177e4SLinus Torvalds 17621da177e4SLinus Torvalds if (msg_sys.msg_controllen > INT_MAX) 17631da177e4SLinus Torvalds goto out_freeiov; 17641da177e4SLinus Torvalds ctl_len = msg_sys.msg_controllen; 17651da177e4SLinus Torvalds if ((MSG_CMSG_COMPAT & flags) && ctl_len) { 176689bddce5SStephen Hemminger err = 176789bddce5SStephen Hemminger cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, 176889bddce5SStephen Hemminger sizeof(ctl)); 17691da177e4SLinus Torvalds if (err) 17701da177e4SLinus Torvalds goto out_freeiov; 17711da177e4SLinus Torvalds ctl_buf = msg_sys.msg_control; 17728920e8f9SAl Viro ctl_len = msg_sys.msg_controllen; 17731da177e4SLinus Torvalds } else if (ctl_len) { 177489bddce5SStephen Hemminger if (ctl_len > sizeof(ctl)) { 17751da177e4SLinus Torvalds ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); 17761da177e4SLinus Torvalds if (ctl_buf == NULL) 17771da177e4SLinus Torvalds goto out_freeiov; 17781da177e4SLinus Torvalds } 17791da177e4SLinus Torvalds err = -EFAULT; 17801da177e4SLinus Torvalds /* 17811da177e4SLinus Torvalds * Careful! Before this, msg_sys.msg_control contains a user pointer. 17821da177e4SLinus Torvalds * Afterwards, it will be a kernel pointer. Thus the compiler-assisted 17831da177e4SLinus Torvalds * checking falls down on this. 17841da177e4SLinus Torvalds */ 178589bddce5SStephen Hemminger if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control, 178689bddce5SStephen Hemminger ctl_len)) 17871da177e4SLinus Torvalds goto out_freectl; 17881da177e4SLinus Torvalds msg_sys.msg_control = ctl_buf; 17891da177e4SLinus Torvalds } 17901da177e4SLinus Torvalds msg_sys.msg_flags = flags; 17911da177e4SLinus Torvalds 17921da177e4SLinus Torvalds if (sock->file->f_flags & O_NONBLOCK) 17931da177e4SLinus Torvalds msg_sys.msg_flags |= MSG_DONTWAIT; 17941da177e4SLinus Torvalds err = sock_sendmsg(sock, &msg_sys, total_len); 17951da177e4SLinus Torvalds 17961da177e4SLinus Torvalds out_freectl: 17971da177e4SLinus Torvalds if (ctl_buf != ctl) 17981da177e4SLinus Torvalds sock_kfree_s(sock->sk, ctl_buf, ctl_len); 17991da177e4SLinus Torvalds out_freeiov: 18001da177e4SLinus Torvalds if (iov != iovstack) 18011da177e4SLinus Torvalds sock_kfree_s(sock->sk, iov, iov_size); 18021da177e4SLinus Torvalds out_put: 18036cb153caSBenjamin LaHaise fput_light(sock->file, fput_needed); 18041da177e4SLinus Torvalds out: 18051da177e4SLinus Torvalds return err; 18061da177e4SLinus Torvalds } 18071da177e4SLinus Torvalds 18081da177e4SLinus Torvalds /* 18091da177e4SLinus Torvalds * BSD recvmsg interface 18101da177e4SLinus Torvalds */ 18111da177e4SLinus Torvalds 181289bddce5SStephen Hemminger asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, 181389bddce5SStephen Hemminger unsigned int flags) 18141da177e4SLinus Torvalds { 181589bddce5SStephen Hemminger struct compat_msghdr __user *msg_compat = 181689bddce5SStephen Hemminger (struct compat_msghdr __user *)msg; 18171da177e4SLinus Torvalds struct socket *sock; 18181da177e4SLinus Torvalds struct iovec iovstack[UIO_FASTIOV]; 18191da177e4SLinus Torvalds struct iovec *iov = iovstack; 18201da177e4SLinus Torvalds struct msghdr msg_sys; 18211da177e4SLinus Torvalds unsigned long cmsg_ptr; 18221da177e4SLinus Torvalds int err, iov_size, total_len, len; 18236cb153caSBenjamin LaHaise int fput_needed; 18241da177e4SLinus Torvalds 18251da177e4SLinus Torvalds /* kernel mode address */ 18261da177e4SLinus Torvalds char addr[MAX_SOCK_ADDR]; 18271da177e4SLinus Torvalds 18281da177e4SLinus Torvalds /* user mode address pointers */ 18291da177e4SLinus Torvalds struct sockaddr __user *uaddr; 18301da177e4SLinus Torvalds int __user *uaddr_len; 18311da177e4SLinus Torvalds 18321da177e4SLinus Torvalds if (MSG_CMSG_COMPAT & flags) { 18331da177e4SLinus Torvalds if (get_compat_msghdr(&msg_sys, msg_compat)) 18341da177e4SLinus Torvalds return -EFAULT; 183589bddce5SStephen Hemminger } 183689bddce5SStephen Hemminger else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) 18371da177e4SLinus Torvalds return -EFAULT; 18381da177e4SLinus Torvalds 18396cb153caSBenjamin LaHaise sock = sockfd_lookup_light(fd, &err, &fput_needed); 18401da177e4SLinus Torvalds if (!sock) 18411da177e4SLinus Torvalds goto out; 18421da177e4SLinus Torvalds 18431da177e4SLinus Torvalds err = -EMSGSIZE; 18441da177e4SLinus Torvalds if (msg_sys.msg_iovlen > UIO_MAXIOV) 18451da177e4SLinus Torvalds goto out_put; 18461da177e4SLinus Torvalds 18471da177e4SLinus Torvalds /* Check whether to allocate the iovec area */ 18481da177e4SLinus Torvalds err = -ENOMEM; 18491da177e4SLinus Torvalds iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); 18501da177e4SLinus Torvalds if (msg_sys.msg_iovlen > UIO_FASTIOV) { 18511da177e4SLinus Torvalds iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); 18521da177e4SLinus Torvalds if (!iov) 18531da177e4SLinus Torvalds goto out_put; 18541da177e4SLinus Torvalds } 18551da177e4SLinus Torvalds 18561da177e4SLinus Torvalds /* 18571da177e4SLinus Torvalds * Save the user-mode address (verify_iovec will change the 18581da177e4SLinus Torvalds * kernel msghdr to use the kernel address space) 18591da177e4SLinus Torvalds */ 18601da177e4SLinus Torvalds 18611da177e4SLinus Torvalds uaddr = (void __user *)msg_sys.msg_name; 18621da177e4SLinus Torvalds uaddr_len = COMPAT_NAMELEN(msg); 18631da177e4SLinus Torvalds if (MSG_CMSG_COMPAT & flags) { 18641da177e4SLinus Torvalds err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE); 18651da177e4SLinus Torvalds } else 18661da177e4SLinus Torvalds err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE); 18671da177e4SLinus Torvalds if (err < 0) 18681da177e4SLinus Torvalds goto out_freeiov; 18691da177e4SLinus Torvalds total_len = err; 18701da177e4SLinus Torvalds 18711da177e4SLinus Torvalds cmsg_ptr = (unsigned long)msg_sys.msg_control; 18721da177e4SLinus Torvalds msg_sys.msg_flags = 0; 18731da177e4SLinus Torvalds if (MSG_CMSG_COMPAT & flags) 18741da177e4SLinus Torvalds msg_sys.msg_flags = MSG_CMSG_COMPAT; 18751da177e4SLinus Torvalds 18761da177e4SLinus Torvalds if (sock->file->f_flags & O_NONBLOCK) 18771da177e4SLinus Torvalds flags |= MSG_DONTWAIT; 18781da177e4SLinus Torvalds err = sock_recvmsg(sock, &msg_sys, total_len, flags); 18791da177e4SLinus Torvalds if (err < 0) 18801da177e4SLinus Torvalds goto out_freeiov; 18811da177e4SLinus Torvalds len = err; 18821da177e4SLinus Torvalds 18831da177e4SLinus Torvalds if (uaddr != NULL) { 188489bddce5SStephen Hemminger err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, 188589bddce5SStephen Hemminger uaddr_len); 18861da177e4SLinus Torvalds if (err < 0) 18871da177e4SLinus Torvalds goto out_freeiov; 18881da177e4SLinus Torvalds } 188937f7f421SDavid S. Miller err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT), 189037f7f421SDavid S. Miller COMPAT_FLAGS(msg)); 18911da177e4SLinus Torvalds if (err) 18921da177e4SLinus Torvalds goto out_freeiov; 18931da177e4SLinus Torvalds if (MSG_CMSG_COMPAT & flags) 18941da177e4SLinus Torvalds err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, 18951da177e4SLinus Torvalds &msg_compat->msg_controllen); 18961da177e4SLinus Torvalds else 18971da177e4SLinus Torvalds err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, 18981da177e4SLinus Torvalds &msg->msg_controllen); 18991da177e4SLinus Torvalds if (err) 19001da177e4SLinus Torvalds goto out_freeiov; 19011da177e4SLinus Torvalds err = len; 19021da177e4SLinus Torvalds 19031da177e4SLinus Torvalds out_freeiov: 19041da177e4SLinus Torvalds if (iov != iovstack) 19051da177e4SLinus Torvalds sock_kfree_s(sock->sk, iov, iov_size); 19061da177e4SLinus Torvalds out_put: 19076cb153caSBenjamin LaHaise fput_light(sock->file, fput_needed); 19081da177e4SLinus Torvalds out: 19091da177e4SLinus Torvalds return err; 19101da177e4SLinus Torvalds } 19111da177e4SLinus Torvalds 19121da177e4SLinus Torvalds #ifdef __ARCH_WANT_SYS_SOCKETCALL 19131da177e4SLinus Torvalds 19141da177e4SLinus Torvalds /* Argument list sizes for sys_socketcall */ 19151da177e4SLinus Torvalds #define AL(x) ((x) * sizeof(unsigned long)) 191689bddce5SStephen Hemminger static const unsigned char nargs[18]={ 191789bddce5SStephen Hemminger AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), 19181da177e4SLinus Torvalds AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), 191989bddce5SStephen Hemminger AL(6),AL(2),AL(5),AL(5),AL(3),AL(3) 192089bddce5SStephen Hemminger }; 192189bddce5SStephen Hemminger 19221da177e4SLinus Torvalds #undef AL 19231da177e4SLinus Torvalds 19241da177e4SLinus Torvalds /* 19251da177e4SLinus Torvalds * System call vectors. 19261da177e4SLinus Torvalds * 19271da177e4SLinus Torvalds * Argument checking cleaned up. Saved 20% in size. 19281da177e4SLinus Torvalds * This function doesn't need to set the kernel lock because 19291da177e4SLinus Torvalds * it is set by the callees. 19301da177e4SLinus Torvalds */ 19311da177e4SLinus Torvalds 19321da177e4SLinus Torvalds asmlinkage long sys_socketcall(int call, unsigned long __user *args) 19331da177e4SLinus Torvalds { 19341da177e4SLinus Torvalds unsigned long a[6]; 19351da177e4SLinus Torvalds unsigned long a0, a1; 19361da177e4SLinus Torvalds int err; 19371da177e4SLinus Torvalds 19381da177e4SLinus Torvalds if (call < 1 || call > SYS_RECVMSG) 19391da177e4SLinus Torvalds return -EINVAL; 19401da177e4SLinus Torvalds 19411da177e4SLinus Torvalds /* copy_from_user should be SMP safe. */ 19421da177e4SLinus Torvalds if (copy_from_user(a, args, nargs[call])) 19431da177e4SLinus Torvalds return -EFAULT; 19441da177e4SLinus Torvalds 19454bcff1b3SDavid Woodhouse err = audit_socketcall(nargs[call] / sizeof(unsigned long), a); 19463ec3b2fbSDavid Woodhouse if (err) 19473ec3b2fbSDavid Woodhouse return err; 19483ec3b2fbSDavid Woodhouse 19491da177e4SLinus Torvalds a0 = a[0]; 19501da177e4SLinus Torvalds a1 = a[1]; 19511da177e4SLinus Torvalds 195289bddce5SStephen Hemminger switch (call) { 19531da177e4SLinus Torvalds case SYS_SOCKET: 19541da177e4SLinus Torvalds err = sys_socket(a0, a1, a[2]); 19551da177e4SLinus Torvalds break; 19561da177e4SLinus Torvalds case SYS_BIND: 19571da177e4SLinus Torvalds err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]); 19581da177e4SLinus Torvalds break; 19591da177e4SLinus Torvalds case SYS_CONNECT: 19601da177e4SLinus Torvalds err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]); 19611da177e4SLinus Torvalds break; 19621da177e4SLinus Torvalds case SYS_LISTEN: 19631da177e4SLinus Torvalds err = sys_listen(a0, a1); 19641da177e4SLinus Torvalds break; 19651da177e4SLinus Torvalds case SYS_ACCEPT: 196689bddce5SStephen Hemminger err = 196789bddce5SStephen Hemminger sys_accept(a0, (struct sockaddr __user *)a1, 196889bddce5SStephen Hemminger (int __user *)a[2]); 19691da177e4SLinus Torvalds break; 19701da177e4SLinus Torvalds case SYS_GETSOCKNAME: 197189bddce5SStephen Hemminger err = 197289bddce5SStephen Hemminger sys_getsockname(a0, (struct sockaddr __user *)a1, 197389bddce5SStephen Hemminger (int __user *)a[2]); 19741da177e4SLinus Torvalds break; 19751da177e4SLinus Torvalds case SYS_GETPEERNAME: 197689bddce5SStephen Hemminger err = 197789bddce5SStephen Hemminger sys_getpeername(a0, (struct sockaddr __user *)a1, 197889bddce5SStephen Hemminger (int __user *)a[2]); 19791da177e4SLinus Torvalds break; 19801da177e4SLinus Torvalds case SYS_SOCKETPAIR: 19811da177e4SLinus Torvalds err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]); 19821da177e4SLinus Torvalds break; 19831da177e4SLinus Torvalds case SYS_SEND: 19841da177e4SLinus Torvalds err = sys_send(a0, (void __user *)a1, a[2], a[3]); 19851da177e4SLinus Torvalds break; 19861da177e4SLinus Torvalds case SYS_SENDTO: 19871da177e4SLinus Torvalds err = sys_sendto(a0, (void __user *)a1, a[2], a[3], 19881da177e4SLinus Torvalds (struct sockaddr __user *)a[4], a[5]); 19891da177e4SLinus Torvalds break; 19901da177e4SLinus Torvalds case SYS_RECV: 19911da177e4SLinus Torvalds err = sys_recv(a0, (void __user *)a1, a[2], a[3]); 19921da177e4SLinus Torvalds break; 19931da177e4SLinus Torvalds case SYS_RECVFROM: 19941da177e4SLinus Torvalds err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3], 199589bddce5SStephen Hemminger (struct sockaddr __user *)a[4], 199689bddce5SStephen Hemminger (int __user *)a[5]); 19971da177e4SLinus Torvalds break; 19981da177e4SLinus Torvalds case SYS_SHUTDOWN: 19991da177e4SLinus Torvalds err = sys_shutdown(a0, a1); 20001da177e4SLinus Torvalds break; 20011da177e4SLinus Torvalds case SYS_SETSOCKOPT: 20021da177e4SLinus Torvalds err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]); 20031da177e4SLinus Torvalds break; 20041da177e4SLinus Torvalds case SYS_GETSOCKOPT: 200589bddce5SStephen Hemminger err = 200689bddce5SStephen Hemminger sys_getsockopt(a0, a1, a[2], (char __user *)a[3], 200789bddce5SStephen Hemminger (int __user *)a[4]); 20081da177e4SLinus Torvalds break; 20091da177e4SLinus Torvalds case SYS_SENDMSG: 20101da177e4SLinus Torvalds err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); 20111da177e4SLinus Torvalds break; 20121da177e4SLinus Torvalds case SYS_RECVMSG: 20131da177e4SLinus Torvalds err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); 20141da177e4SLinus Torvalds break; 20151da177e4SLinus Torvalds default: 20161da177e4SLinus Torvalds err = -EINVAL; 20171da177e4SLinus Torvalds break; 20181da177e4SLinus Torvalds } 20191da177e4SLinus Torvalds return err; 20201da177e4SLinus Torvalds } 20211da177e4SLinus Torvalds 20221da177e4SLinus Torvalds #endif /* __ARCH_WANT_SYS_SOCKETCALL */ 20231da177e4SLinus Torvalds 202455737fdaSStephen Hemminger /** 202555737fdaSStephen Hemminger * sock_register - add a socket protocol handler 202655737fdaSStephen Hemminger * @ops: description of protocol 202755737fdaSStephen Hemminger * 20281da177e4SLinus Torvalds * This function is called by a protocol handler that wants to 20291da177e4SLinus Torvalds * advertise its address family, and have it linked into the 203055737fdaSStephen Hemminger * socket interface. The value ops->family coresponds to the 203155737fdaSStephen Hemminger * socket system call protocol family. 20321da177e4SLinus Torvalds */ 2033f0fd27d4SStephen Hemminger int sock_register(const struct net_proto_family *ops) 20341da177e4SLinus Torvalds { 20351da177e4SLinus Torvalds int err; 20361da177e4SLinus Torvalds 20371da177e4SLinus Torvalds if (ops->family >= NPROTO) { 203889bddce5SStephen Hemminger printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, 203989bddce5SStephen Hemminger NPROTO); 20401da177e4SLinus Torvalds return -ENOBUFS; 20411da177e4SLinus Torvalds } 204255737fdaSStephen Hemminger 204355737fdaSStephen Hemminger spin_lock(&net_family_lock); 204455737fdaSStephen Hemminger if (net_families[ops->family]) 20451da177e4SLinus Torvalds err = -EEXIST; 204655737fdaSStephen Hemminger else { 20471da177e4SLinus Torvalds net_families[ops->family] = ops; 20481da177e4SLinus Torvalds err = 0; 20491da177e4SLinus Torvalds } 205055737fdaSStephen Hemminger spin_unlock(&net_family_lock); 205155737fdaSStephen Hemminger 205289bddce5SStephen Hemminger printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); 20531da177e4SLinus Torvalds return err; 20541da177e4SLinus Torvalds } 20551da177e4SLinus Torvalds 205655737fdaSStephen Hemminger /** 205755737fdaSStephen Hemminger * sock_unregister - remove a protocol handler 205855737fdaSStephen Hemminger * @family: protocol family to remove 205955737fdaSStephen Hemminger * 20601da177e4SLinus Torvalds * This function is called by a protocol handler that wants to 20611da177e4SLinus Torvalds * remove its address family, and have it unlinked from the 206255737fdaSStephen Hemminger * new socket creation. 206355737fdaSStephen Hemminger * 206455737fdaSStephen Hemminger * If protocol handler is a module, then it can use module reference 206555737fdaSStephen Hemminger * counts to protect against new references. If protocol handler is not 206655737fdaSStephen Hemminger * a module then it needs to provide its own protection in 206755737fdaSStephen Hemminger * the ops->create routine. 20681da177e4SLinus Torvalds */ 2069f0fd27d4SStephen Hemminger void sock_unregister(int family) 20701da177e4SLinus Torvalds { 2071f0fd27d4SStephen Hemminger BUG_ON(family < 0 || family >= NPROTO); 20721da177e4SLinus Torvalds 207355737fdaSStephen Hemminger spin_lock(&net_family_lock); 20741da177e4SLinus Torvalds net_families[family] = NULL; 207555737fdaSStephen Hemminger spin_unlock(&net_family_lock); 207655737fdaSStephen Hemminger 207755737fdaSStephen Hemminger synchronize_rcu(); 207855737fdaSStephen Hemminger 207989bddce5SStephen Hemminger printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); 20801da177e4SLinus Torvalds } 20811da177e4SLinus Torvalds 208277d76ea3SAndi Kleen static int __init sock_init(void) 20831da177e4SLinus Torvalds { 20841da177e4SLinus Torvalds /* 20851da177e4SLinus Torvalds * Initialize sock SLAB cache. 20861da177e4SLinus Torvalds */ 20871da177e4SLinus Torvalds 20881da177e4SLinus Torvalds sk_init(); 20891da177e4SLinus Torvalds 20901da177e4SLinus Torvalds /* 20911da177e4SLinus Torvalds * Initialize skbuff SLAB cache 20921da177e4SLinus Torvalds */ 20931da177e4SLinus Torvalds skb_init(); 20941da177e4SLinus Torvalds 20951da177e4SLinus Torvalds /* 20961da177e4SLinus Torvalds * Initialize the protocols module. 20971da177e4SLinus Torvalds */ 20981da177e4SLinus Torvalds 20991da177e4SLinus Torvalds init_inodecache(); 21001da177e4SLinus Torvalds register_filesystem(&sock_fs_type); 21011da177e4SLinus Torvalds sock_mnt = kern_mount(&sock_fs_type); 210277d76ea3SAndi Kleen 210377d76ea3SAndi Kleen /* The real protocol initialization is performed in later initcalls. 21041da177e4SLinus Torvalds */ 21051da177e4SLinus Torvalds 21061da177e4SLinus Torvalds #ifdef CONFIG_NETFILTER 21071da177e4SLinus Torvalds netfilter_init(); 21081da177e4SLinus Torvalds #endif 2109cbeb321aSDavid S. Miller 2110cbeb321aSDavid S. Miller return 0; 21111da177e4SLinus Torvalds } 21121da177e4SLinus Torvalds 211377d76ea3SAndi Kleen core_initcall(sock_init); /* early initcall */ 211477d76ea3SAndi Kleen 21151da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS 21161da177e4SLinus Torvalds void socket_seq_show(struct seq_file *seq) 21171da177e4SLinus Torvalds { 21181da177e4SLinus Torvalds int cpu; 21191da177e4SLinus Torvalds int counter = 0; 21201da177e4SLinus Torvalds 21216f912042SKAMEZAWA Hiroyuki for_each_possible_cpu(cpu) 21221da177e4SLinus Torvalds counter += per_cpu(sockets_in_use, cpu); 21231da177e4SLinus Torvalds 21241da177e4SLinus Torvalds /* It can be negative, by the way. 8) */ 21251da177e4SLinus Torvalds if (counter < 0) 21261da177e4SLinus Torvalds counter = 0; 21271da177e4SLinus Torvalds 21281da177e4SLinus Torvalds seq_printf(seq, "sockets: used %d\n", counter); 21291da177e4SLinus Torvalds } 21301da177e4SLinus Torvalds #endif /* CONFIG_PROC_FS */ 21311da177e4SLinus Torvalds 213289bbfc95SShaun Pereira #ifdef CONFIG_COMPAT 213389bbfc95SShaun Pereira static long compat_sock_ioctl(struct file *file, unsigned cmd, 213489bbfc95SShaun Pereira unsigned long arg) 213589bbfc95SShaun Pereira { 213689bbfc95SShaun Pereira struct socket *sock = file->private_data; 213789bbfc95SShaun Pereira int ret = -ENOIOCTLCMD; 213889bbfc95SShaun Pereira 213989bbfc95SShaun Pereira if (sock->ops->compat_ioctl) 214089bbfc95SShaun Pereira ret = sock->ops->compat_ioctl(sock, cmd, arg); 214189bbfc95SShaun Pereira 214289bbfc95SShaun Pereira return ret; 214389bbfc95SShaun Pereira } 214489bbfc95SShaun Pereira #endif 214589bbfc95SShaun Pereira 2146ac5a488eSSridhar Samudrala int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) 2147ac5a488eSSridhar Samudrala { 2148ac5a488eSSridhar Samudrala return sock->ops->bind(sock, addr, addrlen); 2149ac5a488eSSridhar Samudrala } 2150ac5a488eSSridhar Samudrala 2151ac5a488eSSridhar Samudrala int kernel_listen(struct socket *sock, int backlog) 2152ac5a488eSSridhar Samudrala { 2153ac5a488eSSridhar Samudrala return sock->ops->listen(sock, backlog); 2154ac5a488eSSridhar Samudrala } 2155ac5a488eSSridhar Samudrala 2156ac5a488eSSridhar Samudrala int kernel_accept(struct socket *sock, struct socket **newsock, int flags) 2157ac5a488eSSridhar Samudrala { 2158ac5a488eSSridhar Samudrala struct sock *sk = sock->sk; 2159ac5a488eSSridhar Samudrala int err; 2160ac5a488eSSridhar Samudrala 2161ac5a488eSSridhar Samudrala err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, 2162ac5a488eSSridhar Samudrala newsock); 2163ac5a488eSSridhar Samudrala if (err < 0) 2164ac5a488eSSridhar Samudrala goto done; 2165ac5a488eSSridhar Samudrala 2166ac5a488eSSridhar Samudrala err = sock->ops->accept(sock, *newsock, flags); 2167ac5a488eSSridhar Samudrala if (err < 0) { 2168ac5a488eSSridhar Samudrala sock_release(*newsock); 2169ac5a488eSSridhar Samudrala goto done; 2170ac5a488eSSridhar Samudrala } 2171ac5a488eSSridhar Samudrala 2172ac5a488eSSridhar Samudrala (*newsock)->ops = sock->ops; 2173ac5a488eSSridhar Samudrala 2174ac5a488eSSridhar Samudrala done: 2175ac5a488eSSridhar Samudrala return err; 2176ac5a488eSSridhar Samudrala } 2177ac5a488eSSridhar Samudrala 2178ac5a488eSSridhar Samudrala int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, 2179ac5a488eSSridhar Samudrala int flags) 2180ac5a488eSSridhar Samudrala { 2181ac5a488eSSridhar Samudrala return sock->ops->connect(sock, addr, addrlen, flags); 2182ac5a488eSSridhar Samudrala } 2183ac5a488eSSridhar Samudrala 2184ac5a488eSSridhar Samudrala int kernel_getsockname(struct socket *sock, struct sockaddr *addr, 2185ac5a488eSSridhar Samudrala int *addrlen) 2186ac5a488eSSridhar Samudrala { 2187ac5a488eSSridhar Samudrala return sock->ops->getname(sock, addr, addrlen, 0); 2188ac5a488eSSridhar Samudrala } 2189ac5a488eSSridhar Samudrala 2190ac5a488eSSridhar Samudrala int kernel_getpeername(struct socket *sock, struct sockaddr *addr, 2191ac5a488eSSridhar Samudrala int *addrlen) 2192ac5a488eSSridhar Samudrala { 2193ac5a488eSSridhar Samudrala return sock->ops->getname(sock, addr, addrlen, 1); 2194ac5a488eSSridhar Samudrala } 2195ac5a488eSSridhar Samudrala 2196ac5a488eSSridhar Samudrala int kernel_getsockopt(struct socket *sock, int level, int optname, 2197ac5a488eSSridhar Samudrala char *optval, int *optlen) 2198ac5a488eSSridhar Samudrala { 2199ac5a488eSSridhar Samudrala mm_segment_t oldfs = get_fs(); 2200ac5a488eSSridhar Samudrala int err; 2201ac5a488eSSridhar Samudrala 2202ac5a488eSSridhar Samudrala set_fs(KERNEL_DS); 2203ac5a488eSSridhar Samudrala if (level == SOL_SOCKET) 2204ac5a488eSSridhar Samudrala err = sock_getsockopt(sock, level, optname, optval, optlen); 2205ac5a488eSSridhar Samudrala else 2206ac5a488eSSridhar Samudrala err = sock->ops->getsockopt(sock, level, optname, optval, 2207ac5a488eSSridhar Samudrala optlen); 2208ac5a488eSSridhar Samudrala set_fs(oldfs); 2209ac5a488eSSridhar Samudrala return err; 2210ac5a488eSSridhar Samudrala } 2211ac5a488eSSridhar Samudrala 2212ac5a488eSSridhar Samudrala int kernel_setsockopt(struct socket *sock, int level, int optname, 2213ac5a488eSSridhar Samudrala char *optval, int optlen) 2214ac5a488eSSridhar Samudrala { 2215ac5a488eSSridhar Samudrala mm_segment_t oldfs = get_fs(); 2216ac5a488eSSridhar Samudrala int err; 2217ac5a488eSSridhar Samudrala 2218ac5a488eSSridhar Samudrala set_fs(KERNEL_DS); 2219ac5a488eSSridhar Samudrala if (level == SOL_SOCKET) 2220ac5a488eSSridhar Samudrala err = sock_setsockopt(sock, level, optname, optval, optlen); 2221ac5a488eSSridhar Samudrala else 2222ac5a488eSSridhar Samudrala err = sock->ops->setsockopt(sock, level, optname, optval, 2223ac5a488eSSridhar Samudrala optlen); 2224ac5a488eSSridhar Samudrala set_fs(oldfs); 2225ac5a488eSSridhar Samudrala return err; 2226ac5a488eSSridhar Samudrala } 2227ac5a488eSSridhar Samudrala 2228ac5a488eSSridhar Samudrala int kernel_sendpage(struct socket *sock, struct page *page, int offset, 2229ac5a488eSSridhar Samudrala size_t size, int flags) 2230ac5a488eSSridhar Samudrala { 2231ac5a488eSSridhar Samudrala if (sock->ops->sendpage) 2232ac5a488eSSridhar Samudrala return sock->ops->sendpage(sock, page, offset, size, flags); 2233ac5a488eSSridhar Samudrala 2234ac5a488eSSridhar Samudrala return sock_no_sendpage(sock, page, offset, size, flags); 2235ac5a488eSSridhar Samudrala } 2236ac5a488eSSridhar Samudrala 2237ac5a488eSSridhar Samudrala int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) 2238ac5a488eSSridhar Samudrala { 2239ac5a488eSSridhar Samudrala mm_segment_t oldfs = get_fs(); 2240ac5a488eSSridhar Samudrala int err; 2241ac5a488eSSridhar Samudrala 2242ac5a488eSSridhar Samudrala set_fs(KERNEL_DS); 2243ac5a488eSSridhar Samudrala err = sock->ops->ioctl(sock, cmd, arg); 2244ac5a488eSSridhar Samudrala set_fs(oldfs); 2245ac5a488eSSridhar Samudrala 2246ac5a488eSSridhar Samudrala return err; 2247ac5a488eSSridhar Samudrala } 2248ac5a488eSSridhar Samudrala 22491da177e4SLinus Torvalds /* ABI emulation layers need these two */ 22501da177e4SLinus Torvalds EXPORT_SYMBOL(move_addr_to_kernel); 22511da177e4SLinus Torvalds EXPORT_SYMBOL(move_addr_to_user); 22521da177e4SLinus Torvalds EXPORT_SYMBOL(sock_create); 22531da177e4SLinus Torvalds EXPORT_SYMBOL(sock_create_kern); 22541da177e4SLinus Torvalds EXPORT_SYMBOL(sock_create_lite); 22551da177e4SLinus Torvalds EXPORT_SYMBOL(sock_map_fd); 22561da177e4SLinus Torvalds EXPORT_SYMBOL(sock_recvmsg); 22571da177e4SLinus Torvalds EXPORT_SYMBOL(sock_register); 22581da177e4SLinus Torvalds EXPORT_SYMBOL(sock_release); 22591da177e4SLinus Torvalds EXPORT_SYMBOL(sock_sendmsg); 22601da177e4SLinus Torvalds EXPORT_SYMBOL(sock_unregister); 22611da177e4SLinus Torvalds EXPORT_SYMBOL(sock_wake_async); 22621da177e4SLinus Torvalds EXPORT_SYMBOL(sockfd_lookup); 22631da177e4SLinus Torvalds EXPORT_SYMBOL(kernel_sendmsg); 22641da177e4SLinus Torvalds EXPORT_SYMBOL(kernel_recvmsg); 2265ac5a488eSSridhar Samudrala EXPORT_SYMBOL(kernel_bind); 2266ac5a488eSSridhar Samudrala EXPORT_SYMBOL(kernel_listen); 2267ac5a488eSSridhar Samudrala EXPORT_SYMBOL(kernel_accept); 2268ac5a488eSSridhar Samudrala EXPORT_SYMBOL(kernel_connect); 2269ac5a488eSSridhar Samudrala EXPORT_SYMBOL(kernel_getsockname); 2270ac5a488eSSridhar Samudrala EXPORT_SYMBOL(kernel_getpeername); 2271ac5a488eSSridhar Samudrala EXPORT_SYMBOL(kernel_getsockopt); 2272ac5a488eSSridhar Samudrala EXPORT_SYMBOL(kernel_setsockopt); 2273ac5a488eSSridhar Samudrala EXPORT_SYMBOL(kernel_sendpage); 2274ac5a488eSSridhar Samudrala EXPORT_SYMBOL(kernel_sock_ioctl); 2275