1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * NET An implementation of the SOCKET network access protocol. 4 * 5 * Version: @(#)socket.c 1.1.93 18/02/95 6 * 7 * Authors: Orest Zborowski, <obz@Kodak.COM> 8 * Ross Biro 9 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 10 * 11 * Fixes: 12 * Anonymous : NOTSOCK/BADF cleanup. Error fix in 13 * shutdown() 14 * Alan Cox : verify_area() fixes 15 * Alan Cox : Removed DDI 16 * Jonathan Kamens : SOCK_DGRAM reconnect bug 17 * Alan Cox : Moved a load of checks to the very 18 * top level. 19 * Alan Cox : Move address structures to/from user 20 * mode above the protocol layers. 21 * Rob Janssen : Allow 0 length sends. 22 * Alan Cox : Asynchronous I/O support (cribbed from the 23 * tty drivers). 24 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style) 25 * Jeff Uphoff : Made max number of sockets command-line 26 * configurable. 27 * Matti Aarnio : Made the number of sockets dynamic, 28 * to be allocated when needed, and mr. 29 * Uphoff's max is used as max to be 30 * allowed to allocate. 31 * Linus : Argh. removed all the socket allocation 32 * altogether: it's in the inode now. 33 * Alan Cox : Made sock_alloc()/sock_release() public 34 * for NetROM and future kernel nfsd type 35 * stuff. 36 * Alan Cox : sendmsg/recvmsg basics. 37 * Tom Dyas : Export net symbols. 38 * Marcin Dalecki : Fixed problems with CONFIG_NET="n". 39 * Alan Cox : Added thread locking to sys_* calls 40 * for sockets. May have errors at the 41 * moment. 42 * Kevin Buhr : Fixed the dumb errors in the above. 43 * Andi Kleen : Some small cleanups, optimizations, 44 * and fixed a copy_from_user() bug. 45 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0) 46 * Tigran Aivazian : Made listen(2) backlog sanity checks 47 * protocol-independent 48 * 49 * This module is effectively the top level interface to the BSD socket 50 * paradigm. 51 * 52 * Based upon Swansea University Computer Society NET3.039 53 */ 54 55 #include <linux/mm.h> 56 #include <linux/socket.h> 57 #include <linux/file.h> 58 #include <linux/net.h> 59 #include <linux/interrupt.h> 60 #include <linux/thread_info.h> 61 #include <linux/rcupdate.h> 62 #include <linux/netdevice.h> 63 #include <linux/proc_fs.h> 64 #include <linux/seq_file.h> 65 #include <linux/mutex.h> 66 #include <linux/if_bridge.h> 67 #include <linux/if_frad.h> 68 #include <linux/if_vlan.h> 69 #include <linux/ptp_classify.h> 70 #include <linux/init.h> 71 #include <linux/poll.h> 72 #include <linux/cache.h> 73 #include <linux/module.h> 74 #include <linux/highmem.h> 75 #include <linux/mount.h> 76 #include <linux/pseudo_fs.h> 77 #include <linux/security.h> 78 #include <linux/syscalls.h> 79 #include <linux/compat.h> 80 #include <linux/kmod.h> 81 #include <linux/audit.h> 82 #include <linux/wireless.h> 83 #include <linux/nsproxy.h> 84 #include <linux/magic.h> 85 #include <linux/slab.h> 86 #include <linux/xattr.h> 87 #include <linux/nospec.h> 88 #include <linux/indirect_call_wrapper.h> 89 90 #include <linux/uaccess.h> 91 #include <asm/unistd.h> 92 93 #include <net/compat.h> 94 #include <net/wext.h> 95 #include <net/cls_cgroup.h> 96 97 #include <net/sock.h> 98 #include <linux/netfilter.h> 99 100 #include <linux/if_tun.h> 101 #include <linux/ipv6_route.h> 102 #include <linux/route.h> 103 #include <linux/termios.h> 104 #include <linux/sockios.h> 105 #include <net/busy_poll.h> 106 #include <linux/errqueue.h> 107 108 #ifdef CONFIG_NET_RX_BUSY_POLL 109 unsigned int sysctl_net_busy_read __read_mostly; 110 unsigned int sysctl_net_busy_poll __read_mostly; 111 #endif 112 113 static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to); 114 static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from); 115 static int sock_mmap(struct file *file, struct vm_area_struct *vma); 116 117 static int sock_close(struct inode *inode, struct file *file); 118 static __poll_t sock_poll(struct file *file, 119 struct poll_table_struct *wait); 120 static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 121 #ifdef CONFIG_COMPAT 122 static long compat_sock_ioctl(struct file *file, 123 unsigned int cmd, unsigned long arg); 124 #endif 125 static int sock_fasync(int fd, struct file *filp, int on); 126 static ssize_t sock_sendpage(struct file *file, struct page *page, 127 int offset, size_t size, loff_t *ppos, int more); 128 static ssize_t sock_splice_read(struct file *file, loff_t *ppos, 129 struct pipe_inode_info *pipe, size_t len, 130 unsigned int flags); 131 static void sock_show_fdinfo(struct seq_file *m, struct file *f); 132 133 /* 134 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear 135 * in the operation structures but are done directly via the socketcall() multiplexor. 136 */ 137 138 static const struct file_operations socket_file_ops = { 139 .owner = THIS_MODULE, 140 .llseek = no_llseek, 141 .read_iter = sock_read_iter, 142 .write_iter = sock_write_iter, 143 .poll = sock_poll, 144 .unlocked_ioctl = sock_ioctl, 145 #ifdef CONFIG_COMPAT 146 .compat_ioctl = compat_sock_ioctl, 147 #endif 148 .mmap = sock_mmap, 149 .release = sock_close, 150 .fasync = sock_fasync, 151 .sendpage = sock_sendpage, 152 .splice_write = generic_splice_sendpage, 153 .splice_read = sock_splice_read, 154 #ifdef CONFIG_PROC_FS 155 .show_fdinfo = sock_show_fdinfo, 156 #endif 157 }; 158 159 /* 160 * The protocol list. Each protocol is registered in here. 161 */ 162 163 static DEFINE_SPINLOCK(net_family_lock); 164 static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly; 165 166 /* 167 * Support routines. 168 * Move socket addresses back and forth across the kernel/user 169 * divide and look after the messy bits. 170 */ 171 172 /** 173 * move_addr_to_kernel - copy a socket address into kernel space 174 * @uaddr: Address in user space 175 * @kaddr: Address in kernel space 176 * @ulen: Length in user space 177 * 178 * The address is copied into kernel space. If the provided address is 179 * too long an error code of -EINVAL is returned. If the copy gives 180 * invalid addresses -EFAULT is returned. On a success 0 is returned. 181 */ 182 183 int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr) 184 { 185 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage)) 186 return -EINVAL; 187 if (ulen == 0) 188 return 0; 189 if (copy_from_user(kaddr, uaddr, ulen)) 190 return -EFAULT; 191 return audit_sockaddr(ulen, kaddr); 192 } 193 194 /** 195 * move_addr_to_user - copy an address to user space 196 * @kaddr: kernel space address 197 * @klen: length of address in kernel 198 * @uaddr: user space address 199 * @ulen: pointer to user length field 200 * 201 * The value pointed to by ulen on entry is the buffer length available. 202 * This is overwritten with the buffer space used. -EINVAL is returned 203 * if an overlong buffer is specified or a negative buffer size. -EFAULT 204 * is returned if either the buffer or the length field are not 205 * accessible. 206 * After copying the data up to the limit the user specifies, the true 207 * length of the data is written over the length limit the user 208 * specified. Zero is returned for a success. 209 */ 210 211 static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen, 212 void __user *uaddr, int __user *ulen) 213 { 214 int err; 215 int len; 216 217 BUG_ON(klen > sizeof(struct sockaddr_storage)); 218 err = get_user(len, ulen); 219 if (err) 220 return err; 221 if (len > klen) 222 len = klen; 223 if (len < 0) 224 return -EINVAL; 225 if (len) { 226 if (audit_sockaddr(klen, kaddr)) 227 return -ENOMEM; 228 if (copy_to_user(uaddr, kaddr, len)) 229 return -EFAULT; 230 } 231 /* 232 * "fromlen shall refer to the value before truncation.." 233 * 1003.1g 234 */ 235 return __put_user(klen, ulen); 236 } 237 238 static struct kmem_cache *sock_inode_cachep __ro_after_init; 239 240 static struct inode *sock_alloc_inode(struct super_block *sb) 241 { 242 struct socket_alloc *ei; 243 244 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); 245 if (!ei) 246 return NULL; 247 init_waitqueue_head(&ei->socket.wq.wait); 248 ei->socket.wq.fasync_list = NULL; 249 ei->socket.wq.flags = 0; 250 251 ei->socket.state = SS_UNCONNECTED; 252 ei->socket.flags = 0; 253 ei->socket.ops = NULL; 254 ei->socket.sk = NULL; 255 ei->socket.file = NULL; 256 257 return &ei->vfs_inode; 258 } 259 260 static void sock_free_inode(struct inode *inode) 261 { 262 struct socket_alloc *ei; 263 264 ei = container_of(inode, struct socket_alloc, vfs_inode); 265 kmem_cache_free(sock_inode_cachep, ei); 266 } 267 268 static void init_once(void *foo) 269 { 270 struct socket_alloc *ei = (struct socket_alloc *)foo; 271 272 inode_init_once(&ei->vfs_inode); 273 } 274 275 static void init_inodecache(void) 276 { 277 sock_inode_cachep = kmem_cache_create("sock_inode_cache", 278 sizeof(struct socket_alloc), 279 0, 280 (SLAB_HWCACHE_ALIGN | 281 SLAB_RECLAIM_ACCOUNT | 282 SLAB_MEM_SPREAD | SLAB_ACCOUNT), 283 init_once); 284 BUG_ON(sock_inode_cachep == NULL); 285 } 286 287 static const struct super_operations sockfs_ops = { 288 .alloc_inode = sock_alloc_inode, 289 .free_inode = sock_free_inode, 290 .statfs = simple_statfs, 291 }; 292 293 /* 294 * sockfs_dname() is called from d_path(). 295 */ 296 static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen) 297 { 298 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]", 299 d_inode(dentry)->i_ino); 300 } 301 302 static const struct dentry_operations sockfs_dentry_operations = { 303 .d_dname = sockfs_dname, 304 }; 305 306 static int sockfs_xattr_get(const struct xattr_handler *handler, 307 struct dentry *dentry, struct inode *inode, 308 const char *suffix, void *value, size_t size) 309 { 310 if (value) { 311 if (dentry->d_name.len + 1 > size) 312 return -ERANGE; 313 memcpy(value, dentry->d_name.name, dentry->d_name.len + 1); 314 } 315 return dentry->d_name.len + 1; 316 } 317 318 #define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname" 319 #define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX) 320 #define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1) 321 322 static const struct xattr_handler sockfs_xattr_handler = { 323 .name = XATTR_NAME_SOCKPROTONAME, 324 .get = sockfs_xattr_get, 325 }; 326 327 static int sockfs_security_xattr_set(const struct xattr_handler *handler, 328 struct dentry *dentry, struct inode *inode, 329 const char *suffix, const void *value, 330 size_t size, int flags) 331 { 332 /* Handled by LSM. */ 333 return -EAGAIN; 334 } 335 336 static const struct xattr_handler sockfs_security_xattr_handler = { 337 .prefix = XATTR_SECURITY_PREFIX, 338 .set = sockfs_security_xattr_set, 339 }; 340 341 static const struct xattr_handler *sockfs_xattr_handlers[] = { 342 &sockfs_xattr_handler, 343 &sockfs_security_xattr_handler, 344 NULL 345 }; 346 347 static int sockfs_init_fs_context(struct fs_context *fc) 348 { 349 struct pseudo_fs_context *ctx = init_pseudo(fc, SOCKFS_MAGIC); 350 if (!ctx) 351 return -ENOMEM; 352 ctx->ops = &sockfs_ops; 353 ctx->dops = &sockfs_dentry_operations; 354 ctx->xattr = sockfs_xattr_handlers; 355 return 0; 356 } 357 358 static struct vfsmount *sock_mnt __read_mostly; 359 360 static struct file_system_type sock_fs_type = { 361 .name = "sockfs", 362 .init_fs_context = sockfs_init_fs_context, 363 .kill_sb = kill_anon_super, 364 }; 365 366 /* 367 * Obtains the first available file descriptor and sets it up for use. 368 * 369 * These functions create file structures and maps them to fd space 370 * of the current process. On success it returns file descriptor 371 * and file struct implicitly stored in sock->file. 372 * Note that another thread may close file descriptor before we return 373 * from this function. We use the fact that now we do not refer 374 * to socket after mapping. If one day we will need it, this 375 * function will increment ref. count on file by 1. 376 * 377 * In any case returned fd MAY BE not valid! 378 * This race condition is unavoidable 379 * with shared fd spaces, we cannot solve it inside kernel, 380 * but we take care of internal coherence yet. 381 */ 382 383 /** 384 * sock_alloc_file - Bind a &socket to a &file 385 * @sock: socket 386 * @flags: file status flags 387 * @dname: protocol name 388 * 389 * Returns the &file bound with @sock, implicitly storing it 390 * in sock->file. If dname is %NULL, sets to "". 391 * On failure the return is a ERR pointer (see linux/err.h). 392 * This function uses GFP_KERNEL internally. 393 */ 394 395 struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) 396 { 397 struct file *file; 398 399 if (!dname) 400 dname = sock->sk ? sock->sk->sk_prot_creator->name : ""; 401 402 file = alloc_file_pseudo(SOCK_INODE(sock), sock_mnt, dname, 403 O_RDWR | (flags & O_NONBLOCK), 404 &socket_file_ops); 405 if (IS_ERR(file)) { 406 sock_release(sock); 407 return file; 408 } 409 410 sock->file = file; 411 file->private_data = sock; 412 stream_open(SOCK_INODE(sock), file); 413 return file; 414 } 415 EXPORT_SYMBOL(sock_alloc_file); 416 417 static int sock_map_fd(struct socket *sock, int flags) 418 { 419 struct file *newfile; 420 int fd = get_unused_fd_flags(flags); 421 if (unlikely(fd < 0)) { 422 sock_release(sock); 423 return fd; 424 } 425 426 newfile = sock_alloc_file(sock, flags, NULL); 427 if (!IS_ERR(newfile)) { 428 fd_install(fd, newfile); 429 return fd; 430 } 431 432 put_unused_fd(fd); 433 return PTR_ERR(newfile); 434 } 435 436 /** 437 * sock_from_file - Return the &socket bounded to @file. 438 * @file: file 439 * @err: pointer to an error code return 440 * 441 * On failure returns %NULL and assigns -ENOTSOCK to @err. 442 */ 443 444 struct socket *sock_from_file(struct file *file, int *err) 445 { 446 if (file->f_op == &socket_file_ops) 447 return file->private_data; /* set in sock_map_fd */ 448 449 *err = -ENOTSOCK; 450 return NULL; 451 } 452 EXPORT_SYMBOL(sock_from_file); 453 454 /** 455 * sockfd_lookup - Go from a file number to its socket slot 456 * @fd: file handle 457 * @err: pointer to an error code return 458 * 459 * The file handle passed in is locked and the socket it is bound 460 * to is returned. If an error occurs the err pointer is overwritten 461 * with a negative errno code and NULL is returned. The function checks 462 * for both invalid handles and passing a handle which is not a socket. 463 * 464 * On a success the socket object pointer is returned. 465 */ 466 467 struct socket *sockfd_lookup(int fd, int *err) 468 { 469 struct file *file; 470 struct socket *sock; 471 472 file = fget(fd); 473 if (!file) { 474 *err = -EBADF; 475 return NULL; 476 } 477 478 sock = sock_from_file(file, err); 479 if (!sock) 480 fput(file); 481 return sock; 482 } 483 EXPORT_SYMBOL(sockfd_lookup); 484 485 static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) 486 { 487 struct fd f = fdget(fd); 488 struct socket *sock; 489 490 *err = -EBADF; 491 if (f.file) { 492 sock = sock_from_file(f.file, err); 493 if (likely(sock)) { 494 *fput_needed = f.flags; 495 return sock; 496 } 497 fdput(f); 498 } 499 return NULL; 500 } 501 502 static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer, 503 size_t size) 504 { 505 ssize_t len; 506 ssize_t used = 0; 507 508 len = security_inode_listsecurity(d_inode(dentry), buffer, size); 509 if (len < 0) 510 return len; 511 used += len; 512 if (buffer) { 513 if (size < used) 514 return -ERANGE; 515 buffer += len; 516 } 517 518 len = (XATTR_NAME_SOCKPROTONAME_LEN + 1); 519 used += len; 520 if (buffer) { 521 if (size < used) 522 return -ERANGE; 523 memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len); 524 buffer += len; 525 } 526 527 return used; 528 } 529 530 static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr) 531 { 532 int err = simple_setattr(dentry, iattr); 533 534 if (!err && (iattr->ia_valid & ATTR_UID)) { 535 struct socket *sock = SOCKET_I(d_inode(dentry)); 536 537 if (sock->sk) 538 sock->sk->sk_uid = iattr->ia_uid; 539 else 540 err = -ENOENT; 541 } 542 543 return err; 544 } 545 546 static const struct inode_operations sockfs_inode_ops = { 547 .listxattr = sockfs_listxattr, 548 .setattr = sockfs_setattr, 549 }; 550 551 /** 552 * sock_alloc - allocate a socket 553 * 554 * Allocate a new inode and socket object. The two are bound together 555 * and initialised. The socket is then returned. If we are out of inodes 556 * NULL is returned. This functions uses GFP_KERNEL internally. 557 */ 558 559 struct socket *sock_alloc(void) 560 { 561 struct inode *inode; 562 struct socket *sock; 563 564 inode = new_inode_pseudo(sock_mnt->mnt_sb); 565 if (!inode) 566 return NULL; 567 568 sock = SOCKET_I(inode); 569 570 inode->i_ino = get_next_ino(); 571 inode->i_mode = S_IFSOCK | S_IRWXUGO; 572 inode->i_uid = current_fsuid(); 573 inode->i_gid = current_fsgid(); 574 inode->i_op = &sockfs_inode_ops; 575 576 return sock; 577 } 578 EXPORT_SYMBOL(sock_alloc); 579 580 /** 581 * sock_release - close a socket 582 * @sock: socket to close 583 * 584 * The socket is released from the protocol stack if it has a release 585 * callback, and the inode is then released if the socket is bound to 586 * an inode not a file. 587 */ 588 589 static void __sock_release(struct socket *sock, struct inode *inode) 590 { 591 if (sock->ops) { 592 struct module *owner = sock->ops->owner; 593 594 if (inode) 595 inode_lock(inode); 596 sock->ops->release(sock); 597 sock->sk = NULL; 598 if (inode) 599 inode_unlock(inode); 600 sock->ops = NULL; 601 module_put(owner); 602 } 603 604 if (sock->wq.fasync_list) 605 pr_err("%s: fasync list not empty!\n", __func__); 606 607 if (!sock->file) { 608 iput(SOCK_INODE(sock)); 609 return; 610 } 611 sock->file = NULL; 612 } 613 614 void sock_release(struct socket *sock) 615 { 616 __sock_release(sock, NULL); 617 } 618 EXPORT_SYMBOL(sock_release); 619 620 void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags) 621 { 622 u8 flags = *tx_flags; 623 624 if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE) 625 flags |= SKBTX_HW_TSTAMP; 626 627 if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE) 628 flags |= SKBTX_SW_TSTAMP; 629 630 if (tsflags & SOF_TIMESTAMPING_TX_SCHED) 631 flags |= SKBTX_SCHED_TSTAMP; 632 633 *tx_flags = flags; 634 } 635 EXPORT_SYMBOL(__sock_tx_timestamp); 636 637 INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *, 638 size_t)); 639 INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *, 640 size_t)); 641 static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg) 642 { 643 int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg, 644 inet_sendmsg, sock, msg, 645 msg_data_left(msg)); 646 BUG_ON(ret == -EIOCBQUEUED); 647 return ret; 648 } 649 650 /** 651 * sock_sendmsg - send a message through @sock 652 * @sock: socket 653 * @msg: message to send 654 * 655 * Sends @msg through @sock, passing through LSM. 656 * Returns the number of bytes sent, or an error code. 657 */ 658 int sock_sendmsg(struct socket *sock, struct msghdr *msg) 659 { 660 int err = security_socket_sendmsg(sock, msg, 661 msg_data_left(msg)); 662 663 return err ?: sock_sendmsg_nosec(sock, msg); 664 } 665 EXPORT_SYMBOL(sock_sendmsg); 666 667 /** 668 * kernel_sendmsg - send a message through @sock (kernel-space) 669 * @sock: socket 670 * @msg: message header 671 * @vec: kernel vec 672 * @num: vec array length 673 * @size: total message data size 674 * 675 * Builds the message data with @vec and sends it through @sock. 676 * Returns the number of bytes sent, or an error code. 677 */ 678 679 int kernel_sendmsg(struct socket *sock, struct msghdr *msg, 680 struct kvec *vec, size_t num, size_t size) 681 { 682 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size); 683 return sock_sendmsg(sock, msg); 684 } 685 EXPORT_SYMBOL(kernel_sendmsg); 686 687 /** 688 * kernel_sendmsg_locked - send a message through @sock (kernel-space) 689 * @sk: sock 690 * @msg: message header 691 * @vec: output s/g array 692 * @num: output s/g array length 693 * @size: total message data size 694 * 695 * Builds the message data with @vec and sends it through @sock. 696 * Returns the number of bytes sent, or an error code. 697 * Caller must hold @sk. 698 */ 699 700 int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg, 701 struct kvec *vec, size_t num, size_t size) 702 { 703 struct socket *sock = sk->sk_socket; 704 705 if (!sock->ops->sendmsg_locked) 706 return sock_no_sendmsg_locked(sk, msg, size); 707 708 iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size); 709 710 return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg)); 711 } 712 EXPORT_SYMBOL(kernel_sendmsg_locked); 713 714 static bool skb_is_err_queue(const struct sk_buff *skb) 715 { 716 /* pkt_type of skbs enqueued on the error queue are set to 717 * PACKET_OUTGOING in skb_set_err_queue(). This is only safe to do 718 * in recvmsg, since skbs received on a local socket will never 719 * have a pkt_type of PACKET_OUTGOING. 720 */ 721 return skb->pkt_type == PACKET_OUTGOING; 722 } 723 724 /* On transmit, software and hardware timestamps are returned independently. 725 * As the two skb clones share the hardware timestamp, which may be updated 726 * before the software timestamp is received, a hardware TX timestamp may be 727 * returned only if there is no software TX timestamp. Ignore false software 728 * timestamps, which may be made in the __sock_recv_timestamp() call when the 729 * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a 730 * hardware timestamp. 731 */ 732 static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp) 733 { 734 return skb->tstamp && !false_tstamp && skb_is_err_queue(skb); 735 } 736 737 static void put_ts_pktinfo(struct msghdr *msg, struct sk_buff *skb) 738 { 739 struct scm_ts_pktinfo ts_pktinfo; 740 struct net_device *orig_dev; 741 742 if (!skb_mac_header_was_set(skb)) 743 return; 744 745 memset(&ts_pktinfo, 0, sizeof(ts_pktinfo)); 746 747 rcu_read_lock(); 748 orig_dev = dev_get_by_napi_id(skb_napi_id(skb)); 749 if (orig_dev) 750 ts_pktinfo.if_index = orig_dev->ifindex; 751 rcu_read_unlock(); 752 753 ts_pktinfo.pkt_length = skb->len - skb_mac_offset(skb); 754 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_PKTINFO, 755 sizeof(ts_pktinfo), &ts_pktinfo); 756 } 757 758 /* 759 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) 760 */ 761 void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, 762 struct sk_buff *skb) 763 { 764 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); 765 int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW); 766 struct scm_timestamping_internal tss; 767 768 int empty = 1, false_tstamp = 0; 769 struct skb_shared_hwtstamps *shhwtstamps = 770 skb_hwtstamps(skb); 771 772 /* Race occurred between timestamp enabling and packet 773 receiving. Fill in the current time for now. */ 774 if (need_software_tstamp && skb->tstamp == 0) { 775 __net_timestamp(skb); 776 false_tstamp = 1; 777 } 778 779 if (need_software_tstamp) { 780 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { 781 if (new_tstamp) { 782 struct __kernel_sock_timeval tv; 783 784 skb_get_new_timestamp(skb, &tv); 785 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW, 786 sizeof(tv), &tv); 787 } else { 788 struct __kernel_old_timeval tv; 789 790 skb_get_timestamp(skb, &tv); 791 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD, 792 sizeof(tv), &tv); 793 } 794 } else { 795 if (new_tstamp) { 796 struct __kernel_timespec ts; 797 798 skb_get_new_timestampns(skb, &ts); 799 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW, 800 sizeof(ts), &ts); 801 } else { 802 struct __kernel_old_timespec ts; 803 804 skb_get_timestampns(skb, &ts); 805 put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD, 806 sizeof(ts), &ts); 807 } 808 } 809 } 810 811 memset(&tss, 0, sizeof(tss)); 812 if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) && 813 ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0)) 814 empty = 0; 815 if (shhwtstamps && 816 (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) && 817 !skb_is_swtx_tstamp(skb, false_tstamp) && 818 ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) { 819 empty = 0; 820 if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) && 821 !skb_is_err_queue(skb)) 822 put_ts_pktinfo(msg, skb); 823 } 824 if (!empty) { 825 if (sock_flag(sk, SOCK_TSTAMP_NEW)) 826 put_cmsg_scm_timestamping64(msg, &tss); 827 else 828 put_cmsg_scm_timestamping(msg, &tss); 829 830 if (skb_is_err_queue(skb) && skb->len && 831 SKB_EXT_ERR(skb)->opt_stats) 832 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING_OPT_STATS, 833 skb->len, skb->data); 834 } 835 } 836 EXPORT_SYMBOL_GPL(__sock_recv_timestamp); 837 838 void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk, 839 struct sk_buff *skb) 840 { 841 int ack; 842 843 if (!sock_flag(sk, SOCK_WIFI_STATUS)) 844 return; 845 if (!skb->wifi_acked_valid) 846 return; 847 848 ack = skb->wifi_acked; 849 850 put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack); 851 } 852 EXPORT_SYMBOL_GPL(__sock_recv_wifi_status); 853 854 static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, 855 struct sk_buff *skb) 856 { 857 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && SOCK_SKB_CB(skb)->dropcount) 858 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, 859 sizeof(__u32), &SOCK_SKB_CB(skb)->dropcount); 860 } 861 862 void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, 863 struct sk_buff *skb) 864 { 865 sock_recv_timestamp(msg, sk, skb); 866 sock_recv_drops(msg, sk, skb); 867 } 868 EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); 869 870 INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *, 871 size_t, int)); 872 INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *, 873 size_t, int)); 874 static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, 875 int flags) 876 { 877 return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg, 878 inet_recvmsg, sock, msg, msg_data_left(msg), 879 flags); 880 } 881 882 /** 883 * sock_recvmsg - receive a message from @sock 884 * @sock: socket 885 * @msg: message to receive 886 * @flags: message flags 887 * 888 * Receives @msg from @sock, passing through LSM. Returns the total number 889 * of bytes received, or an error. 890 */ 891 int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags) 892 { 893 int err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags); 894 895 return err ?: sock_recvmsg_nosec(sock, msg, flags); 896 } 897 EXPORT_SYMBOL(sock_recvmsg); 898 899 /** 900 * kernel_recvmsg - Receive a message from a socket (kernel space) 901 * @sock: The socket to receive the message from 902 * @msg: Received message 903 * @vec: Input s/g array for message data 904 * @num: Size of input s/g array 905 * @size: Number of bytes to read 906 * @flags: Message flags (MSG_DONTWAIT, etc...) 907 * 908 * On return the msg structure contains the scatter/gather array passed in the 909 * vec argument. The array is modified so that it consists of the unfilled 910 * portion of the original array. 911 * 912 * The returned value is the total number of bytes received, or an error. 913 */ 914 915 int kernel_recvmsg(struct socket *sock, struct msghdr *msg, 916 struct kvec *vec, size_t num, size_t size, int flags) 917 { 918 mm_segment_t oldfs = get_fs(); 919 int result; 920 921 iov_iter_kvec(&msg->msg_iter, READ, vec, num, size); 922 set_fs(KERNEL_DS); 923 result = sock_recvmsg(sock, msg, flags); 924 set_fs(oldfs); 925 return result; 926 } 927 EXPORT_SYMBOL(kernel_recvmsg); 928 929 static ssize_t sock_sendpage(struct file *file, struct page *page, 930 int offset, size_t size, loff_t *ppos, int more) 931 { 932 struct socket *sock; 933 int flags; 934 935 sock = file->private_data; 936 937 flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; 938 /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */ 939 flags |= more; 940 941 return kernel_sendpage(sock, page, offset, size, flags); 942 } 943 944 static ssize_t sock_splice_read(struct file *file, loff_t *ppos, 945 struct pipe_inode_info *pipe, size_t len, 946 unsigned int flags) 947 { 948 struct socket *sock = file->private_data; 949 950 if (unlikely(!sock->ops->splice_read)) 951 return generic_file_splice_read(file, ppos, pipe, len, flags); 952 953 return sock->ops->splice_read(sock, ppos, pipe, len, flags); 954 } 955 956 static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to) 957 { 958 struct file *file = iocb->ki_filp; 959 struct socket *sock = file->private_data; 960 struct msghdr msg = {.msg_iter = *to, 961 .msg_iocb = iocb}; 962 ssize_t res; 963 964 if (file->f_flags & O_NONBLOCK) 965 msg.msg_flags = MSG_DONTWAIT; 966 967 if (iocb->ki_pos != 0) 968 return -ESPIPE; 969 970 if (!iov_iter_count(to)) /* Match SYS5 behaviour */ 971 return 0; 972 973 res = sock_recvmsg(sock, &msg, msg.msg_flags); 974 *to = msg.msg_iter; 975 return res; 976 } 977 978 static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) 979 { 980 struct file *file = iocb->ki_filp; 981 struct socket *sock = file->private_data; 982 struct msghdr msg = {.msg_iter = *from, 983 .msg_iocb = iocb}; 984 ssize_t res; 985 986 if (iocb->ki_pos != 0) 987 return -ESPIPE; 988 989 if (file->f_flags & O_NONBLOCK) 990 msg.msg_flags = MSG_DONTWAIT; 991 992 if (sock->type == SOCK_SEQPACKET) 993 msg.msg_flags |= MSG_EOR; 994 995 res = sock_sendmsg(sock, &msg); 996 *from = msg.msg_iter; 997 return res; 998 } 999 1000 static void sock_show_fdinfo(struct seq_file *m, struct file *f) 1001 { 1002 struct socket *sock = f->private_data; 1003 1004 if (sock->ops->show_fdinfo) 1005 sock->ops->show_fdinfo(m, sock); 1006 } 1007 1008 /* 1009 * Atomic setting of ioctl hooks to avoid race 1010 * with module unload. 1011 */ 1012 1013 static DEFINE_MUTEX(br_ioctl_mutex); 1014 static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg); 1015 1016 void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) 1017 { 1018 mutex_lock(&br_ioctl_mutex); 1019 br_ioctl_hook = hook; 1020 mutex_unlock(&br_ioctl_mutex); 1021 } 1022 EXPORT_SYMBOL(brioctl_set); 1023 1024 static DEFINE_MUTEX(vlan_ioctl_mutex); 1025 static int (*vlan_ioctl_hook) (struct net *, void __user *arg); 1026 1027 void vlan_ioctl_set(int (*hook) (struct net *, void __user *)) 1028 { 1029 mutex_lock(&vlan_ioctl_mutex); 1030 vlan_ioctl_hook = hook; 1031 mutex_unlock(&vlan_ioctl_mutex); 1032 } 1033 EXPORT_SYMBOL(vlan_ioctl_set); 1034 1035 static DEFINE_MUTEX(dlci_ioctl_mutex); 1036 static int (*dlci_ioctl_hook) (unsigned int, void __user *); 1037 1038 void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) 1039 { 1040 mutex_lock(&dlci_ioctl_mutex); 1041 dlci_ioctl_hook = hook; 1042 mutex_unlock(&dlci_ioctl_mutex); 1043 } 1044 EXPORT_SYMBOL(dlci_ioctl_set); 1045 1046 static long sock_do_ioctl(struct net *net, struct socket *sock, 1047 unsigned int cmd, unsigned long arg) 1048 { 1049 int err; 1050 void __user *argp = (void __user *)arg; 1051 1052 err = sock->ops->ioctl(sock, cmd, arg); 1053 1054 /* 1055 * If this ioctl is unknown try to hand it down 1056 * to the NIC driver. 1057 */ 1058 if (err != -ENOIOCTLCMD) 1059 return err; 1060 1061 if (cmd == SIOCGIFCONF) { 1062 struct ifconf ifc; 1063 if (copy_from_user(&ifc, argp, sizeof(struct ifconf))) 1064 return -EFAULT; 1065 rtnl_lock(); 1066 err = dev_ifconf(net, &ifc, sizeof(struct ifreq)); 1067 rtnl_unlock(); 1068 if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf))) 1069 err = -EFAULT; 1070 } else { 1071 struct ifreq ifr; 1072 bool need_copyout; 1073 if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) 1074 return -EFAULT; 1075 err = dev_ioctl(net, cmd, &ifr, &need_copyout); 1076 if (!err && need_copyout) 1077 if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) 1078 return -EFAULT; 1079 } 1080 return err; 1081 } 1082 1083 /* 1084 * With an ioctl, arg may well be a user mode pointer, but we don't know 1085 * what to do with it - that's up to the protocol still. 1086 */ 1087 1088 /** 1089 * get_net_ns - increment the refcount of the network namespace 1090 * @ns: common namespace (net) 1091 * 1092 * Returns the net's common namespace. 1093 */ 1094 1095 struct ns_common *get_net_ns(struct ns_common *ns) 1096 { 1097 return &get_net(container_of(ns, struct net, ns))->ns; 1098 } 1099 EXPORT_SYMBOL_GPL(get_net_ns); 1100 1101 static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) 1102 { 1103 struct socket *sock; 1104 struct sock *sk; 1105 void __user *argp = (void __user *)arg; 1106 int pid, err; 1107 struct net *net; 1108 1109 sock = file->private_data; 1110 sk = sock->sk; 1111 net = sock_net(sk); 1112 if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) { 1113 struct ifreq ifr; 1114 bool need_copyout; 1115 if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) 1116 return -EFAULT; 1117 err = dev_ioctl(net, cmd, &ifr, &need_copyout); 1118 if (!err && need_copyout) 1119 if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) 1120 return -EFAULT; 1121 } else 1122 #ifdef CONFIG_WEXT_CORE 1123 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { 1124 err = wext_handle_ioctl(net, cmd, argp); 1125 } else 1126 #endif 1127 switch (cmd) { 1128 case FIOSETOWN: 1129 case SIOCSPGRP: 1130 err = -EFAULT; 1131 if (get_user(pid, (int __user *)argp)) 1132 break; 1133 err = f_setown(sock->file, pid, 1); 1134 break; 1135 case FIOGETOWN: 1136 case SIOCGPGRP: 1137 err = put_user(f_getown(sock->file), 1138 (int __user *)argp); 1139 break; 1140 case SIOCGIFBR: 1141 case SIOCSIFBR: 1142 case SIOCBRADDBR: 1143 case SIOCBRDELBR: 1144 err = -ENOPKG; 1145 if (!br_ioctl_hook) 1146 request_module("bridge"); 1147 1148 mutex_lock(&br_ioctl_mutex); 1149 if (br_ioctl_hook) 1150 err = br_ioctl_hook(net, cmd, argp); 1151 mutex_unlock(&br_ioctl_mutex); 1152 break; 1153 case SIOCGIFVLAN: 1154 case SIOCSIFVLAN: 1155 err = -ENOPKG; 1156 if (!vlan_ioctl_hook) 1157 request_module("8021q"); 1158 1159 mutex_lock(&vlan_ioctl_mutex); 1160 if (vlan_ioctl_hook) 1161 err = vlan_ioctl_hook(net, argp); 1162 mutex_unlock(&vlan_ioctl_mutex); 1163 break; 1164 case SIOCADDDLCI: 1165 case SIOCDELDLCI: 1166 err = -ENOPKG; 1167 if (!dlci_ioctl_hook) 1168 request_module("dlci"); 1169 1170 mutex_lock(&dlci_ioctl_mutex); 1171 if (dlci_ioctl_hook) 1172 err = dlci_ioctl_hook(cmd, argp); 1173 mutex_unlock(&dlci_ioctl_mutex); 1174 break; 1175 case SIOCGSKNS: 1176 err = -EPERM; 1177 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 1178 break; 1179 1180 err = open_related_ns(&net->ns, get_net_ns); 1181 break; 1182 case SIOCGSTAMP_OLD: 1183 case SIOCGSTAMPNS_OLD: 1184 if (!sock->ops->gettstamp) { 1185 err = -ENOIOCTLCMD; 1186 break; 1187 } 1188 err = sock->ops->gettstamp(sock, argp, 1189 cmd == SIOCGSTAMP_OLD, 1190 !IS_ENABLED(CONFIG_64BIT)); 1191 break; 1192 case SIOCGSTAMP_NEW: 1193 case SIOCGSTAMPNS_NEW: 1194 if (!sock->ops->gettstamp) { 1195 err = -ENOIOCTLCMD; 1196 break; 1197 } 1198 err = sock->ops->gettstamp(sock, argp, 1199 cmd == SIOCGSTAMP_NEW, 1200 false); 1201 break; 1202 default: 1203 err = sock_do_ioctl(net, sock, cmd, arg); 1204 break; 1205 } 1206 return err; 1207 } 1208 1209 /** 1210 * sock_create_lite - creates a socket 1211 * @family: protocol family (AF_INET, ...) 1212 * @type: communication type (SOCK_STREAM, ...) 1213 * @protocol: protocol (0, ...) 1214 * @res: new socket 1215 * 1216 * Creates a new socket and assigns it to @res, passing through LSM. 1217 * The new socket initialization is not complete, see kernel_accept(). 1218 * Returns 0 or an error. On failure @res is set to %NULL. 1219 * This function internally uses GFP_KERNEL. 1220 */ 1221 1222 int sock_create_lite(int family, int type, int protocol, struct socket **res) 1223 { 1224 int err; 1225 struct socket *sock = NULL; 1226 1227 err = security_socket_create(family, type, protocol, 1); 1228 if (err) 1229 goto out; 1230 1231 sock = sock_alloc(); 1232 if (!sock) { 1233 err = -ENOMEM; 1234 goto out; 1235 } 1236 1237 sock->type = type; 1238 err = security_socket_post_create(sock, family, type, protocol, 1); 1239 if (err) 1240 goto out_release; 1241 1242 out: 1243 *res = sock; 1244 return err; 1245 out_release: 1246 sock_release(sock); 1247 sock = NULL; 1248 goto out; 1249 } 1250 EXPORT_SYMBOL(sock_create_lite); 1251 1252 /* No kernel lock held - perfect */ 1253 static __poll_t sock_poll(struct file *file, poll_table *wait) 1254 { 1255 struct socket *sock = file->private_data; 1256 __poll_t events = poll_requested_events(wait), flag = 0; 1257 1258 if (!sock->ops->poll) 1259 return 0; 1260 1261 if (sk_can_busy_loop(sock->sk)) { 1262 /* poll once if requested by the syscall */ 1263 if (events & POLL_BUSY_LOOP) 1264 sk_busy_loop(sock->sk, 1); 1265 1266 /* if this socket can poll_ll, tell the system call */ 1267 flag = POLL_BUSY_LOOP; 1268 } 1269 1270 return sock->ops->poll(file, sock, wait) | flag; 1271 } 1272 1273 static int sock_mmap(struct file *file, struct vm_area_struct *vma) 1274 { 1275 struct socket *sock = file->private_data; 1276 1277 return sock->ops->mmap(file, sock, vma); 1278 } 1279 1280 static int sock_close(struct inode *inode, struct file *filp) 1281 { 1282 __sock_release(SOCKET_I(inode), inode); 1283 return 0; 1284 } 1285 1286 /* 1287 * Update the socket async list 1288 * 1289 * Fasync_list locking strategy. 1290 * 1291 * 1. fasync_list is modified only under process context socket lock 1292 * i.e. under semaphore. 1293 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock) 1294 * or under socket lock 1295 */ 1296 1297 static int sock_fasync(int fd, struct file *filp, int on) 1298 { 1299 struct socket *sock = filp->private_data; 1300 struct sock *sk = sock->sk; 1301 struct socket_wq *wq = &sock->wq; 1302 1303 if (sk == NULL) 1304 return -EINVAL; 1305 1306 lock_sock(sk); 1307 fasync_helper(fd, filp, on, &wq->fasync_list); 1308 1309 if (!wq->fasync_list) 1310 sock_reset_flag(sk, SOCK_FASYNC); 1311 else 1312 sock_set_flag(sk, SOCK_FASYNC); 1313 1314 release_sock(sk); 1315 return 0; 1316 } 1317 1318 /* This function may be called only under rcu_lock */ 1319 1320 int sock_wake_async(struct socket_wq *wq, int how, int band) 1321 { 1322 if (!wq || !wq->fasync_list) 1323 return -1; 1324 1325 switch (how) { 1326 case SOCK_WAKE_WAITD: 1327 if (test_bit(SOCKWQ_ASYNC_WAITDATA, &wq->flags)) 1328 break; 1329 goto call_kill; 1330 case SOCK_WAKE_SPACE: 1331 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags)) 1332 break; 1333 /* fall through */ 1334 case SOCK_WAKE_IO: 1335 call_kill: 1336 kill_fasync(&wq->fasync_list, SIGIO, band); 1337 break; 1338 case SOCK_WAKE_URG: 1339 kill_fasync(&wq->fasync_list, SIGURG, band); 1340 } 1341 1342 return 0; 1343 } 1344 EXPORT_SYMBOL(sock_wake_async); 1345 1346 /** 1347 * __sock_create - creates a socket 1348 * @net: net namespace 1349 * @family: protocol family (AF_INET, ...) 1350 * @type: communication type (SOCK_STREAM, ...) 1351 * @protocol: protocol (0, ...) 1352 * @res: new socket 1353 * @kern: boolean for kernel space sockets 1354 * 1355 * Creates a new socket and assigns it to @res, passing through LSM. 1356 * Returns 0 or an error. On failure @res is set to %NULL. @kern must 1357 * be set to true if the socket resides in kernel space. 1358 * This function internally uses GFP_KERNEL. 1359 */ 1360 1361 int __sock_create(struct net *net, int family, int type, int protocol, 1362 struct socket **res, int kern) 1363 { 1364 int err; 1365 struct socket *sock; 1366 const struct net_proto_family *pf; 1367 1368 /* 1369 * Check protocol is in range 1370 */ 1371 if (family < 0 || family >= NPROTO) 1372 return -EAFNOSUPPORT; 1373 if (type < 0 || type >= SOCK_MAX) 1374 return -EINVAL; 1375 1376 /* Compatibility. 1377 1378 This uglymoron is moved from INET layer to here to avoid 1379 deadlock in module load. 1380 */ 1381 if (family == PF_INET && type == SOCK_PACKET) { 1382 pr_info_once("%s uses obsolete (PF_INET,SOCK_PACKET)\n", 1383 current->comm); 1384 family = PF_PACKET; 1385 } 1386 1387 err = security_socket_create(family, type, protocol, kern); 1388 if (err) 1389 return err; 1390 1391 /* 1392 * Allocate the socket and allow the family to set things up. if 1393 * the protocol is 0, the family is instructed to select an appropriate 1394 * default. 1395 */ 1396 sock = sock_alloc(); 1397 if (!sock) { 1398 net_warn_ratelimited("socket: no more sockets\n"); 1399 return -ENFILE; /* Not exactly a match, but its the 1400 closest posix thing */ 1401 } 1402 1403 sock->type = type; 1404 1405 #ifdef CONFIG_MODULES 1406 /* Attempt to load a protocol module if the find failed. 1407 * 1408 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user 1409 * requested real, full-featured networking support upon configuration. 1410 * Otherwise module support will break! 1411 */ 1412 if (rcu_access_pointer(net_families[family]) == NULL) 1413 request_module("net-pf-%d", family); 1414 #endif 1415 1416 rcu_read_lock(); 1417 pf = rcu_dereference(net_families[family]); 1418 err = -EAFNOSUPPORT; 1419 if (!pf) 1420 goto out_release; 1421 1422 /* 1423 * We will call the ->create function, that possibly is in a loadable 1424 * module, so we have to bump that loadable module refcnt first. 1425 */ 1426 if (!try_module_get(pf->owner)) 1427 goto out_release; 1428 1429 /* Now protected by module ref count */ 1430 rcu_read_unlock(); 1431 1432 err = pf->create(net, sock, protocol, kern); 1433 if (err < 0) 1434 goto out_module_put; 1435 1436 /* 1437 * Now to bump the refcnt of the [loadable] module that owns this 1438 * socket at sock_release time we decrement its refcnt. 1439 */ 1440 if (!try_module_get(sock->ops->owner)) 1441 goto out_module_busy; 1442 1443 /* 1444 * Now that we're done with the ->create function, the [loadable] 1445 * module can have its refcnt decremented 1446 */ 1447 module_put(pf->owner); 1448 err = security_socket_post_create(sock, family, type, protocol, kern); 1449 if (err) 1450 goto out_sock_release; 1451 *res = sock; 1452 1453 return 0; 1454 1455 out_module_busy: 1456 err = -EAFNOSUPPORT; 1457 out_module_put: 1458 sock->ops = NULL; 1459 module_put(pf->owner); 1460 out_sock_release: 1461 sock_release(sock); 1462 return err; 1463 1464 out_release: 1465 rcu_read_unlock(); 1466 goto out_sock_release; 1467 } 1468 EXPORT_SYMBOL(__sock_create); 1469 1470 /** 1471 * sock_create - creates a socket 1472 * @family: protocol family (AF_INET, ...) 1473 * @type: communication type (SOCK_STREAM, ...) 1474 * @protocol: protocol (0, ...) 1475 * @res: new socket 1476 * 1477 * A wrapper around __sock_create(). 1478 * Returns 0 or an error. This function internally uses GFP_KERNEL. 1479 */ 1480 1481 int sock_create(int family, int type, int protocol, struct socket **res) 1482 { 1483 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); 1484 } 1485 EXPORT_SYMBOL(sock_create); 1486 1487 /** 1488 * sock_create_kern - creates a socket (kernel space) 1489 * @net: net namespace 1490 * @family: protocol family (AF_INET, ...) 1491 * @type: communication type (SOCK_STREAM, ...) 1492 * @protocol: protocol (0, ...) 1493 * @res: new socket 1494 * 1495 * A wrapper around __sock_create(). 1496 * Returns 0 or an error. This function internally uses GFP_KERNEL. 1497 */ 1498 1499 int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res) 1500 { 1501 return __sock_create(net, family, type, protocol, res, 1); 1502 } 1503 EXPORT_SYMBOL(sock_create_kern); 1504 1505 int __sys_socket(int family, int type, int protocol) 1506 { 1507 int retval; 1508 struct socket *sock; 1509 int flags; 1510 1511 /* Check the SOCK_* constants for consistency. */ 1512 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC); 1513 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK); 1514 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK); 1515 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK); 1516 1517 flags = type & ~SOCK_TYPE_MASK; 1518 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1519 return -EINVAL; 1520 type &= SOCK_TYPE_MASK; 1521 1522 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 1523 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1524 1525 retval = sock_create(family, type, protocol, &sock); 1526 if (retval < 0) 1527 return retval; 1528 1529 return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); 1530 } 1531 1532 SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) 1533 { 1534 return __sys_socket(family, type, protocol); 1535 } 1536 1537 /* 1538 * Create a pair of connected sockets. 1539 */ 1540 1541 int __sys_socketpair(int family, int type, int protocol, int __user *usockvec) 1542 { 1543 struct socket *sock1, *sock2; 1544 int fd1, fd2, err; 1545 struct file *newfile1, *newfile2; 1546 int flags; 1547 1548 flags = type & ~SOCK_TYPE_MASK; 1549 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1550 return -EINVAL; 1551 type &= SOCK_TYPE_MASK; 1552 1553 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 1554 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1555 1556 /* 1557 * reserve descriptors and make sure we won't fail 1558 * to return them to userland. 1559 */ 1560 fd1 = get_unused_fd_flags(flags); 1561 if (unlikely(fd1 < 0)) 1562 return fd1; 1563 1564 fd2 = get_unused_fd_flags(flags); 1565 if (unlikely(fd2 < 0)) { 1566 put_unused_fd(fd1); 1567 return fd2; 1568 } 1569 1570 err = put_user(fd1, &usockvec[0]); 1571 if (err) 1572 goto out; 1573 1574 err = put_user(fd2, &usockvec[1]); 1575 if (err) 1576 goto out; 1577 1578 /* 1579 * Obtain the first socket and check if the underlying protocol 1580 * supports the socketpair call. 1581 */ 1582 1583 err = sock_create(family, type, protocol, &sock1); 1584 if (unlikely(err < 0)) 1585 goto out; 1586 1587 err = sock_create(family, type, protocol, &sock2); 1588 if (unlikely(err < 0)) { 1589 sock_release(sock1); 1590 goto out; 1591 } 1592 1593 err = security_socket_socketpair(sock1, sock2); 1594 if (unlikely(err)) { 1595 sock_release(sock2); 1596 sock_release(sock1); 1597 goto out; 1598 } 1599 1600 err = sock1->ops->socketpair(sock1, sock2); 1601 if (unlikely(err < 0)) { 1602 sock_release(sock2); 1603 sock_release(sock1); 1604 goto out; 1605 } 1606 1607 newfile1 = sock_alloc_file(sock1, flags, NULL); 1608 if (IS_ERR(newfile1)) { 1609 err = PTR_ERR(newfile1); 1610 sock_release(sock2); 1611 goto out; 1612 } 1613 1614 newfile2 = sock_alloc_file(sock2, flags, NULL); 1615 if (IS_ERR(newfile2)) { 1616 err = PTR_ERR(newfile2); 1617 fput(newfile1); 1618 goto out; 1619 } 1620 1621 audit_fd_pair(fd1, fd2); 1622 1623 fd_install(fd1, newfile1); 1624 fd_install(fd2, newfile2); 1625 return 0; 1626 1627 out: 1628 put_unused_fd(fd2); 1629 put_unused_fd(fd1); 1630 return err; 1631 } 1632 1633 SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, 1634 int __user *, usockvec) 1635 { 1636 return __sys_socketpair(family, type, protocol, usockvec); 1637 } 1638 1639 /* 1640 * Bind a name to a socket. Nothing much to do here since it's 1641 * the protocol's responsibility to handle the local address. 1642 * 1643 * We move the socket address to kernel space before we call 1644 * the protocol layer (having also checked the address is ok). 1645 */ 1646 1647 int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen) 1648 { 1649 struct socket *sock; 1650 struct sockaddr_storage address; 1651 int err, fput_needed; 1652 1653 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1654 if (sock) { 1655 err = move_addr_to_kernel(umyaddr, addrlen, &address); 1656 if (!err) { 1657 err = security_socket_bind(sock, 1658 (struct sockaddr *)&address, 1659 addrlen); 1660 if (!err) 1661 err = sock->ops->bind(sock, 1662 (struct sockaddr *) 1663 &address, addrlen); 1664 } 1665 fput_light(sock->file, fput_needed); 1666 } 1667 return err; 1668 } 1669 1670 SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen) 1671 { 1672 return __sys_bind(fd, umyaddr, addrlen); 1673 } 1674 1675 /* 1676 * Perform a listen. Basically, we allow the protocol to do anything 1677 * necessary for a listen, and if that works, we mark the socket as 1678 * ready for listening. 1679 */ 1680 1681 int __sys_listen(int fd, int backlog) 1682 { 1683 struct socket *sock; 1684 int err, fput_needed; 1685 int somaxconn; 1686 1687 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1688 if (sock) { 1689 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn; 1690 if ((unsigned int)backlog > somaxconn) 1691 backlog = somaxconn; 1692 1693 err = security_socket_listen(sock, backlog); 1694 if (!err) 1695 err = sock->ops->listen(sock, backlog); 1696 1697 fput_light(sock->file, fput_needed); 1698 } 1699 return err; 1700 } 1701 1702 SYSCALL_DEFINE2(listen, int, fd, int, backlog) 1703 { 1704 return __sys_listen(fd, backlog); 1705 } 1706 1707 int __sys_accept4_file(struct file *file, unsigned file_flags, 1708 struct sockaddr __user *upeer_sockaddr, 1709 int __user *upeer_addrlen, int flags) 1710 { 1711 struct socket *sock, *newsock; 1712 struct file *newfile; 1713 int err, len, newfd; 1714 struct sockaddr_storage address; 1715 1716 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 1717 return -EINVAL; 1718 1719 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) 1720 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; 1721 1722 sock = sock_from_file(file, &err); 1723 if (!sock) 1724 goto out; 1725 1726 err = -ENFILE; 1727 newsock = sock_alloc(); 1728 if (!newsock) 1729 goto out; 1730 1731 newsock->type = sock->type; 1732 newsock->ops = sock->ops; 1733 1734 /* 1735 * We don't need try_module_get here, as the listening socket (sock) 1736 * has the protocol module (sock->ops->owner) held. 1737 */ 1738 __module_get(newsock->ops->owner); 1739 1740 newfd = get_unused_fd_flags(flags); 1741 if (unlikely(newfd < 0)) { 1742 err = newfd; 1743 sock_release(newsock); 1744 goto out; 1745 } 1746 newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name); 1747 if (IS_ERR(newfile)) { 1748 err = PTR_ERR(newfile); 1749 put_unused_fd(newfd); 1750 goto out; 1751 } 1752 1753 err = security_socket_accept(sock, newsock); 1754 if (err) 1755 goto out_fd; 1756 1757 err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags, 1758 false); 1759 if (err < 0) 1760 goto out_fd; 1761 1762 if (upeer_sockaddr) { 1763 len = newsock->ops->getname(newsock, 1764 (struct sockaddr *)&address, 2); 1765 if (len < 0) { 1766 err = -ECONNABORTED; 1767 goto out_fd; 1768 } 1769 err = move_addr_to_user(&address, 1770 len, upeer_sockaddr, upeer_addrlen); 1771 if (err < 0) 1772 goto out_fd; 1773 } 1774 1775 /* File flags are not inherited via accept() unlike another OSes. */ 1776 1777 fd_install(newfd, newfile); 1778 err = newfd; 1779 out: 1780 return err; 1781 out_fd: 1782 fput(newfile); 1783 put_unused_fd(newfd); 1784 goto out; 1785 1786 } 1787 1788 /* 1789 * For accept, we attempt to create a new socket, set up the link 1790 * with the client, wake up the client, then return the new 1791 * connected fd. We collect the address of the connector in kernel 1792 * space and move it to user at the very end. This is unclean because 1793 * we open the socket then return an error. 1794 * 1795 * 1003.1g adds the ability to recvmsg() to query connection pending 1796 * status to recvmsg. We need to add that support in a way thats 1797 * clean when we restructure accept also. 1798 */ 1799 1800 int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, 1801 int __user *upeer_addrlen, int flags) 1802 { 1803 int ret = -EBADF; 1804 struct fd f; 1805 1806 f = fdget(fd); 1807 if (f.file) { 1808 ret = __sys_accept4_file(f.file, 0, upeer_sockaddr, 1809 upeer_addrlen, flags); 1810 if (f.flags) 1811 fput(f.file); 1812 } 1813 1814 return ret; 1815 } 1816 1817 SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, 1818 int __user *, upeer_addrlen, int, flags) 1819 { 1820 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, flags); 1821 } 1822 1823 SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr, 1824 int __user *, upeer_addrlen) 1825 { 1826 return __sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0); 1827 } 1828 1829 /* 1830 * Attempt to connect to a socket with the server address. The address 1831 * is in user space so we verify it is OK and move it to kernel space. 1832 * 1833 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to 1834 * break bindings 1835 * 1836 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and 1837 * other SEQPACKET protocols that take time to connect() as it doesn't 1838 * include the -EINPROGRESS status for such sockets. 1839 */ 1840 1841 int __sys_connect_file(struct file *file, struct sockaddr_storage *address, 1842 int addrlen, int file_flags) 1843 { 1844 struct socket *sock; 1845 int err; 1846 1847 sock = sock_from_file(file, &err); 1848 if (!sock) 1849 goto out; 1850 1851 err = 1852 security_socket_connect(sock, (struct sockaddr *)address, addrlen); 1853 if (err) 1854 goto out; 1855 1856 err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen, 1857 sock->file->f_flags | file_flags); 1858 out: 1859 return err; 1860 } 1861 1862 int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen) 1863 { 1864 int ret = -EBADF; 1865 struct fd f; 1866 1867 f = fdget(fd); 1868 if (f.file) { 1869 struct sockaddr_storage address; 1870 1871 ret = move_addr_to_kernel(uservaddr, addrlen, &address); 1872 if (!ret) 1873 ret = __sys_connect_file(f.file, &address, addrlen, 0); 1874 if (f.flags) 1875 fput(f.file); 1876 } 1877 1878 return ret; 1879 } 1880 1881 SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr, 1882 int, addrlen) 1883 { 1884 return __sys_connect(fd, uservaddr, addrlen); 1885 } 1886 1887 /* 1888 * Get the local address ('name') of a socket object. Move the obtained 1889 * name to user space. 1890 */ 1891 1892 int __sys_getsockname(int fd, struct sockaddr __user *usockaddr, 1893 int __user *usockaddr_len) 1894 { 1895 struct socket *sock; 1896 struct sockaddr_storage address; 1897 int err, fput_needed; 1898 1899 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1900 if (!sock) 1901 goto out; 1902 1903 err = security_socket_getsockname(sock); 1904 if (err) 1905 goto out_put; 1906 1907 err = sock->ops->getname(sock, (struct sockaddr *)&address, 0); 1908 if (err < 0) 1909 goto out_put; 1910 /* "err" is actually length in this case */ 1911 err = move_addr_to_user(&address, err, usockaddr, usockaddr_len); 1912 1913 out_put: 1914 fput_light(sock->file, fput_needed); 1915 out: 1916 return err; 1917 } 1918 1919 SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr, 1920 int __user *, usockaddr_len) 1921 { 1922 return __sys_getsockname(fd, usockaddr, usockaddr_len); 1923 } 1924 1925 /* 1926 * Get the remote address ('name') of a socket object. Move the obtained 1927 * name to user space. 1928 */ 1929 1930 int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, 1931 int __user *usockaddr_len) 1932 { 1933 struct socket *sock; 1934 struct sockaddr_storage address; 1935 int err, fput_needed; 1936 1937 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1938 if (sock != NULL) { 1939 err = security_socket_getpeername(sock); 1940 if (err) { 1941 fput_light(sock->file, fput_needed); 1942 return err; 1943 } 1944 1945 err = sock->ops->getname(sock, (struct sockaddr *)&address, 1); 1946 if (err >= 0) 1947 /* "err" is actually length in this case */ 1948 err = move_addr_to_user(&address, err, usockaddr, 1949 usockaddr_len); 1950 fput_light(sock->file, fput_needed); 1951 } 1952 return err; 1953 } 1954 1955 SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr, 1956 int __user *, usockaddr_len) 1957 { 1958 return __sys_getpeername(fd, usockaddr, usockaddr_len); 1959 } 1960 1961 /* 1962 * Send a datagram to a given address. We move the address into kernel 1963 * space and check the user space data area is readable before invoking 1964 * the protocol. 1965 */ 1966 int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags, 1967 struct sockaddr __user *addr, int addr_len) 1968 { 1969 struct socket *sock; 1970 struct sockaddr_storage address; 1971 int err; 1972 struct msghdr msg; 1973 struct iovec iov; 1974 int fput_needed; 1975 1976 err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter); 1977 if (unlikely(err)) 1978 return err; 1979 sock = sockfd_lookup_light(fd, &err, &fput_needed); 1980 if (!sock) 1981 goto out; 1982 1983 msg.msg_name = NULL; 1984 msg.msg_control = NULL; 1985 msg.msg_controllen = 0; 1986 msg.msg_namelen = 0; 1987 if (addr) { 1988 err = move_addr_to_kernel(addr, addr_len, &address); 1989 if (err < 0) 1990 goto out_put; 1991 msg.msg_name = (struct sockaddr *)&address; 1992 msg.msg_namelen = addr_len; 1993 } 1994 if (sock->file->f_flags & O_NONBLOCK) 1995 flags |= MSG_DONTWAIT; 1996 msg.msg_flags = flags; 1997 err = sock_sendmsg(sock, &msg); 1998 1999 out_put: 2000 fput_light(sock->file, fput_needed); 2001 out: 2002 return err; 2003 } 2004 2005 SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, 2006 unsigned int, flags, struct sockaddr __user *, addr, 2007 int, addr_len) 2008 { 2009 return __sys_sendto(fd, buff, len, flags, addr, addr_len); 2010 } 2011 2012 /* 2013 * Send a datagram down a socket. 2014 */ 2015 2016 SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len, 2017 unsigned int, flags) 2018 { 2019 return __sys_sendto(fd, buff, len, flags, NULL, 0); 2020 } 2021 2022 /* 2023 * Receive a frame from the socket and optionally record the address of the 2024 * sender. We verify the buffers are writable and if needed move the 2025 * sender address from kernel to user space. 2026 */ 2027 int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags, 2028 struct sockaddr __user *addr, int __user *addr_len) 2029 { 2030 struct socket *sock; 2031 struct iovec iov; 2032 struct msghdr msg; 2033 struct sockaddr_storage address; 2034 int err, err2; 2035 int fput_needed; 2036 2037 err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter); 2038 if (unlikely(err)) 2039 return err; 2040 sock = sockfd_lookup_light(fd, &err, &fput_needed); 2041 if (!sock) 2042 goto out; 2043 2044 msg.msg_control = NULL; 2045 msg.msg_controllen = 0; 2046 /* Save some cycles and don't copy the address if not needed */ 2047 msg.msg_name = addr ? (struct sockaddr *)&address : NULL; 2048 /* We assume all kernel code knows the size of sockaddr_storage */ 2049 msg.msg_namelen = 0; 2050 msg.msg_iocb = NULL; 2051 msg.msg_flags = 0; 2052 if (sock->file->f_flags & O_NONBLOCK) 2053 flags |= MSG_DONTWAIT; 2054 err = sock_recvmsg(sock, &msg, flags); 2055 2056 if (err >= 0 && addr != NULL) { 2057 err2 = move_addr_to_user(&address, 2058 msg.msg_namelen, addr, addr_len); 2059 if (err2 < 0) 2060 err = err2; 2061 } 2062 2063 fput_light(sock->file, fput_needed); 2064 out: 2065 return err; 2066 } 2067 2068 SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, 2069 unsigned int, flags, struct sockaddr __user *, addr, 2070 int __user *, addr_len) 2071 { 2072 return __sys_recvfrom(fd, ubuf, size, flags, addr, addr_len); 2073 } 2074 2075 /* 2076 * Receive a datagram from a socket. 2077 */ 2078 2079 SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size, 2080 unsigned int, flags) 2081 { 2082 return __sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); 2083 } 2084 2085 /* 2086 * Set a socket option. Because we don't know the option lengths we have 2087 * to pass the user mode parameter for the protocols to sort out. 2088 */ 2089 2090 static int __sys_setsockopt(int fd, int level, int optname, 2091 char __user *optval, int optlen) 2092 { 2093 mm_segment_t oldfs = get_fs(); 2094 char *kernel_optval = NULL; 2095 int err, fput_needed; 2096 struct socket *sock; 2097 2098 if (optlen < 0) 2099 return -EINVAL; 2100 2101 sock = sockfd_lookup_light(fd, &err, &fput_needed); 2102 if (sock != NULL) { 2103 err = security_socket_setsockopt(sock, level, optname); 2104 if (err) 2105 goto out_put; 2106 2107 err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, 2108 &optname, optval, &optlen, 2109 &kernel_optval); 2110 2111 if (err < 0) { 2112 goto out_put; 2113 } else if (err > 0) { 2114 err = 0; 2115 goto out_put; 2116 } 2117 2118 if (kernel_optval) { 2119 set_fs(KERNEL_DS); 2120 optval = (char __user __force *)kernel_optval; 2121 } 2122 2123 if (level == SOL_SOCKET) 2124 err = 2125 sock_setsockopt(sock, level, optname, optval, 2126 optlen); 2127 else 2128 err = 2129 sock->ops->setsockopt(sock, level, optname, optval, 2130 optlen); 2131 2132 if (kernel_optval) { 2133 set_fs(oldfs); 2134 kfree(kernel_optval); 2135 } 2136 out_put: 2137 fput_light(sock->file, fput_needed); 2138 } 2139 return err; 2140 } 2141 2142 SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, 2143 char __user *, optval, int, optlen) 2144 { 2145 return __sys_setsockopt(fd, level, optname, optval, optlen); 2146 } 2147 2148 /* 2149 * Get a socket option. Because we don't know the option lengths we have 2150 * to pass a user mode parameter for the protocols to sort out. 2151 */ 2152 2153 static int __sys_getsockopt(int fd, int level, int optname, 2154 char __user *optval, int __user *optlen) 2155 { 2156 int err, fput_needed; 2157 struct socket *sock; 2158 int max_optlen; 2159 2160 sock = sockfd_lookup_light(fd, &err, &fput_needed); 2161 if (sock != NULL) { 2162 err = security_socket_getsockopt(sock, level, optname); 2163 if (err) 2164 goto out_put; 2165 2166 max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen); 2167 2168 if (level == SOL_SOCKET) 2169 err = 2170 sock_getsockopt(sock, level, optname, optval, 2171 optlen); 2172 else 2173 err = 2174 sock->ops->getsockopt(sock, level, optname, optval, 2175 optlen); 2176 2177 err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname, 2178 optval, optlen, 2179 max_optlen, err); 2180 out_put: 2181 fput_light(sock->file, fput_needed); 2182 } 2183 return err; 2184 } 2185 2186 SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, 2187 char __user *, optval, int __user *, optlen) 2188 { 2189 return __sys_getsockopt(fd, level, optname, optval, optlen); 2190 } 2191 2192 /* 2193 * Shutdown a socket. 2194 */ 2195 2196 int __sys_shutdown(int fd, int how) 2197 { 2198 int err, fput_needed; 2199 struct socket *sock; 2200 2201 sock = sockfd_lookup_light(fd, &err, &fput_needed); 2202 if (sock != NULL) { 2203 err = security_socket_shutdown(sock, how); 2204 if (!err) 2205 err = sock->ops->shutdown(sock, how); 2206 fput_light(sock->file, fput_needed); 2207 } 2208 return err; 2209 } 2210 2211 SYSCALL_DEFINE2(shutdown, int, fd, int, how) 2212 { 2213 return __sys_shutdown(fd, how); 2214 } 2215 2216 /* A couple of helpful macros for getting the address of the 32/64 bit 2217 * fields which are the same type (int / unsigned) on our platforms. 2218 */ 2219 #define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member) 2220 #define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) 2221 #define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) 2222 2223 struct used_address { 2224 struct sockaddr_storage name; 2225 unsigned int name_len; 2226 }; 2227 2228 static int copy_msghdr_from_user(struct msghdr *kmsg, 2229 struct user_msghdr __user *umsg, 2230 struct sockaddr __user **save_addr, 2231 struct iovec **iov) 2232 { 2233 struct user_msghdr msg; 2234 ssize_t err; 2235 2236 if (copy_from_user(&msg, umsg, sizeof(*umsg))) 2237 return -EFAULT; 2238 2239 kmsg->msg_control = (void __force *)msg.msg_control; 2240 kmsg->msg_controllen = msg.msg_controllen; 2241 kmsg->msg_flags = msg.msg_flags; 2242 2243 kmsg->msg_namelen = msg.msg_namelen; 2244 if (!msg.msg_name) 2245 kmsg->msg_namelen = 0; 2246 2247 if (kmsg->msg_namelen < 0) 2248 return -EINVAL; 2249 2250 if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) 2251 kmsg->msg_namelen = sizeof(struct sockaddr_storage); 2252 2253 if (save_addr) 2254 *save_addr = msg.msg_name; 2255 2256 if (msg.msg_name && kmsg->msg_namelen) { 2257 if (!save_addr) { 2258 err = move_addr_to_kernel(msg.msg_name, 2259 kmsg->msg_namelen, 2260 kmsg->msg_name); 2261 if (err < 0) 2262 return err; 2263 } 2264 } else { 2265 kmsg->msg_name = NULL; 2266 kmsg->msg_namelen = 0; 2267 } 2268 2269 if (msg.msg_iovlen > UIO_MAXIOV) 2270 return -EMSGSIZE; 2271 2272 kmsg->msg_iocb = NULL; 2273 2274 err = import_iovec(save_addr ? READ : WRITE, 2275 msg.msg_iov, msg.msg_iovlen, 2276 UIO_FASTIOV, iov, &kmsg->msg_iter); 2277 return err < 0 ? err : 0; 2278 } 2279 2280 static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys, 2281 unsigned int flags, struct used_address *used_address, 2282 unsigned int allowed_msghdr_flags) 2283 { 2284 unsigned char ctl[sizeof(struct cmsghdr) + 20] 2285 __aligned(sizeof(__kernel_size_t)); 2286 /* 20 is size of ipv6_pktinfo */ 2287 unsigned char *ctl_buf = ctl; 2288 int ctl_len; 2289 ssize_t err; 2290 2291 err = -ENOBUFS; 2292 2293 if (msg_sys->msg_controllen > INT_MAX) 2294 goto out; 2295 flags |= (msg_sys->msg_flags & allowed_msghdr_flags); 2296 ctl_len = msg_sys->msg_controllen; 2297 if ((MSG_CMSG_COMPAT & flags) && ctl_len) { 2298 err = 2299 cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl, 2300 sizeof(ctl)); 2301 if (err) 2302 goto out; 2303 ctl_buf = msg_sys->msg_control; 2304 ctl_len = msg_sys->msg_controllen; 2305 } else if (ctl_len) { 2306 BUILD_BUG_ON(sizeof(struct cmsghdr) != 2307 CMSG_ALIGN(sizeof(struct cmsghdr))); 2308 if (ctl_len > sizeof(ctl)) { 2309 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); 2310 if (ctl_buf == NULL) 2311 goto out; 2312 } 2313 err = -EFAULT; 2314 /* 2315 * Careful! Before this, msg_sys->msg_control contains a user pointer. 2316 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted 2317 * checking falls down on this. 2318 */ 2319 if (copy_from_user(ctl_buf, 2320 (void __user __force *)msg_sys->msg_control, 2321 ctl_len)) 2322 goto out_freectl; 2323 msg_sys->msg_control = ctl_buf; 2324 } 2325 msg_sys->msg_flags = flags; 2326 2327 if (sock->file->f_flags & O_NONBLOCK) 2328 msg_sys->msg_flags |= MSG_DONTWAIT; 2329 /* 2330 * If this is sendmmsg() and current destination address is same as 2331 * previously succeeded address, omit asking LSM's decision. 2332 * used_address->name_len is initialized to UINT_MAX so that the first 2333 * destination address never matches. 2334 */ 2335 if (used_address && msg_sys->msg_name && 2336 used_address->name_len == msg_sys->msg_namelen && 2337 !memcmp(&used_address->name, msg_sys->msg_name, 2338 used_address->name_len)) { 2339 err = sock_sendmsg_nosec(sock, msg_sys); 2340 goto out_freectl; 2341 } 2342 err = sock_sendmsg(sock, msg_sys); 2343 /* 2344 * If this is sendmmsg() and sending to current destination address was 2345 * successful, remember it. 2346 */ 2347 if (used_address && err >= 0) { 2348 used_address->name_len = msg_sys->msg_namelen; 2349 if (msg_sys->msg_name) 2350 memcpy(&used_address->name, msg_sys->msg_name, 2351 used_address->name_len); 2352 } 2353 2354 out_freectl: 2355 if (ctl_buf != ctl) 2356 sock_kfree_s(sock->sk, ctl_buf, ctl_len); 2357 out: 2358 return err; 2359 } 2360 2361 int sendmsg_copy_msghdr(struct msghdr *msg, 2362 struct user_msghdr __user *umsg, unsigned flags, 2363 struct iovec **iov) 2364 { 2365 int err; 2366 2367 if (flags & MSG_CMSG_COMPAT) { 2368 struct compat_msghdr __user *msg_compat; 2369 2370 msg_compat = (struct compat_msghdr __user *) umsg; 2371 err = get_compat_msghdr(msg, msg_compat, NULL, iov); 2372 } else { 2373 err = copy_msghdr_from_user(msg, umsg, NULL, iov); 2374 } 2375 if (err < 0) 2376 return err; 2377 2378 return 0; 2379 } 2380 2381 static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, 2382 struct msghdr *msg_sys, unsigned int flags, 2383 struct used_address *used_address, 2384 unsigned int allowed_msghdr_flags) 2385 { 2386 struct sockaddr_storage address; 2387 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; 2388 ssize_t err; 2389 2390 msg_sys->msg_name = &address; 2391 2392 err = sendmsg_copy_msghdr(msg_sys, msg, flags, &iov); 2393 if (err < 0) 2394 return err; 2395 2396 err = ____sys_sendmsg(sock, msg_sys, flags, used_address, 2397 allowed_msghdr_flags); 2398 kfree(iov); 2399 return err; 2400 } 2401 2402 /* 2403 * BSD sendmsg interface 2404 */ 2405 long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg, 2406 unsigned int flags) 2407 { 2408 /* disallow ancillary data requests from this path */ 2409 if (msg->msg_control || msg->msg_controllen) 2410 return -EINVAL; 2411 2412 return ____sys_sendmsg(sock, msg, flags, NULL, 0); 2413 } 2414 2415 long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned int flags, 2416 bool forbid_cmsg_compat) 2417 { 2418 int fput_needed, err; 2419 struct msghdr msg_sys; 2420 struct socket *sock; 2421 2422 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT)) 2423 return -EINVAL; 2424 2425 sock = sockfd_lookup_light(fd, &err, &fput_needed); 2426 if (!sock) 2427 goto out; 2428 2429 err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0); 2430 2431 fput_light(sock->file, fput_needed); 2432 out: 2433 return err; 2434 } 2435 2436 SYSCALL_DEFINE3(sendmsg, int, fd, struct user_msghdr __user *, msg, unsigned int, flags) 2437 { 2438 return __sys_sendmsg(fd, msg, flags, true); 2439 } 2440 2441 /* 2442 * Linux sendmmsg interface 2443 */ 2444 2445 int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, 2446 unsigned int flags, bool forbid_cmsg_compat) 2447 { 2448 int fput_needed, err, datagrams; 2449 struct socket *sock; 2450 struct mmsghdr __user *entry; 2451 struct compat_mmsghdr __user *compat_entry; 2452 struct msghdr msg_sys; 2453 struct used_address used_address; 2454 unsigned int oflags = flags; 2455 2456 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT)) 2457 return -EINVAL; 2458 2459 if (vlen > UIO_MAXIOV) 2460 vlen = UIO_MAXIOV; 2461 2462 datagrams = 0; 2463 2464 sock = sockfd_lookup_light(fd, &err, &fput_needed); 2465 if (!sock) 2466 return err; 2467 2468 used_address.name_len = UINT_MAX; 2469 entry = mmsg; 2470 compat_entry = (struct compat_mmsghdr __user *)mmsg; 2471 err = 0; 2472 flags |= MSG_BATCH; 2473 2474 while (datagrams < vlen) { 2475 if (datagrams == vlen - 1) 2476 flags = oflags; 2477 2478 if (MSG_CMSG_COMPAT & flags) { 2479 err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry, 2480 &msg_sys, flags, &used_address, MSG_EOR); 2481 if (err < 0) 2482 break; 2483 err = __put_user(err, &compat_entry->msg_len); 2484 ++compat_entry; 2485 } else { 2486 err = ___sys_sendmsg(sock, 2487 (struct user_msghdr __user *)entry, 2488 &msg_sys, flags, &used_address, MSG_EOR); 2489 if (err < 0) 2490 break; 2491 err = put_user(err, &entry->msg_len); 2492 ++entry; 2493 } 2494 2495 if (err) 2496 break; 2497 ++datagrams; 2498 if (msg_data_left(&msg_sys)) 2499 break; 2500 cond_resched(); 2501 } 2502 2503 fput_light(sock->file, fput_needed); 2504 2505 /* We only return an error if no datagrams were able to be sent */ 2506 if (datagrams != 0) 2507 return datagrams; 2508 2509 return err; 2510 } 2511 2512 SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg, 2513 unsigned int, vlen, unsigned int, flags) 2514 { 2515 return __sys_sendmmsg(fd, mmsg, vlen, flags, true); 2516 } 2517 2518 int recvmsg_copy_msghdr(struct msghdr *msg, 2519 struct user_msghdr __user *umsg, unsigned flags, 2520 struct sockaddr __user **uaddr, 2521 struct iovec **iov) 2522 { 2523 ssize_t err; 2524 2525 if (MSG_CMSG_COMPAT & flags) { 2526 struct compat_msghdr __user *msg_compat; 2527 2528 msg_compat = (struct compat_msghdr __user *) umsg; 2529 err = get_compat_msghdr(msg, msg_compat, uaddr, iov); 2530 } else { 2531 err = copy_msghdr_from_user(msg, umsg, uaddr, iov); 2532 } 2533 if (err < 0) 2534 return err; 2535 2536 return 0; 2537 } 2538 2539 static int ____sys_recvmsg(struct socket *sock, struct msghdr *msg_sys, 2540 struct user_msghdr __user *msg, 2541 struct sockaddr __user *uaddr, 2542 unsigned int flags, int nosec) 2543 { 2544 struct compat_msghdr __user *msg_compat = 2545 (struct compat_msghdr __user *) msg; 2546 int __user *uaddr_len = COMPAT_NAMELEN(msg); 2547 struct sockaddr_storage addr; 2548 unsigned long cmsg_ptr; 2549 int len; 2550 ssize_t err; 2551 2552 msg_sys->msg_name = &addr; 2553 cmsg_ptr = (unsigned long)msg_sys->msg_control; 2554 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); 2555 2556 /* We assume all kernel code knows the size of sockaddr_storage */ 2557 msg_sys->msg_namelen = 0; 2558 2559 if (sock->file->f_flags & O_NONBLOCK) 2560 flags |= MSG_DONTWAIT; 2561 2562 if (unlikely(nosec)) 2563 err = sock_recvmsg_nosec(sock, msg_sys, flags); 2564 else 2565 err = sock_recvmsg(sock, msg_sys, flags); 2566 2567 if (err < 0) 2568 goto out; 2569 len = err; 2570 2571 if (uaddr != NULL) { 2572 err = move_addr_to_user(&addr, 2573 msg_sys->msg_namelen, uaddr, 2574 uaddr_len); 2575 if (err < 0) 2576 goto out; 2577 } 2578 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT), 2579 COMPAT_FLAGS(msg)); 2580 if (err) 2581 goto out; 2582 if (MSG_CMSG_COMPAT & flags) 2583 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr, 2584 &msg_compat->msg_controllen); 2585 else 2586 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr, 2587 &msg->msg_controllen); 2588 if (err) 2589 goto out; 2590 err = len; 2591 out: 2592 return err; 2593 } 2594 2595 static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg, 2596 struct msghdr *msg_sys, unsigned int flags, int nosec) 2597 { 2598 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; 2599 /* user mode address pointers */ 2600 struct sockaddr __user *uaddr; 2601 ssize_t err; 2602 2603 err = recvmsg_copy_msghdr(msg_sys, msg, flags, &uaddr, &iov); 2604 if (err < 0) 2605 return err; 2606 2607 err = ____sys_recvmsg(sock, msg_sys, msg, uaddr, flags, nosec); 2608 kfree(iov); 2609 return err; 2610 } 2611 2612 /* 2613 * BSD recvmsg interface 2614 */ 2615 2616 long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg, 2617 struct user_msghdr __user *umsg, 2618 struct sockaddr __user *uaddr, unsigned int flags) 2619 { 2620 /* disallow ancillary data requests from this path */ 2621 if (msg->msg_control || msg->msg_controllen) 2622 return -EINVAL; 2623 2624 return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0); 2625 } 2626 2627 long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags, 2628 bool forbid_cmsg_compat) 2629 { 2630 int fput_needed, err; 2631 struct msghdr msg_sys; 2632 struct socket *sock; 2633 2634 if (forbid_cmsg_compat && (flags & MSG_CMSG_COMPAT)) 2635 return -EINVAL; 2636 2637 sock = sockfd_lookup_light(fd, &err, &fput_needed); 2638 if (!sock) 2639 goto out; 2640 2641 err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0); 2642 2643 fput_light(sock->file, fput_needed); 2644 out: 2645 return err; 2646 } 2647 2648 SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg, 2649 unsigned int, flags) 2650 { 2651 return __sys_recvmsg(fd, msg, flags, true); 2652 } 2653 2654 /* 2655 * Linux recvmmsg interface 2656 */ 2657 2658 static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg, 2659 unsigned int vlen, unsigned int flags, 2660 struct timespec64 *timeout) 2661 { 2662 int fput_needed, err, datagrams; 2663 struct socket *sock; 2664 struct mmsghdr __user *entry; 2665 struct compat_mmsghdr __user *compat_entry; 2666 struct msghdr msg_sys; 2667 struct timespec64 end_time; 2668 struct timespec64 timeout64; 2669 2670 if (timeout && 2671 poll_select_set_timeout(&end_time, timeout->tv_sec, 2672 timeout->tv_nsec)) 2673 return -EINVAL; 2674 2675 datagrams = 0; 2676 2677 sock = sockfd_lookup_light(fd, &err, &fput_needed); 2678 if (!sock) 2679 return err; 2680 2681 if (likely(!(flags & MSG_ERRQUEUE))) { 2682 err = sock_error(sock->sk); 2683 if (err) { 2684 datagrams = err; 2685 goto out_put; 2686 } 2687 } 2688 2689 entry = mmsg; 2690 compat_entry = (struct compat_mmsghdr __user *)mmsg; 2691 2692 while (datagrams < vlen) { 2693 /* 2694 * No need to ask LSM for more than the first datagram. 2695 */ 2696 if (MSG_CMSG_COMPAT & flags) { 2697 err = ___sys_recvmsg(sock, (struct user_msghdr __user *)compat_entry, 2698 &msg_sys, flags & ~MSG_WAITFORONE, 2699 datagrams); 2700 if (err < 0) 2701 break; 2702 err = __put_user(err, &compat_entry->msg_len); 2703 ++compat_entry; 2704 } else { 2705 err = ___sys_recvmsg(sock, 2706 (struct user_msghdr __user *)entry, 2707 &msg_sys, flags & ~MSG_WAITFORONE, 2708 datagrams); 2709 if (err < 0) 2710 break; 2711 err = put_user(err, &entry->msg_len); 2712 ++entry; 2713 } 2714 2715 if (err) 2716 break; 2717 ++datagrams; 2718 2719 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */ 2720 if (flags & MSG_WAITFORONE) 2721 flags |= MSG_DONTWAIT; 2722 2723 if (timeout) { 2724 ktime_get_ts64(&timeout64); 2725 *timeout = timespec64_sub(end_time, timeout64); 2726 if (timeout->tv_sec < 0) { 2727 timeout->tv_sec = timeout->tv_nsec = 0; 2728 break; 2729 } 2730 2731 /* Timeout, return less than vlen datagrams */ 2732 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0) 2733 break; 2734 } 2735 2736 /* Out of band data, return right away */ 2737 if (msg_sys.msg_flags & MSG_OOB) 2738 break; 2739 cond_resched(); 2740 } 2741 2742 if (err == 0) 2743 goto out_put; 2744 2745 if (datagrams == 0) { 2746 datagrams = err; 2747 goto out_put; 2748 } 2749 2750 /* 2751 * We may return less entries than requested (vlen) if the 2752 * sock is non block and there aren't enough datagrams... 2753 */ 2754 if (err != -EAGAIN) { 2755 /* 2756 * ... or if recvmsg returns an error after we 2757 * received some datagrams, where we record the 2758 * error to return on the next call or if the 2759 * app asks about it using getsockopt(SO_ERROR). 2760 */ 2761 sock->sk->sk_err = -err; 2762 } 2763 out_put: 2764 fput_light(sock->file, fput_needed); 2765 2766 return datagrams; 2767 } 2768 2769 int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, 2770 unsigned int vlen, unsigned int flags, 2771 struct __kernel_timespec __user *timeout, 2772 struct old_timespec32 __user *timeout32) 2773 { 2774 int datagrams; 2775 struct timespec64 timeout_sys; 2776 2777 if (timeout && get_timespec64(&timeout_sys, timeout)) 2778 return -EFAULT; 2779 2780 if (timeout32 && get_old_timespec32(&timeout_sys, timeout32)) 2781 return -EFAULT; 2782 2783 if (!timeout && !timeout32) 2784 return do_recvmmsg(fd, mmsg, vlen, flags, NULL); 2785 2786 datagrams = do_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys); 2787 2788 if (datagrams <= 0) 2789 return datagrams; 2790 2791 if (timeout && put_timespec64(&timeout_sys, timeout)) 2792 datagrams = -EFAULT; 2793 2794 if (timeout32 && put_old_timespec32(&timeout_sys, timeout32)) 2795 datagrams = -EFAULT; 2796 2797 return datagrams; 2798 } 2799 2800 SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, 2801 unsigned int, vlen, unsigned int, flags, 2802 struct __kernel_timespec __user *, timeout) 2803 { 2804 if (flags & MSG_CMSG_COMPAT) 2805 return -EINVAL; 2806 2807 return __sys_recvmmsg(fd, mmsg, vlen, flags, timeout, NULL); 2808 } 2809 2810 #ifdef CONFIG_COMPAT_32BIT_TIME 2811 SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct mmsghdr __user *, mmsg, 2812 unsigned int, vlen, unsigned int, flags, 2813 struct old_timespec32 __user *, timeout) 2814 { 2815 if (flags & MSG_CMSG_COMPAT) 2816 return -EINVAL; 2817 2818 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL, timeout); 2819 } 2820 #endif 2821 2822 #ifdef __ARCH_WANT_SYS_SOCKETCALL 2823 /* Argument list sizes for sys_socketcall */ 2824 #define AL(x) ((x) * sizeof(unsigned long)) 2825 static const unsigned char nargs[21] = { 2826 AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), 2827 AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), 2828 AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), 2829 AL(4), AL(5), AL(4) 2830 }; 2831 2832 #undef AL 2833 2834 /* 2835 * System call vectors. 2836 * 2837 * Argument checking cleaned up. Saved 20% in size. 2838 * This function doesn't need to set the kernel lock because 2839 * it is set by the callees. 2840 */ 2841 2842 SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) 2843 { 2844 unsigned long a[AUDITSC_ARGS]; 2845 unsigned long a0, a1; 2846 int err; 2847 unsigned int len; 2848 2849 if (call < 1 || call > SYS_SENDMMSG) 2850 return -EINVAL; 2851 call = array_index_nospec(call, SYS_SENDMMSG + 1); 2852 2853 len = nargs[call]; 2854 if (len > sizeof(a)) 2855 return -EINVAL; 2856 2857 /* copy_from_user should be SMP safe. */ 2858 if (copy_from_user(a, args, len)) 2859 return -EFAULT; 2860 2861 err = audit_socketcall(nargs[call] / sizeof(unsigned long), a); 2862 if (err) 2863 return err; 2864 2865 a0 = a[0]; 2866 a1 = a[1]; 2867 2868 switch (call) { 2869 case SYS_SOCKET: 2870 err = __sys_socket(a0, a1, a[2]); 2871 break; 2872 case SYS_BIND: 2873 err = __sys_bind(a0, (struct sockaddr __user *)a1, a[2]); 2874 break; 2875 case SYS_CONNECT: 2876 err = __sys_connect(a0, (struct sockaddr __user *)a1, a[2]); 2877 break; 2878 case SYS_LISTEN: 2879 err = __sys_listen(a0, a1); 2880 break; 2881 case SYS_ACCEPT: 2882 err = __sys_accept4(a0, (struct sockaddr __user *)a1, 2883 (int __user *)a[2], 0); 2884 break; 2885 case SYS_GETSOCKNAME: 2886 err = 2887 __sys_getsockname(a0, (struct sockaddr __user *)a1, 2888 (int __user *)a[2]); 2889 break; 2890 case SYS_GETPEERNAME: 2891 err = 2892 __sys_getpeername(a0, (struct sockaddr __user *)a1, 2893 (int __user *)a[2]); 2894 break; 2895 case SYS_SOCKETPAIR: 2896 err = __sys_socketpair(a0, a1, a[2], (int __user *)a[3]); 2897 break; 2898 case SYS_SEND: 2899 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3], 2900 NULL, 0); 2901 break; 2902 case SYS_SENDTO: 2903 err = __sys_sendto(a0, (void __user *)a1, a[2], a[3], 2904 (struct sockaddr __user *)a[4], a[5]); 2905 break; 2906 case SYS_RECV: 2907 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3], 2908 NULL, NULL); 2909 break; 2910 case SYS_RECVFROM: 2911 err = __sys_recvfrom(a0, (void __user *)a1, a[2], a[3], 2912 (struct sockaddr __user *)a[4], 2913 (int __user *)a[5]); 2914 break; 2915 case SYS_SHUTDOWN: 2916 err = __sys_shutdown(a0, a1); 2917 break; 2918 case SYS_SETSOCKOPT: 2919 err = __sys_setsockopt(a0, a1, a[2], (char __user *)a[3], 2920 a[4]); 2921 break; 2922 case SYS_GETSOCKOPT: 2923 err = 2924 __sys_getsockopt(a0, a1, a[2], (char __user *)a[3], 2925 (int __user *)a[4]); 2926 break; 2927 case SYS_SENDMSG: 2928 err = __sys_sendmsg(a0, (struct user_msghdr __user *)a1, 2929 a[2], true); 2930 break; 2931 case SYS_SENDMMSG: 2932 err = __sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], 2933 a[3], true); 2934 break; 2935 case SYS_RECVMSG: 2936 err = __sys_recvmsg(a0, (struct user_msghdr __user *)a1, 2937 a[2], true); 2938 break; 2939 case SYS_RECVMMSG: 2940 if (IS_ENABLED(CONFIG_64BIT)) 2941 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1, 2942 a[2], a[3], 2943 (struct __kernel_timespec __user *)a[4], 2944 NULL); 2945 else 2946 err = __sys_recvmmsg(a0, (struct mmsghdr __user *)a1, 2947 a[2], a[3], NULL, 2948 (struct old_timespec32 __user *)a[4]); 2949 break; 2950 case SYS_ACCEPT4: 2951 err = __sys_accept4(a0, (struct sockaddr __user *)a1, 2952 (int __user *)a[2], a[3]); 2953 break; 2954 default: 2955 err = -EINVAL; 2956 break; 2957 } 2958 return err; 2959 } 2960 2961 #endif /* __ARCH_WANT_SYS_SOCKETCALL */ 2962 2963 /** 2964 * sock_register - add a socket protocol handler 2965 * @ops: description of protocol 2966 * 2967 * This function is called by a protocol handler that wants to 2968 * advertise its address family, and have it linked into the 2969 * socket interface. The value ops->family corresponds to the 2970 * socket system call protocol family. 2971 */ 2972 int sock_register(const struct net_proto_family *ops) 2973 { 2974 int err; 2975 2976 if (ops->family >= NPROTO) { 2977 pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO); 2978 return -ENOBUFS; 2979 } 2980 2981 spin_lock(&net_family_lock); 2982 if (rcu_dereference_protected(net_families[ops->family], 2983 lockdep_is_held(&net_family_lock))) 2984 err = -EEXIST; 2985 else { 2986 rcu_assign_pointer(net_families[ops->family], ops); 2987 err = 0; 2988 } 2989 spin_unlock(&net_family_lock); 2990 2991 pr_info("NET: Registered protocol family %d\n", ops->family); 2992 return err; 2993 } 2994 EXPORT_SYMBOL(sock_register); 2995 2996 /** 2997 * sock_unregister - remove a protocol handler 2998 * @family: protocol family to remove 2999 * 3000 * This function is called by a protocol handler that wants to 3001 * remove its address family, and have it unlinked from the 3002 * new socket creation. 3003 * 3004 * If protocol handler is a module, then it can use module reference 3005 * counts to protect against new references. If protocol handler is not 3006 * a module then it needs to provide its own protection in 3007 * the ops->create routine. 3008 */ 3009 void sock_unregister(int family) 3010 { 3011 BUG_ON(family < 0 || family >= NPROTO); 3012 3013 spin_lock(&net_family_lock); 3014 RCU_INIT_POINTER(net_families[family], NULL); 3015 spin_unlock(&net_family_lock); 3016 3017 synchronize_rcu(); 3018 3019 pr_info("NET: Unregistered protocol family %d\n", family); 3020 } 3021 EXPORT_SYMBOL(sock_unregister); 3022 3023 bool sock_is_registered(int family) 3024 { 3025 return family < NPROTO && rcu_access_pointer(net_families[family]); 3026 } 3027 3028 static int __init sock_init(void) 3029 { 3030 int err; 3031 /* 3032 * Initialize the network sysctl infrastructure. 3033 */ 3034 err = net_sysctl_init(); 3035 if (err) 3036 goto out; 3037 3038 /* 3039 * Initialize skbuff SLAB cache 3040 */ 3041 skb_init(); 3042 3043 /* 3044 * Initialize the protocols module. 3045 */ 3046 3047 init_inodecache(); 3048 3049 err = register_filesystem(&sock_fs_type); 3050 if (err) 3051 goto out_fs; 3052 sock_mnt = kern_mount(&sock_fs_type); 3053 if (IS_ERR(sock_mnt)) { 3054 err = PTR_ERR(sock_mnt); 3055 goto out_mount; 3056 } 3057 3058 /* The real protocol initialization is performed in later initcalls. 3059 */ 3060 3061 #ifdef CONFIG_NETFILTER 3062 err = netfilter_init(); 3063 if (err) 3064 goto out; 3065 #endif 3066 3067 ptp_classifier_init(); 3068 3069 out: 3070 return err; 3071 3072 out_mount: 3073 unregister_filesystem(&sock_fs_type); 3074 out_fs: 3075 goto out; 3076 } 3077 3078 core_initcall(sock_init); /* early initcall */ 3079 3080 #ifdef CONFIG_PROC_FS 3081 void socket_seq_show(struct seq_file *seq) 3082 { 3083 seq_printf(seq, "sockets: used %d\n", 3084 sock_inuse_get(seq->private)); 3085 } 3086 #endif /* CONFIG_PROC_FS */ 3087 3088 #ifdef CONFIG_COMPAT 3089 static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) 3090 { 3091 struct compat_ifconf ifc32; 3092 struct ifconf ifc; 3093 int err; 3094 3095 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf))) 3096 return -EFAULT; 3097 3098 ifc.ifc_len = ifc32.ifc_len; 3099 ifc.ifc_req = compat_ptr(ifc32.ifcbuf); 3100 3101 rtnl_lock(); 3102 err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq)); 3103 rtnl_unlock(); 3104 if (err) 3105 return err; 3106 3107 ifc32.ifc_len = ifc.ifc_len; 3108 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf))) 3109 return -EFAULT; 3110 3111 return 0; 3112 } 3113 3114 static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) 3115 { 3116 struct compat_ethtool_rxnfc __user *compat_rxnfc; 3117 bool convert_in = false, convert_out = false; 3118 size_t buf_size = 0; 3119 struct ethtool_rxnfc __user *rxnfc = NULL; 3120 struct ifreq ifr; 3121 u32 rule_cnt = 0, actual_rule_cnt; 3122 u32 ethcmd; 3123 u32 data; 3124 int ret; 3125 3126 if (get_user(data, &ifr32->ifr_ifru.ifru_data)) 3127 return -EFAULT; 3128 3129 compat_rxnfc = compat_ptr(data); 3130 3131 if (get_user(ethcmd, &compat_rxnfc->cmd)) 3132 return -EFAULT; 3133 3134 /* Most ethtool structures are defined without padding. 3135 * Unfortunately struct ethtool_rxnfc is an exception. 3136 */ 3137 switch (ethcmd) { 3138 default: 3139 break; 3140 case ETHTOOL_GRXCLSRLALL: 3141 /* Buffer size is variable */ 3142 if (get_user(rule_cnt, &compat_rxnfc->rule_cnt)) 3143 return -EFAULT; 3144 if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32)) 3145 return -ENOMEM; 3146 buf_size += rule_cnt * sizeof(u32); 3147 /* fall through */ 3148 case ETHTOOL_GRXRINGS: 3149 case ETHTOOL_GRXCLSRLCNT: 3150 case ETHTOOL_GRXCLSRULE: 3151 case ETHTOOL_SRXCLSRLINS: 3152 convert_out = true; 3153 /* fall through */ 3154 case ETHTOOL_SRXCLSRLDEL: 3155 buf_size += sizeof(struct ethtool_rxnfc); 3156 convert_in = true; 3157 rxnfc = compat_alloc_user_space(buf_size); 3158 break; 3159 } 3160 3161 if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ)) 3162 return -EFAULT; 3163 3164 ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc; 3165 3166 if (convert_in) { 3167 /* We expect there to be holes between fs.m_ext and 3168 * fs.ring_cookie and at the end of fs, but nowhere else. 3169 */ 3170 BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) + 3171 sizeof(compat_rxnfc->fs.m_ext) != 3172 offsetof(struct ethtool_rxnfc, fs.m_ext) + 3173 sizeof(rxnfc->fs.m_ext)); 3174 BUILD_BUG_ON( 3175 offsetof(struct compat_ethtool_rxnfc, fs.location) - 3176 offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) != 3177 offsetof(struct ethtool_rxnfc, fs.location) - 3178 offsetof(struct ethtool_rxnfc, fs.ring_cookie)); 3179 3180 if (copy_in_user(rxnfc, compat_rxnfc, 3181 (void __user *)(&rxnfc->fs.m_ext + 1) - 3182 (void __user *)rxnfc) || 3183 copy_in_user(&rxnfc->fs.ring_cookie, 3184 &compat_rxnfc->fs.ring_cookie, 3185 (void __user *)(&rxnfc->fs.location + 1) - 3186 (void __user *)&rxnfc->fs.ring_cookie)) 3187 return -EFAULT; 3188 if (ethcmd == ETHTOOL_GRXCLSRLALL) { 3189 if (put_user(rule_cnt, &rxnfc->rule_cnt)) 3190 return -EFAULT; 3191 } else if (copy_in_user(&rxnfc->rule_cnt, 3192 &compat_rxnfc->rule_cnt, 3193 sizeof(rxnfc->rule_cnt))) 3194 return -EFAULT; 3195 } 3196 3197 ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL); 3198 if (ret) 3199 return ret; 3200 3201 if (convert_out) { 3202 if (copy_in_user(compat_rxnfc, rxnfc, 3203 (const void __user *)(&rxnfc->fs.m_ext + 1) - 3204 (const void __user *)rxnfc) || 3205 copy_in_user(&compat_rxnfc->fs.ring_cookie, 3206 &rxnfc->fs.ring_cookie, 3207 (const void __user *)(&rxnfc->fs.location + 1) - 3208 (const void __user *)&rxnfc->fs.ring_cookie) || 3209 copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt, 3210 sizeof(rxnfc->rule_cnt))) 3211 return -EFAULT; 3212 3213 if (ethcmd == ETHTOOL_GRXCLSRLALL) { 3214 /* As an optimisation, we only copy the actual 3215 * number of rules that the underlying 3216 * function returned. Since Mallory might 3217 * change the rule count in user memory, we 3218 * check that it is less than the rule count 3219 * originally given (as the user buffer size), 3220 * which has been range-checked. 3221 */ 3222 if (get_user(actual_rule_cnt, &rxnfc->rule_cnt)) 3223 return -EFAULT; 3224 if (actual_rule_cnt < rule_cnt) 3225 rule_cnt = actual_rule_cnt; 3226 if (copy_in_user(&compat_rxnfc->rule_locs[0], 3227 &rxnfc->rule_locs[0], 3228 rule_cnt * sizeof(u32))) 3229 return -EFAULT; 3230 } 3231 } 3232 3233 return 0; 3234 } 3235 3236 static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32) 3237 { 3238 compat_uptr_t uptr32; 3239 struct ifreq ifr; 3240 void __user *saved; 3241 int err; 3242 3243 if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq))) 3244 return -EFAULT; 3245 3246 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu)) 3247 return -EFAULT; 3248 3249 saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc; 3250 ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32); 3251 3252 err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL); 3253 if (!err) { 3254 ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved; 3255 if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq))) 3256 err = -EFAULT; 3257 } 3258 return err; 3259 } 3260 3261 /* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */ 3262 static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd, 3263 struct compat_ifreq __user *u_ifreq32) 3264 { 3265 struct ifreq ifreq; 3266 u32 data32; 3267 3268 if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ)) 3269 return -EFAULT; 3270 if (get_user(data32, &u_ifreq32->ifr_data)) 3271 return -EFAULT; 3272 ifreq.ifr_data = compat_ptr(data32); 3273 3274 return dev_ioctl(net, cmd, &ifreq, NULL); 3275 } 3276 3277 static int compat_ifreq_ioctl(struct net *net, struct socket *sock, 3278 unsigned int cmd, 3279 struct compat_ifreq __user *uifr32) 3280 { 3281 struct ifreq __user *uifr; 3282 int err; 3283 3284 /* Handle the fact that while struct ifreq has the same *layout* on 3285 * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data, 3286 * which are handled elsewhere, it still has different *size* due to 3287 * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit, 3288 * resulting in struct ifreq being 32 and 40 bytes respectively). 3289 * As a result, if the struct happens to be at the end of a page and 3290 * the next page isn't readable/writable, we get a fault. To prevent 3291 * that, copy back and forth to the full size. 3292 */ 3293 3294 uifr = compat_alloc_user_space(sizeof(*uifr)); 3295 if (copy_in_user(uifr, uifr32, sizeof(*uifr32))) 3296 return -EFAULT; 3297 3298 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr); 3299 3300 if (!err) { 3301 switch (cmd) { 3302 case SIOCGIFFLAGS: 3303 case SIOCGIFMETRIC: 3304 case SIOCGIFMTU: 3305 case SIOCGIFMEM: 3306 case SIOCGIFHWADDR: 3307 case SIOCGIFINDEX: 3308 case SIOCGIFADDR: 3309 case SIOCGIFBRDADDR: 3310 case SIOCGIFDSTADDR: 3311 case SIOCGIFNETMASK: 3312 case SIOCGIFPFLAGS: 3313 case SIOCGIFTXQLEN: 3314 case SIOCGMIIPHY: 3315 case SIOCGMIIREG: 3316 case SIOCGIFNAME: 3317 if (copy_in_user(uifr32, uifr, sizeof(*uifr32))) 3318 err = -EFAULT; 3319 break; 3320 } 3321 } 3322 return err; 3323 } 3324 3325 static int compat_sioc_ifmap(struct net *net, unsigned int cmd, 3326 struct compat_ifreq __user *uifr32) 3327 { 3328 struct ifreq ifr; 3329 struct compat_ifmap __user *uifmap32; 3330 int err; 3331 3332 uifmap32 = &uifr32->ifr_ifru.ifru_map; 3333 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name)); 3334 err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); 3335 err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); 3336 err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); 3337 err |= get_user(ifr.ifr_map.irq, &uifmap32->irq); 3338 err |= get_user(ifr.ifr_map.dma, &uifmap32->dma); 3339 err |= get_user(ifr.ifr_map.port, &uifmap32->port); 3340 if (err) 3341 return -EFAULT; 3342 3343 err = dev_ioctl(net, cmd, &ifr, NULL); 3344 3345 if (cmd == SIOCGIFMAP && !err) { 3346 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); 3347 err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); 3348 err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); 3349 err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); 3350 err |= put_user(ifr.ifr_map.irq, &uifmap32->irq); 3351 err |= put_user(ifr.ifr_map.dma, &uifmap32->dma); 3352 err |= put_user(ifr.ifr_map.port, &uifmap32->port); 3353 if (err) 3354 err = -EFAULT; 3355 } 3356 return err; 3357 } 3358 3359 struct rtentry32 { 3360 u32 rt_pad1; 3361 struct sockaddr rt_dst; /* target address */ 3362 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */ 3363 struct sockaddr rt_genmask; /* target network mask (IP) */ 3364 unsigned short rt_flags; 3365 short rt_pad2; 3366 u32 rt_pad3; 3367 unsigned char rt_tos; 3368 unsigned char rt_class; 3369 short rt_pad4; 3370 short rt_metric; /* +1 for binary compatibility! */ 3371 /* char * */ u32 rt_dev; /* forcing the device at add */ 3372 u32 rt_mtu; /* per route MTU/Window */ 3373 u32 rt_window; /* Window clamping */ 3374 unsigned short rt_irtt; /* Initial RTT */ 3375 }; 3376 3377 struct in6_rtmsg32 { 3378 struct in6_addr rtmsg_dst; 3379 struct in6_addr rtmsg_src; 3380 struct in6_addr rtmsg_gateway; 3381 u32 rtmsg_type; 3382 u16 rtmsg_dst_len; 3383 u16 rtmsg_src_len; 3384 u32 rtmsg_metric; 3385 u32 rtmsg_info; 3386 u32 rtmsg_flags; 3387 s32 rtmsg_ifindex; 3388 }; 3389 3390 static int routing_ioctl(struct net *net, struct socket *sock, 3391 unsigned int cmd, void __user *argp) 3392 { 3393 int ret; 3394 void *r = NULL; 3395 struct in6_rtmsg r6; 3396 struct rtentry r4; 3397 char devname[16]; 3398 u32 rtdev; 3399 mm_segment_t old_fs = get_fs(); 3400 3401 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */ 3402 struct in6_rtmsg32 __user *ur6 = argp; 3403 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst), 3404 3 * sizeof(struct in6_addr)); 3405 ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type)); 3406 ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); 3407 ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); 3408 ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric)); 3409 ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info)); 3410 ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags)); 3411 ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); 3412 3413 r = (void *) &r6; 3414 } else { /* ipv4 */ 3415 struct rtentry32 __user *ur4 = argp; 3416 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst), 3417 3 * sizeof(struct sockaddr)); 3418 ret |= get_user(r4.rt_flags, &(ur4->rt_flags)); 3419 ret |= get_user(r4.rt_metric, &(ur4->rt_metric)); 3420 ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu)); 3421 ret |= get_user(r4.rt_window, &(ur4->rt_window)); 3422 ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt)); 3423 ret |= get_user(rtdev, &(ur4->rt_dev)); 3424 if (rtdev) { 3425 ret |= copy_from_user(devname, compat_ptr(rtdev), 15); 3426 r4.rt_dev = (char __user __force *)devname; 3427 devname[15] = 0; 3428 } else 3429 r4.rt_dev = NULL; 3430 3431 r = (void *) &r4; 3432 } 3433 3434 if (ret) { 3435 ret = -EFAULT; 3436 goto out; 3437 } 3438 3439 set_fs(KERNEL_DS); 3440 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r); 3441 set_fs(old_fs); 3442 3443 out: 3444 return ret; 3445 } 3446 3447 /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE 3448 * for some operations; this forces use of the newer bridge-utils that 3449 * use compatible ioctls 3450 */ 3451 static int old_bridge_ioctl(compat_ulong_t __user *argp) 3452 { 3453 compat_ulong_t tmp; 3454 3455 if (get_user(tmp, argp)) 3456 return -EFAULT; 3457 if (tmp == BRCTL_GET_VERSION) 3458 return BRCTL_VERSION + 1; 3459 return -EINVAL; 3460 } 3461 3462 static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, 3463 unsigned int cmd, unsigned long arg) 3464 { 3465 void __user *argp = compat_ptr(arg); 3466 struct sock *sk = sock->sk; 3467 struct net *net = sock_net(sk); 3468 3469 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) 3470 return compat_ifr_data_ioctl(net, cmd, argp); 3471 3472 switch (cmd) { 3473 case SIOCSIFBR: 3474 case SIOCGIFBR: 3475 return old_bridge_ioctl(argp); 3476 case SIOCGIFCONF: 3477 return compat_dev_ifconf(net, argp); 3478 case SIOCETHTOOL: 3479 return ethtool_ioctl(net, argp); 3480 case SIOCWANDEV: 3481 return compat_siocwandev(net, argp); 3482 case SIOCGIFMAP: 3483 case SIOCSIFMAP: 3484 return compat_sioc_ifmap(net, cmd, argp); 3485 case SIOCADDRT: 3486 case SIOCDELRT: 3487 return routing_ioctl(net, sock, cmd, argp); 3488 case SIOCGSTAMP_OLD: 3489 case SIOCGSTAMPNS_OLD: 3490 if (!sock->ops->gettstamp) 3491 return -ENOIOCTLCMD; 3492 return sock->ops->gettstamp(sock, argp, cmd == SIOCGSTAMP_OLD, 3493 !COMPAT_USE_64BIT_TIME); 3494 3495 case SIOCBONDSLAVEINFOQUERY: 3496 case SIOCBONDINFOQUERY: 3497 case SIOCSHWTSTAMP: 3498 case SIOCGHWTSTAMP: 3499 return compat_ifr_data_ioctl(net, cmd, argp); 3500 3501 case FIOSETOWN: 3502 case SIOCSPGRP: 3503 case FIOGETOWN: 3504 case SIOCGPGRP: 3505 case SIOCBRADDBR: 3506 case SIOCBRDELBR: 3507 case SIOCGIFVLAN: 3508 case SIOCSIFVLAN: 3509 case SIOCADDDLCI: 3510 case SIOCDELDLCI: 3511 case SIOCGSKNS: 3512 case SIOCGSTAMP_NEW: 3513 case SIOCGSTAMPNS_NEW: 3514 return sock_ioctl(file, cmd, arg); 3515 3516 case SIOCGIFFLAGS: 3517 case SIOCSIFFLAGS: 3518 case SIOCGIFMETRIC: 3519 case SIOCSIFMETRIC: 3520 case SIOCGIFMTU: 3521 case SIOCSIFMTU: 3522 case SIOCGIFMEM: 3523 case SIOCSIFMEM: 3524 case SIOCGIFHWADDR: 3525 case SIOCSIFHWADDR: 3526 case SIOCADDMULTI: 3527 case SIOCDELMULTI: 3528 case SIOCGIFINDEX: 3529 case SIOCGIFADDR: 3530 case SIOCSIFADDR: 3531 case SIOCSIFHWBROADCAST: 3532 case SIOCDIFADDR: 3533 case SIOCGIFBRDADDR: 3534 case SIOCSIFBRDADDR: 3535 case SIOCGIFDSTADDR: 3536 case SIOCSIFDSTADDR: 3537 case SIOCGIFNETMASK: 3538 case SIOCSIFNETMASK: 3539 case SIOCSIFPFLAGS: 3540 case SIOCGIFPFLAGS: 3541 case SIOCGIFTXQLEN: 3542 case SIOCSIFTXQLEN: 3543 case SIOCBRADDIF: 3544 case SIOCBRDELIF: 3545 case SIOCGIFNAME: 3546 case SIOCSIFNAME: 3547 case SIOCGMIIPHY: 3548 case SIOCGMIIREG: 3549 case SIOCSMIIREG: 3550 case SIOCBONDENSLAVE: 3551 case SIOCBONDRELEASE: 3552 case SIOCBONDSETHWADDR: 3553 case SIOCBONDCHANGEACTIVE: 3554 return compat_ifreq_ioctl(net, sock, cmd, argp); 3555 3556 case SIOCSARP: 3557 case SIOCGARP: 3558 case SIOCDARP: 3559 case SIOCOUTQ: 3560 case SIOCOUTQNSD: 3561 case SIOCATMARK: 3562 return sock_do_ioctl(net, sock, cmd, arg); 3563 } 3564 3565 return -ENOIOCTLCMD; 3566 } 3567 3568 static long compat_sock_ioctl(struct file *file, unsigned int cmd, 3569 unsigned long arg) 3570 { 3571 struct socket *sock = file->private_data; 3572 int ret = -ENOIOCTLCMD; 3573 struct sock *sk; 3574 struct net *net; 3575 3576 sk = sock->sk; 3577 net = sock_net(sk); 3578 3579 if (sock->ops->compat_ioctl) 3580 ret = sock->ops->compat_ioctl(sock, cmd, arg); 3581 3582 if (ret == -ENOIOCTLCMD && 3583 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)) 3584 ret = compat_wext_handle_ioctl(net, cmd, arg); 3585 3586 if (ret == -ENOIOCTLCMD) 3587 ret = compat_sock_ioctl_trans(file, sock, cmd, arg); 3588 3589 return ret; 3590 } 3591 #endif 3592 3593 /** 3594 * kernel_bind - bind an address to a socket (kernel space) 3595 * @sock: socket 3596 * @addr: address 3597 * @addrlen: length of address 3598 * 3599 * Returns 0 or an error. 3600 */ 3601 3602 int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) 3603 { 3604 return sock->ops->bind(sock, addr, addrlen); 3605 } 3606 EXPORT_SYMBOL(kernel_bind); 3607 3608 /** 3609 * kernel_listen - move socket to listening state (kernel space) 3610 * @sock: socket 3611 * @backlog: pending connections queue size 3612 * 3613 * Returns 0 or an error. 3614 */ 3615 3616 int kernel_listen(struct socket *sock, int backlog) 3617 { 3618 return sock->ops->listen(sock, backlog); 3619 } 3620 EXPORT_SYMBOL(kernel_listen); 3621 3622 /** 3623 * kernel_accept - accept a connection (kernel space) 3624 * @sock: listening socket 3625 * @newsock: new connected socket 3626 * @flags: flags 3627 * 3628 * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0. 3629 * If it fails, @newsock is guaranteed to be %NULL. 3630 * Returns 0 or an error. 3631 */ 3632 3633 int kernel_accept(struct socket *sock, struct socket **newsock, int flags) 3634 { 3635 struct sock *sk = sock->sk; 3636 int err; 3637 3638 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, 3639 newsock); 3640 if (err < 0) 3641 goto done; 3642 3643 err = sock->ops->accept(sock, *newsock, flags, true); 3644 if (err < 0) { 3645 sock_release(*newsock); 3646 *newsock = NULL; 3647 goto done; 3648 } 3649 3650 (*newsock)->ops = sock->ops; 3651 __module_get((*newsock)->ops->owner); 3652 3653 done: 3654 return err; 3655 } 3656 EXPORT_SYMBOL(kernel_accept); 3657 3658 /** 3659 * kernel_connect - connect a socket (kernel space) 3660 * @sock: socket 3661 * @addr: address 3662 * @addrlen: address length 3663 * @flags: flags (O_NONBLOCK, ...) 3664 * 3665 * For datagram sockets, @addr is the addres to which datagrams are sent 3666 * by default, and the only address from which datagrams are received. 3667 * For stream sockets, attempts to connect to @addr. 3668 * Returns 0 or an error code. 3669 */ 3670 3671 int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, 3672 int flags) 3673 { 3674 return sock->ops->connect(sock, addr, addrlen, flags); 3675 } 3676 EXPORT_SYMBOL(kernel_connect); 3677 3678 /** 3679 * kernel_getsockname - get the address which the socket is bound (kernel space) 3680 * @sock: socket 3681 * @addr: address holder 3682 * 3683 * Fills the @addr pointer with the address which the socket is bound. 3684 * Returns 0 or an error code. 3685 */ 3686 3687 int kernel_getsockname(struct socket *sock, struct sockaddr *addr) 3688 { 3689 return sock->ops->getname(sock, addr, 0); 3690 } 3691 EXPORT_SYMBOL(kernel_getsockname); 3692 3693 /** 3694 * kernel_peername - get the address which the socket is connected (kernel space) 3695 * @sock: socket 3696 * @addr: address holder 3697 * 3698 * Fills the @addr pointer with the address which the socket is connected. 3699 * Returns 0 or an error code. 3700 */ 3701 3702 int kernel_getpeername(struct socket *sock, struct sockaddr *addr) 3703 { 3704 return sock->ops->getname(sock, addr, 1); 3705 } 3706 EXPORT_SYMBOL(kernel_getpeername); 3707 3708 /** 3709 * kernel_getsockopt - get a socket option (kernel space) 3710 * @sock: socket 3711 * @level: API level (SOL_SOCKET, ...) 3712 * @optname: option tag 3713 * @optval: option value 3714 * @optlen: option length 3715 * 3716 * Assigns the option length to @optlen. 3717 * Returns 0 or an error. 3718 */ 3719 3720 int kernel_getsockopt(struct socket *sock, int level, int optname, 3721 char *optval, int *optlen) 3722 { 3723 mm_segment_t oldfs = get_fs(); 3724 char __user *uoptval; 3725 int __user *uoptlen; 3726 int err; 3727 3728 uoptval = (char __user __force *) optval; 3729 uoptlen = (int __user __force *) optlen; 3730 3731 set_fs(KERNEL_DS); 3732 if (level == SOL_SOCKET) 3733 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen); 3734 else 3735 err = sock->ops->getsockopt(sock, level, optname, uoptval, 3736 uoptlen); 3737 set_fs(oldfs); 3738 return err; 3739 } 3740 EXPORT_SYMBOL(kernel_getsockopt); 3741 3742 /** 3743 * kernel_setsockopt - set a socket option (kernel space) 3744 * @sock: socket 3745 * @level: API level (SOL_SOCKET, ...) 3746 * @optname: option tag 3747 * @optval: option value 3748 * @optlen: option length 3749 * 3750 * Returns 0 or an error. 3751 */ 3752 3753 int kernel_setsockopt(struct socket *sock, int level, int optname, 3754 char *optval, unsigned int optlen) 3755 { 3756 mm_segment_t oldfs = get_fs(); 3757 char __user *uoptval; 3758 int err; 3759 3760 uoptval = (char __user __force *) optval; 3761 3762 set_fs(KERNEL_DS); 3763 if (level == SOL_SOCKET) 3764 err = sock_setsockopt(sock, level, optname, uoptval, optlen); 3765 else 3766 err = sock->ops->setsockopt(sock, level, optname, uoptval, 3767 optlen); 3768 set_fs(oldfs); 3769 return err; 3770 } 3771 EXPORT_SYMBOL(kernel_setsockopt); 3772 3773 /** 3774 * kernel_sendpage - send a &page through a socket (kernel space) 3775 * @sock: socket 3776 * @page: page 3777 * @offset: page offset 3778 * @size: total size in bytes 3779 * @flags: flags (MSG_DONTWAIT, ...) 3780 * 3781 * Returns the total amount sent in bytes or an error. 3782 */ 3783 3784 int kernel_sendpage(struct socket *sock, struct page *page, int offset, 3785 size_t size, int flags) 3786 { 3787 if (sock->ops->sendpage) 3788 return sock->ops->sendpage(sock, page, offset, size, flags); 3789 3790 return sock_no_sendpage(sock, page, offset, size, flags); 3791 } 3792 EXPORT_SYMBOL(kernel_sendpage); 3793 3794 /** 3795 * kernel_sendpage_locked - send a &page through the locked sock (kernel space) 3796 * @sk: sock 3797 * @page: page 3798 * @offset: page offset 3799 * @size: total size in bytes 3800 * @flags: flags (MSG_DONTWAIT, ...) 3801 * 3802 * Returns the total amount sent in bytes or an error. 3803 * Caller must hold @sk. 3804 */ 3805 3806 int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, 3807 size_t size, int flags) 3808 { 3809 struct socket *sock = sk->sk_socket; 3810 3811 if (sock->ops->sendpage_locked) 3812 return sock->ops->sendpage_locked(sk, page, offset, size, 3813 flags); 3814 3815 return sock_no_sendpage_locked(sk, page, offset, size, flags); 3816 } 3817 EXPORT_SYMBOL(kernel_sendpage_locked); 3818 3819 /** 3820 * kernel_shutdown - shut down part of a full-duplex connection (kernel space) 3821 * @sock: socket 3822 * @how: connection part 3823 * 3824 * Returns 0 or an error. 3825 */ 3826 3827 int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) 3828 { 3829 return sock->ops->shutdown(sock, how); 3830 } 3831 EXPORT_SYMBOL(kernel_sock_shutdown); 3832 3833 /** 3834 * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket 3835 * @sk: socket 3836 * 3837 * This routine returns the IP overhead imposed by a socket i.e. 3838 * the length of the underlying IP header, depending on whether 3839 * this is an IPv4 or IPv6 socket and the length from IP options turned 3840 * on at the socket. Assumes that the caller has a lock on the socket. 3841 */ 3842 3843 u32 kernel_sock_ip_overhead(struct sock *sk) 3844 { 3845 struct inet_sock *inet; 3846 struct ip_options_rcu *opt; 3847 u32 overhead = 0; 3848 #if IS_ENABLED(CONFIG_IPV6) 3849 struct ipv6_pinfo *np; 3850 struct ipv6_txoptions *optv6 = NULL; 3851 #endif /* IS_ENABLED(CONFIG_IPV6) */ 3852 3853 if (!sk) 3854 return overhead; 3855 3856 switch (sk->sk_family) { 3857 case AF_INET: 3858 inet = inet_sk(sk); 3859 overhead += sizeof(struct iphdr); 3860 opt = rcu_dereference_protected(inet->inet_opt, 3861 sock_owned_by_user(sk)); 3862 if (opt) 3863 overhead += opt->opt.optlen; 3864 return overhead; 3865 #if IS_ENABLED(CONFIG_IPV6) 3866 case AF_INET6: 3867 np = inet6_sk(sk); 3868 overhead += sizeof(struct ipv6hdr); 3869 if (np) 3870 optv6 = rcu_dereference_protected(np->opt, 3871 sock_owned_by_user(sk)); 3872 if (optv6) 3873 overhead += (optv6->opt_flen + optv6->opt_nflen); 3874 return overhead; 3875 #endif /* IS_ENABLED(CONFIG_IPV6) */ 3876 default: /* Returns 0 overhead if the socket is not ipv4 or ipv6 */ 3877 return overhead; 3878 } 3879 } 3880 EXPORT_SYMBOL(kernel_sock_ip_overhead); 3881