1 /* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * PF_INET protocol family socket handler. 7 * 8 * Version: $Id: af_inet.c,v 1.137 2002/02/01 22:01:03 davem Exp $ 9 * 10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu> 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> 12 * Florian La Roche, <flla@stud.uni-sb.de> 13 * Alan Cox, <A.Cox@swansea.ac.uk> 14 * 15 * Changes (see also sock.c) 16 * 17 * piggy, 18 * Karl Knutson : Socket protocol table 19 * A.N.Kuznetsov : Socket death error in accept(). 20 * John Richardson : Fix non blocking error in connect() 21 * so sockets that fail to connect 22 * don't return -EINPROGRESS. 23 * Alan Cox : Asynchronous I/O support 24 * Alan Cox : Keep correct socket pointer on sock 25 * structures 26 * when accept() ed 27 * Alan Cox : Semantics of SO_LINGER aren't state 28 * moved to close when you look carefully. 29 * With this fixed and the accept bug fixed 30 * some RPC stuff seems happier. 31 * Niibe Yutaka : 4.4BSD style write async I/O 32 * Alan Cox, 33 * Tony Gale : Fixed reuse semantics. 34 * Alan Cox : bind() shouldn't abort existing but dead 35 * sockets. Stops FTP netin:.. I hope. 36 * Alan Cox : bind() works correctly for RAW sockets. 37 * Note that FreeBSD at least was broken 38 * in this respect so be careful with 39 * compatibility tests... 40 * Alan Cox : routing cache support 41 * Alan Cox : memzero the socket structure for 42 * compactness. 43 * Matt Day : nonblock connect error handler 44 * Alan Cox : Allow large numbers of pending sockets 45 * (eg for big web sites), but only if 46 * specifically application requested. 47 * Alan Cox : New buffering throughout IP. Used 48 * dumbly. 49 * Alan Cox : New buffering now used smartly. 50 * Alan Cox : BSD rather than common sense 51 * interpretation of listen. 52 * Germano Caronni : Assorted small races. 53 * Alan Cox : sendmsg/recvmsg basic support. 54 * Alan Cox : Only sendmsg/recvmsg now supported. 55 * Alan Cox : Locked down bind (see security list). 56 * Alan Cox : Loosened bind a little. 57 * Mike McLagan : ADD/DEL DLCI Ioctls 58 * Willy Konynenberg : Transparent proxying support. 59 * David S. Miller : New socket lookup architecture. 60 * Some other random speedups. 61 * Cyrus Durgin : Cleaned up file for kmod hacks. 62 * Andi Kleen : Fix inet_stream_connect TCP race. 63 * 64 * This program is free software; you can redistribute it and/or 65 * modify it under the terms of the GNU General Public License 66 * as published by the Free Software Foundation; either version 67 * 2 of the License, or (at your option) any later version. 68 */ 69 70 #include <linux/config.h> 71 #include <linux/errno.h> 72 #include <linux/types.h> 73 #include <linux/socket.h> 74 #include <linux/in.h> 75 #include <linux/kernel.h> 76 #include <linux/major.h> 77 #include <linux/module.h> 78 #include <linux/sched.h> 79 #include <linux/timer.h> 80 #include <linux/string.h> 81 #include <linux/sockios.h> 82 #include <linux/net.h> 83 #include <linux/fcntl.h> 84 #include <linux/mm.h> 85 #include <linux/interrupt.h> 86 #include <linux/stat.h> 87 #include <linux/init.h> 88 #include <linux/poll.h> 89 #include <linux/netfilter_ipv4.h> 90 91 #include <asm/uaccess.h> 92 #include <asm/system.h> 93 94 #include <linux/smp_lock.h> 95 #include <linux/inet.h> 96 #include <linux/igmp.h> 97 #include <linux/netdevice.h> 98 #include <net/ip.h> 99 #include <net/protocol.h> 100 #include <net/arp.h> 101 #include <net/route.h> 102 #include <net/ip_fib.h> 103 #include <net/tcp.h> 104 #include <net/udp.h> 105 #include <linux/skbuff.h> 106 #include <net/sock.h> 107 #include <net/raw.h> 108 #include <net/icmp.h> 109 #include <net/ipip.h> 110 #include <net/inet_common.h> 111 #include <net/xfrm.h> 112 #ifdef CONFIG_IP_MROUTE 113 #include <linux/mroute.h> 114 #endif 115 116 DEFINE_SNMP_STAT(struct linux_mib, net_statistics); 117 118 #ifdef INET_REFCNT_DEBUG 119 atomic_t inet_sock_nr; 120 #endif 121 122 extern void ip_mc_drop_socket(struct sock *sk); 123 124 /* The inetsw table contains everything that inet_create needs to 125 * build a new socket. 126 */ 127 static struct list_head inetsw[SOCK_MAX]; 128 static DEFINE_SPINLOCK(inetsw_lock); 129 130 /* New destruction routine */ 131 132 void inet_sock_destruct(struct sock *sk) 133 { 134 struct inet_sock *inet = inet_sk(sk); 135 136 __skb_queue_purge(&sk->sk_receive_queue); 137 __skb_queue_purge(&sk->sk_error_queue); 138 139 if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) { 140 printk("Attempt to release TCP socket in state %d %p\n", 141 sk->sk_state, sk); 142 return; 143 } 144 if (!sock_flag(sk, SOCK_DEAD)) { 145 printk("Attempt to release alive inet socket %p\n", sk); 146 return; 147 } 148 149 BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); 150 BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); 151 BUG_TRAP(!sk->sk_wmem_queued); 152 BUG_TRAP(!sk->sk_forward_alloc); 153 154 if (inet->opt) 155 kfree(inet->opt); 156 dst_release(sk->sk_dst_cache); 157 #ifdef INET_REFCNT_DEBUG 158 atomic_dec(&inet_sock_nr); 159 printk(KERN_DEBUG "INET socket %p released, %d are still alive\n", 160 sk, atomic_read(&inet_sock_nr)); 161 #endif 162 } 163 164 /* 165 * The routines beyond this point handle the behaviour of an AF_INET 166 * socket object. Mostly it punts to the subprotocols of IP to do 167 * the work. 168 */ 169 170 /* 171 * Automatically bind an unbound socket. 172 */ 173 174 static int inet_autobind(struct sock *sk) 175 { 176 struct inet_sock *inet; 177 /* We may need to bind the socket. */ 178 lock_sock(sk); 179 inet = inet_sk(sk); 180 if (!inet->num) { 181 if (sk->sk_prot->get_port(sk, 0)) { 182 release_sock(sk); 183 return -EAGAIN; 184 } 185 inet->sport = htons(inet->num); 186 } 187 release_sock(sk); 188 return 0; 189 } 190 191 /* 192 * Move a socket into listening state. 193 */ 194 int inet_listen(struct socket *sock, int backlog) 195 { 196 struct sock *sk = sock->sk; 197 unsigned char old_state; 198 int err; 199 200 lock_sock(sk); 201 202 err = -EINVAL; 203 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM) 204 goto out; 205 206 old_state = sk->sk_state; 207 if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN))) 208 goto out; 209 210 /* Really, if the socket is already in listen state 211 * we can only allow the backlog to be adjusted. 212 */ 213 if (old_state != TCP_LISTEN) { 214 err = tcp_listen_start(sk); 215 if (err) 216 goto out; 217 } 218 sk->sk_max_ack_backlog = backlog; 219 err = 0; 220 221 out: 222 release_sock(sk); 223 return err; 224 } 225 226 /* 227 * Create an inet socket. 228 */ 229 230 static int inet_create(struct socket *sock, int protocol) 231 { 232 struct sock *sk; 233 struct list_head *p; 234 struct inet_protosw *answer; 235 struct inet_sock *inet; 236 struct proto *answer_prot; 237 unsigned char answer_flags; 238 char answer_no_check; 239 int err; 240 241 sock->state = SS_UNCONNECTED; 242 243 /* Look for the requested type/protocol pair. */ 244 answer = NULL; 245 rcu_read_lock(); 246 list_for_each_rcu(p, &inetsw[sock->type]) { 247 answer = list_entry(p, struct inet_protosw, list); 248 249 /* Check the non-wild match. */ 250 if (protocol == answer->protocol) { 251 if (protocol != IPPROTO_IP) 252 break; 253 } else { 254 /* Check for the two wild cases. */ 255 if (IPPROTO_IP == protocol) { 256 protocol = answer->protocol; 257 break; 258 } 259 if (IPPROTO_IP == answer->protocol) 260 break; 261 } 262 answer = NULL; 263 } 264 265 err = -ESOCKTNOSUPPORT; 266 if (!answer) 267 goto out_rcu_unlock; 268 err = -EPERM; 269 if (answer->capability > 0 && !capable(answer->capability)) 270 goto out_rcu_unlock; 271 err = -EPROTONOSUPPORT; 272 if (!protocol) 273 goto out_rcu_unlock; 274 275 sock->ops = answer->ops; 276 answer_prot = answer->prot; 277 answer_no_check = answer->no_check; 278 answer_flags = answer->flags; 279 rcu_read_unlock(); 280 281 BUG_TRAP(answer_prot->slab != NULL); 282 283 err = -ENOBUFS; 284 sk = sk_alloc(PF_INET, GFP_KERNEL, answer_prot, 1); 285 if (sk == NULL) 286 goto out; 287 288 err = 0; 289 sk->sk_no_check = answer_no_check; 290 if (INET_PROTOSW_REUSE & answer_flags) 291 sk->sk_reuse = 1; 292 293 inet = inet_sk(sk); 294 295 if (SOCK_RAW == sock->type) { 296 inet->num = protocol; 297 if (IPPROTO_RAW == protocol) 298 inet->hdrincl = 1; 299 } 300 301 if (ipv4_config.no_pmtu_disc) 302 inet->pmtudisc = IP_PMTUDISC_DONT; 303 else 304 inet->pmtudisc = IP_PMTUDISC_WANT; 305 306 inet->id = 0; 307 308 sock_init_data(sock, sk); 309 310 sk->sk_destruct = inet_sock_destruct; 311 sk->sk_family = PF_INET; 312 sk->sk_protocol = protocol; 313 sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; 314 315 inet->uc_ttl = -1; 316 inet->mc_loop = 1; 317 inet->mc_ttl = 1; 318 inet->mc_index = 0; 319 inet->mc_list = NULL; 320 321 #ifdef INET_REFCNT_DEBUG 322 atomic_inc(&inet_sock_nr); 323 #endif 324 325 if (inet->num) { 326 /* It assumes that any protocol which allows 327 * the user to assign a number at socket 328 * creation time automatically 329 * shares. 330 */ 331 inet->sport = htons(inet->num); 332 /* Add to protocol hash chains. */ 333 sk->sk_prot->hash(sk); 334 } 335 336 if (sk->sk_prot->init) { 337 err = sk->sk_prot->init(sk); 338 if (err) 339 sk_common_release(sk); 340 } 341 out: 342 return err; 343 out_rcu_unlock: 344 rcu_read_unlock(); 345 goto out; 346 } 347 348 349 /* 350 * The peer socket should always be NULL (or else). When we call this 351 * function we are destroying the object and from then on nobody 352 * should refer to it. 353 */ 354 int inet_release(struct socket *sock) 355 { 356 struct sock *sk = sock->sk; 357 358 if (sk) { 359 long timeout; 360 361 /* Applications forget to leave groups before exiting */ 362 ip_mc_drop_socket(sk); 363 364 /* If linger is set, we don't return until the close 365 * is complete. Otherwise we return immediately. The 366 * actually closing is done the same either way. 367 * 368 * If the close is due to the process exiting, we never 369 * linger.. 370 */ 371 timeout = 0; 372 if (sock_flag(sk, SOCK_LINGER) && 373 !(current->flags & PF_EXITING)) 374 timeout = sk->sk_lingertime; 375 sock->sk = NULL; 376 sk->sk_prot->close(sk, timeout); 377 } 378 return 0; 379 } 380 381 /* It is off by default, see below. */ 382 int sysctl_ip_nonlocal_bind; 383 384 int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 385 { 386 struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; 387 struct sock *sk = sock->sk; 388 struct inet_sock *inet = inet_sk(sk); 389 unsigned short snum; 390 int chk_addr_ret; 391 int err; 392 393 /* If the socket has its own bind function then use it. (RAW) */ 394 if (sk->sk_prot->bind) { 395 err = sk->sk_prot->bind(sk, uaddr, addr_len); 396 goto out; 397 } 398 err = -EINVAL; 399 if (addr_len < sizeof(struct sockaddr_in)) 400 goto out; 401 402 chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr); 403 404 /* Not specified by any standard per-se, however it breaks too 405 * many applications when removed. It is unfortunate since 406 * allowing applications to make a non-local bind solves 407 * several problems with systems using dynamic addressing. 408 * (ie. your servers still start up even if your ISDN link 409 * is temporarily down) 410 */ 411 err = -EADDRNOTAVAIL; 412 if (!sysctl_ip_nonlocal_bind && 413 !inet->freebind && 414 addr->sin_addr.s_addr != INADDR_ANY && 415 chk_addr_ret != RTN_LOCAL && 416 chk_addr_ret != RTN_MULTICAST && 417 chk_addr_ret != RTN_BROADCAST) 418 goto out; 419 420 snum = ntohs(addr->sin_port); 421 err = -EACCES; 422 if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) 423 goto out; 424 425 /* We keep a pair of addresses. rcv_saddr is the one 426 * used by hash lookups, and saddr is used for transmit. 427 * 428 * In the BSD API these are the same except where it 429 * would be illegal to use them (multicast/broadcast) in 430 * which case the sending device address is used. 431 */ 432 lock_sock(sk); 433 434 /* Check these errors (active socket, double bind). */ 435 err = -EINVAL; 436 if (sk->sk_state != TCP_CLOSE || inet->num) 437 goto out_release_sock; 438 439 inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr; 440 if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) 441 inet->saddr = 0; /* Use device */ 442 443 /* Make sure we are allowed to bind here. */ 444 if (sk->sk_prot->get_port(sk, snum)) { 445 inet->saddr = inet->rcv_saddr = 0; 446 err = -EADDRINUSE; 447 goto out_release_sock; 448 } 449 450 if (inet->rcv_saddr) 451 sk->sk_userlocks |= SOCK_BINDADDR_LOCK; 452 if (snum) 453 sk->sk_userlocks |= SOCK_BINDPORT_LOCK; 454 inet->sport = htons(inet->num); 455 inet->daddr = 0; 456 inet->dport = 0; 457 sk_dst_reset(sk); 458 err = 0; 459 out_release_sock: 460 release_sock(sk); 461 out: 462 return err; 463 } 464 465 int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr, 466 int addr_len, int flags) 467 { 468 struct sock *sk = sock->sk; 469 470 if (uaddr->sa_family == AF_UNSPEC) 471 return sk->sk_prot->disconnect(sk, flags); 472 473 if (!inet_sk(sk)->num && inet_autobind(sk)) 474 return -EAGAIN; 475 return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len); 476 } 477 478 static long inet_wait_for_connect(struct sock *sk, long timeo) 479 { 480 DEFINE_WAIT(wait); 481 482 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 483 484 /* Basic assumption: if someone sets sk->sk_err, he _must_ 485 * change state of the socket from TCP_SYN_*. 486 * Connect() does not allow to get error notifications 487 * without closing the socket. 488 */ 489 while ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { 490 release_sock(sk); 491 timeo = schedule_timeout(timeo); 492 lock_sock(sk); 493 if (signal_pending(current) || !timeo) 494 break; 495 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 496 } 497 finish_wait(sk->sk_sleep, &wait); 498 return timeo; 499 } 500 501 /* 502 * Connect to a remote host. There is regrettably still a little 503 * TCP 'magic' in here. 504 */ 505 int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, 506 int addr_len, int flags) 507 { 508 struct sock *sk = sock->sk; 509 int err; 510 long timeo; 511 512 lock_sock(sk); 513 514 if (uaddr->sa_family == AF_UNSPEC) { 515 err = sk->sk_prot->disconnect(sk, flags); 516 sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; 517 goto out; 518 } 519 520 switch (sock->state) { 521 default: 522 err = -EINVAL; 523 goto out; 524 case SS_CONNECTED: 525 err = -EISCONN; 526 goto out; 527 case SS_CONNECTING: 528 err = -EALREADY; 529 /* Fall out of switch with err, set for this state */ 530 break; 531 case SS_UNCONNECTED: 532 err = -EISCONN; 533 if (sk->sk_state != TCP_CLOSE) 534 goto out; 535 536 err = sk->sk_prot->connect(sk, uaddr, addr_len); 537 if (err < 0) 538 goto out; 539 540 sock->state = SS_CONNECTING; 541 542 /* Just entered SS_CONNECTING state; the only 543 * difference is that return value in non-blocking 544 * case is EINPROGRESS, rather than EALREADY. 545 */ 546 err = -EINPROGRESS; 547 break; 548 } 549 550 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); 551 552 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { 553 /* Error code is set above */ 554 if (!timeo || !inet_wait_for_connect(sk, timeo)) 555 goto out; 556 557 err = sock_intr_errno(timeo); 558 if (signal_pending(current)) 559 goto out; 560 } 561 562 /* Connection was closed by RST, timeout, ICMP error 563 * or another process disconnected us. 564 */ 565 if (sk->sk_state == TCP_CLOSE) 566 goto sock_error; 567 568 /* sk->sk_err may be not zero now, if RECVERR was ordered by user 569 * and error was received after socket entered established state. 570 * Hence, it is handled normally after connect() return successfully. 571 */ 572 573 sock->state = SS_CONNECTED; 574 err = 0; 575 out: 576 release_sock(sk); 577 return err; 578 579 sock_error: 580 err = sock_error(sk) ? : -ECONNABORTED; 581 sock->state = SS_UNCONNECTED; 582 if (sk->sk_prot->disconnect(sk, flags)) 583 sock->state = SS_DISCONNECTING; 584 goto out; 585 } 586 587 /* 588 * Accept a pending connection. The TCP layer now gives BSD semantics. 589 */ 590 591 int inet_accept(struct socket *sock, struct socket *newsock, int flags) 592 { 593 struct sock *sk1 = sock->sk; 594 int err = -EINVAL; 595 struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err); 596 597 if (!sk2) 598 goto do_err; 599 600 lock_sock(sk2); 601 602 BUG_TRAP((1 << sk2->sk_state) & 603 (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE)); 604 605 sock_graft(sk2, newsock); 606 607 newsock->state = SS_CONNECTED; 608 err = 0; 609 release_sock(sk2); 610 do_err: 611 return err; 612 } 613 614 615 /* 616 * This does both peername and sockname. 617 */ 618 int inet_getname(struct socket *sock, struct sockaddr *uaddr, 619 int *uaddr_len, int peer) 620 { 621 struct sock *sk = sock->sk; 622 struct inet_sock *inet = inet_sk(sk); 623 struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; 624 625 sin->sin_family = AF_INET; 626 if (peer) { 627 if (!inet->dport || 628 (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) && 629 peer == 1)) 630 return -ENOTCONN; 631 sin->sin_port = inet->dport; 632 sin->sin_addr.s_addr = inet->daddr; 633 } else { 634 __u32 addr = inet->rcv_saddr; 635 if (!addr) 636 addr = inet->saddr; 637 sin->sin_port = inet->sport; 638 sin->sin_addr.s_addr = addr; 639 } 640 memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); 641 *uaddr_len = sizeof(*sin); 642 return 0; 643 } 644 645 int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, 646 size_t size) 647 { 648 struct sock *sk = sock->sk; 649 650 /* We may need to bind the socket. */ 651 if (!inet_sk(sk)->num && inet_autobind(sk)) 652 return -EAGAIN; 653 654 return sk->sk_prot->sendmsg(iocb, sk, msg, size); 655 } 656 657 658 static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) 659 { 660 struct sock *sk = sock->sk; 661 662 /* We may need to bind the socket. */ 663 if (!inet_sk(sk)->num && inet_autobind(sk)) 664 return -EAGAIN; 665 666 if (sk->sk_prot->sendpage) 667 return sk->sk_prot->sendpage(sk, page, offset, size, flags); 668 return sock_no_sendpage(sock, page, offset, size, flags); 669 } 670 671 672 int inet_shutdown(struct socket *sock, int how) 673 { 674 struct sock *sk = sock->sk; 675 int err = 0; 676 677 /* This should really check to make sure 678 * the socket is a TCP socket. (WHY AC...) 679 */ 680 how++; /* maps 0->1 has the advantage of making bit 1 rcvs and 681 1->2 bit 2 snds. 682 2->3 */ 683 if ((how & ~SHUTDOWN_MASK) || !how) /* MAXINT->0 */ 684 return -EINVAL; 685 686 lock_sock(sk); 687 if (sock->state == SS_CONNECTING) { 688 if ((1 << sk->sk_state) & 689 (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE)) 690 sock->state = SS_DISCONNECTING; 691 else 692 sock->state = SS_CONNECTED; 693 } 694 695 switch (sk->sk_state) { 696 case TCP_CLOSE: 697 err = -ENOTCONN; 698 /* Hack to wake up other listeners, who can poll for 699 POLLHUP, even on eg. unconnected UDP sockets -- RR */ 700 default: 701 sk->sk_shutdown |= how; 702 if (sk->sk_prot->shutdown) 703 sk->sk_prot->shutdown(sk, how); 704 break; 705 706 /* Remaining two branches are temporary solution for missing 707 * close() in multithreaded environment. It is _not_ a good idea, 708 * but we have no choice until close() is repaired at VFS level. 709 */ 710 case TCP_LISTEN: 711 if (!(how & RCV_SHUTDOWN)) 712 break; 713 /* Fall through */ 714 case TCP_SYN_SENT: 715 err = sk->sk_prot->disconnect(sk, O_NONBLOCK); 716 sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; 717 break; 718 } 719 720 /* Wake up anyone sleeping in poll. */ 721 sk->sk_state_change(sk); 722 release_sock(sk); 723 return err; 724 } 725 726 /* 727 * ioctl() calls you can issue on an INET socket. Most of these are 728 * device configuration and stuff and very rarely used. Some ioctls 729 * pass on to the socket itself. 730 * 731 * NOTE: I like the idea of a module for the config stuff. ie ifconfig 732 * loads the devconfigure module does its configuring and unloads it. 733 * There's a good 20K of config code hanging around the kernel. 734 */ 735 736 int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 737 { 738 struct sock *sk = sock->sk; 739 int err = 0; 740 741 switch (cmd) { 742 case SIOCGSTAMP: 743 err = sock_get_timestamp(sk, (struct timeval __user *)arg); 744 break; 745 case SIOCADDRT: 746 case SIOCDELRT: 747 case SIOCRTMSG: 748 err = ip_rt_ioctl(cmd, (void __user *)arg); 749 break; 750 case SIOCDARP: 751 case SIOCGARP: 752 case SIOCSARP: 753 err = arp_ioctl(cmd, (void __user *)arg); 754 break; 755 case SIOCGIFADDR: 756 case SIOCSIFADDR: 757 case SIOCGIFBRDADDR: 758 case SIOCSIFBRDADDR: 759 case SIOCGIFNETMASK: 760 case SIOCSIFNETMASK: 761 case SIOCGIFDSTADDR: 762 case SIOCSIFDSTADDR: 763 case SIOCSIFPFLAGS: 764 case SIOCGIFPFLAGS: 765 case SIOCSIFFLAGS: 766 err = devinet_ioctl(cmd, (void __user *)arg); 767 break; 768 default: 769 if (!sk->sk_prot->ioctl || 770 (err = sk->sk_prot->ioctl(sk, cmd, arg)) == 771 -ENOIOCTLCMD) 772 err = dev_ioctl(cmd, (void __user *)arg); 773 break; 774 } 775 return err; 776 } 777 778 struct proto_ops inet_stream_ops = { 779 .family = PF_INET, 780 .owner = THIS_MODULE, 781 .release = inet_release, 782 .bind = inet_bind, 783 .connect = inet_stream_connect, 784 .socketpair = sock_no_socketpair, 785 .accept = inet_accept, 786 .getname = inet_getname, 787 .poll = tcp_poll, 788 .ioctl = inet_ioctl, 789 .listen = inet_listen, 790 .shutdown = inet_shutdown, 791 .setsockopt = sock_common_setsockopt, 792 .getsockopt = sock_common_getsockopt, 793 .sendmsg = inet_sendmsg, 794 .recvmsg = sock_common_recvmsg, 795 .mmap = sock_no_mmap, 796 .sendpage = tcp_sendpage 797 }; 798 799 struct proto_ops inet_dgram_ops = { 800 .family = PF_INET, 801 .owner = THIS_MODULE, 802 .release = inet_release, 803 .bind = inet_bind, 804 .connect = inet_dgram_connect, 805 .socketpair = sock_no_socketpair, 806 .accept = sock_no_accept, 807 .getname = inet_getname, 808 .poll = udp_poll, 809 .ioctl = inet_ioctl, 810 .listen = sock_no_listen, 811 .shutdown = inet_shutdown, 812 .setsockopt = sock_common_setsockopt, 813 .getsockopt = sock_common_getsockopt, 814 .sendmsg = inet_sendmsg, 815 .recvmsg = sock_common_recvmsg, 816 .mmap = sock_no_mmap, 817 .sendpage = inet_sendpage, 818 }; 819 820 /* 821 * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without 822 * udp_poll 823 */ 824 static struct proto_ops inet_sockraw_ops = { 825 .family = PF_INET, 826 .owner = THIS_MODULE, 827 .release = inet_release, 828 .bind = inet_bind, 829 .connect = inet_dgram_connect, 830 .socketpair = sock_no_socketpair, 831 .accept = sock_no_accept, 832 .getname = inet_getname, 833 .poll = datagram_poll, 834 .ioctl = inet_ioctl, 835 .listen = sock_no_listen, 836 .shutdown = inet_shutdown, 837 .setsockopt = sock_common_setsockopt, 838 .getsockopt = sock_common_getsockopt, 839 .sendmsg = inet_sendmsg, 840 .recvmsg = sock_common_recvmsg, 841 .mmap = sock_no_mmap, 842 .sendpage = inet_sendpage, 843 }; 844 845 static struct net_proto_family inet_family_ops = { 846 .family = PF_INET, 847 .create = inet_create, 848 .owner = THIS_MODULE, 849 }; 850 851 852 extern void tcp_init(void); 853 extern void tcp_v4_init(struct net_proto_family *); 854 855 /* Upon startup we insert all the elements in inetsw_array[] into 856 * the linked list inetsw. 857 */ 858 static struct inet_protosw inetsw_array[] = 859 { 860 { 861 .type = SOCK_STREAM, 862 .protocol = IPPROTO_TCP, 863 .prot = &tcp_prot, 864 .ops = &inet_stream_ops, 865 .capability = -1, 866 .no_check = 0, 867 .flags = INET_PROTOSW_PERMANENT, 868 }, 869 870 { 871 .type = SOCK_DGRAM, 872 .protocol = IPPROTO_UDP, 873 .prot = &udp_prot, 874 .ops = &inet_dgram_ops, 875 .capability = -1, 876 .no_check = UDP_CSUM_DEFAULT, 877 .flags = INET_PROTOSW_PERMANENT, 878 }, 879 880 881 { 882 .type = SOCK_RAW, 883 .protocol = IPPROTO_IP, /* wild card */ 884 .prot = &raw_prot, 885 .ops = &inet_sockraw_ops, 886 .capability = CAP_NET_RAW, 887 .no_check = UDP_CSUM_DEFAULT, 888 .flags = INET_PROTOSW_REUSE, 889 } 890 }; 891 892 #define INETSW_ARRAY_LEN (sizeof(inetsw_array) / sizeof(struct inet_protosw)) 893 894 void inet_register_protosw(struct inet_protosw *p) 895 { 896 struct list_head *lh; 897 struct inet_protosw *answer; 898 int protocol = p->protocol; 899 struct list_head *last_perm; 900 901 spin_lock_bh(&inetsw_lock); 902 903 if (p->type >= SOCK_MAX) 904 goto out_illegal; 905 906 /* If we are trying to override a permanent protocol, bail. */ 907 answer = NULL; 908 last_perm = &inetsw[p->type]; 909 list_for_each(lh, &inetsw[p->type]) { 910 answer = list_entry(lh, struct inet_protosw, list); 911 912 /* Check only the non-wild match. */ 913 if (INET_PROTOSW_PERMANENT & answer->flags) { 914 if (protocol == answer->protocol) 915 break; 916 last_perm = lh; 917 } 918 919 answer = NULL; 920 } 921 if (answer) 922 goto out_permanent; 923 924 /* Add the new entry after the last permanent entry if any, so that 925 * the new entry does not override a permanent entry when matched with 926 * a wild-card protocol. But it is allowed to override any existing 927 * non-permanent entry. This means that when we remove this entry, the 928 * system automatically returns to the old behavior. 929 */ 930 list_add_rcu(&p->list, last_perm); 931 out: 932 spin_unlock_bh(&inetsw_lock); 933 934 synchronize_net(); 935 936 return; 937 938 out_permanent: 939 printk(KERN_ERR "Attempt to override permanent protocol %d.\n", 940 protocol); 941 goto out; 942 943 out_illegal: 944 printk(KERN_ERR 945 "Ignoring attempt to register invalid socket type %d.\n", 946 p->type); 947 goto out; 948 } 949 950 void inet_unregister_protosw(struct inet_protosw *p) 951 { 952 if (INET_PROTOSW_PERMANENT & p->flags) { 953 printk(KERN_ERR 954 "Attempt to unregister permanent protocol %d.\n", 955 p->protocol); 956 } else { 957 spin_lock_bh(&inetsw_lock); 958 list_del_rcu(&p->list); 959 spin_unlock_bh(&inetsw_lock); 960 961 synchronize_net(); 962 } 963 } 964 965 #ifdef CONFIG_IP_MULTICAST 966 static struct net_protocol igmp_protocol = { 967 .handler = igmp_rcv, 968 }; 969 #endif 970 971 static struct net_protocol tcp_protocol = { 972 .handler = tcp_v4_rcv, 973 .err_handler = tcp_v4_err, 974 .no_policy = 1, 975 }; 976 977 static struct net_protocol udp_protocol = { 978 .handler = udp_rcv, 979 .err_handler = udp_err, 980 .no_policy = 1, 981 }; 982 983 static struct net_protocol icmp_protocol = { 984 .handler = icmp_rcv, 985 }; 986 987 static int __init init_ipv4_mibs(void) 988 { 989 net_statistics[0] = alloc_percpu(struct linux_mib); 990 net_statistics[1] = alloc_percpu(struct linux_mib); 991 ip_statistics[0] = alloc_percpu(struct ipstats_mib); 992 ip_statistics[1] = alloc_percpu(struct ipstats_mib); 993 icmp_statistics[0] = alloc_percpu(struct icmp_mib); 994 icmp_statistics[1] = alloc_percpu(struct icmp_mib); 995 tcp_statistics[0] = alloc_percpu(struct tcp_mib); 996 tcp_statistics[1] = alloc_percpu(struct tcp_mib); 997 udp_statistics[0] = alloc_percpu(struct udp_mib); 998 udp_statistics[1] = alloc_percpu(struct udp_mib); 999 if (! 1000 (net_statistics[0] && net_statistics[1] && ip_statistics[0] 1001 && ip_statistics[1] && tcp_statistics[0] && tcp_statistics[1] 1002 && udp_statistics[0] && udp_statistics[1])) 1003 return -ENOMEM; 1004 1005 (void) tcp_mib_init(); 1006 1007 return 0; 1008 } 1009 1010 static int ipv4_proc_init(void); 1011 extern void ipfrag_init(void); 1012 1013 static int __init inet_init(void) 1014 { 1015 struct sk_buff *dummy_skb; 1016 struct inet_protosw *q; 1017 struct list_head *r; 1018 int rc = -EINVAL; 1019 1020 if (sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)) { 1021 printk(KERN_CRIT "%s: panic\n", __FUNCTION__); 1022 goto out; 1023 } 1024 1025 rc = proto_register(&tcp_prot, 1); 1026 if (rc) 1027 goto out; 1028 1029 rc = proto_register(&udp_prot, 1); 1030 if (rc) 1031 goto out_unregister_tcp_proto; 1032 1033 rc = proto_register(&raw_prot, 1); 1034 if (rc) 1035 goto out_unregister_udp_proto; 1036 1037 /* 1038 * Tell SOCKET that we are alive... 1039 */ 1040 1041 (void)sock_register(&inet_family_ops); 1042 1043 /* 1044 * Add all the base protocols. 1045 */ 1046 1047 if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0) 1048 printk(KERN_CRIT "inet_init: Cannot add ICMP protocol\n"); 1049 if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0) 1050 printk(KERN_CRIT "inet_init: Cannot add UDP protocol\n"); 1051 if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0) 1052 printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n"); 1053 #ifdef CONFIG_IP_MULTICAST 1054 if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0) 1055 printk(KERN_CRIT "inet_init: Cannot add IGMP protocol\n"); 1056 #endif 1057 1058 /* Register the socket-side information for inet_create. */ 1059 for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r) 1060 INIT_LIST_HEAD(r); 1061 1062 for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q) 1063 inet_register_protosw(q); 1064 1065 /* 1066 * Set the ARP module up 1067 */ 1068 1069 arp_init(); 1070 1071 /* 1072 * Set the IP module up 1073 */ 1074 1075 ip_init(); 1076 1077 tcp_v4_init(&inet_family_ops); 1078 1079 /* Setup TCP slab cache for open requests. */ 1080 tcp_init(); 1081 1082 1083 /* 1084 * Set the ICMP layer up 1085 */ 1086 1087 icmp_init(&inet_family_ops); 1088 1089 /* 1090 * Initialise the multicast router 1091 */ 1092 #if defined(CONFIG_IP_MROUTE) 1093 ip_mr_init(); 1094 #endif 1095 /* 1096 * Initialise per-cpu ipv4 mibs 1097 */ 1098 1099 if(init_ipv4_mibs()) 1100 printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n"); ; 1101 1102 ipv4_proc_init(); 1103 1104 ipfrag_init(); 1105 1106 rc = 0; 1107 out: 1108 return rc; 1109 out_unregister_tcp_proto: 1110 proto_unregister(&tcp_prot); 1111 out_unregister_udp_proto: 1112 proto_unregister(&udp_prot); 1113 goto out; 1114 } 1115 1116 module_init(inet_init); 1117 1118 /* ------------------------------------------------------------------------ */ 1119 1120 #ifdef CONFIG_PROC_FS 1121 extern int fib_proc_init(void); 1122 extern void fib_proc_exit(void); 1123 extern int ip_misc_proc_init(void); 1124 extern int raw_proc_init(void); 1125 extern void raw_proc_exit(void); 1126 extern int tcp4_proc_init(void); 1127 extern void tcp4_proc_exit(void); 1128 extern int udp4_proc_init(void); 1129 extern void udp4_proc_exit(void); 1130 1131 static int __init ipv4_proc_init(void) 1132 { 1133 int rc = 0; 1134 1135 if (raw_proc_init()) 1136 goto out_raw; 1137 if (tcp4_proc_init()) 1138 goto out_tcp; 1139 if (udp4_proc_init()) 1140 goto out_udp; 1141 if (fib_proc_init()) 1142 goto out_fib; 1143 if (ip_misc_proc_init()) 1144 goto out_misc; 1145 out: 1146 return rc; 1147 out_misc: 1148 fib_proc_exit(); 1149 out_fib: 1150 udp4_proc_exit(); 1151 out_udp: 1152 tcp4_proc_exit(); 1153 out_tcp: 1154 raw_proc_exit(); 1155 out_raw: 1156 rc = -ENOMEM; 1157 goto out; 1158 } 1159 1160 #else /* CONFIG_PROC_FS */ 1161 static int __init ipv4_proc_init(void) 1162 { 1163 return 0; 1164 } 1165 #endif /* CONFIG_PROC_FS */ 1166 1167 MODULE_ALIAS_NETPROTO(PF_INET); 1168 1169 EXPORT_SYMBOL(inet_accept); 1170 EXPORT_SYMBOL(inet_bind); 1171 EXPORT_SYMBOL(inet_dgram_connect); 1172 EXPORT_SYMBOL(inet_dgram_ops); 1173 EXPORT_SYMBOL(inet_getname); 1174 EXPORT_SYMBOL(inet_ioctl); 1175 EXPORT_SYMBOL(inet_listen); 1176 EXPORT_SYMBOL(inet_register_protosw); 1177 EXPORT_SYMBOL(inet_release); 1178 EXPORT_SYMBOL(inet_sendmsg); 1179 EXPORT_SYMBOL(inet_shutdown); 1180 EXPORT_SYMBOL(inet_sock_destruct); 1181 EXPORT_SYMBOL(inet_stream_connect); 1182 EXPORT_SYMBOL(inet_stream_ops); 1183 EXPORT_SYMBOL(inet_unregister_protosw); 1184 EXPORT_SYMBOL(net_statistics); 1185 1186 #ifdef INET_REFCNT_DEBUG 1187 EXPORT_SYMBOL(inet_sock_nr); 1188 #endif 1189