/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		PF_INET protocol family socket handler.
 *
 * Version:	$Id: af_inet.c,v 1.137 2002/02/01 22:01:03 davem Exp $
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Alan Cox, <A.Cox@swansea.ac.uk>
 *
 * Changes (see also sock.c)
 *
 *		piggy,
 *		Karl Knutson	:	Socket protocol table
 *		A.N.Kuznetsov	:	Socket death error in accept().
 *		John Richardson :	Fix non blocking error in connect()
 *					so sockets that fail to connect
 *					don't return -EINPROGRESS.
 *		Alan Cox	:	Asynchronous I/O support
 *		Alan Cox	:	Keep correct socket pointer on sock
 *					structures when accept()ed
 *		Alan Cox	:	Semantics of SO_LINGER aren't state
 *					moved to close when you look carefully.
 *					With this fixed and the accept bug fixed
 *					some RPC stuff seems happier.
 *		Niibe Yutaka	:	4.4BSD style write async I/O
 *		Alan Cox,
 *		Tony Gale	:	Fixed reuse semantics.
 *		Alan Cox	:	bind() shouldn't abort existing but dead
 *					sockets. Stops FTP netin:.. I hope.
 *		Alan Cox	:	bind() works correctly for RAW sockets.
 *					Note that FreeBSD at least was broken
 *					in this respect so be careful with
 *					compatibility tests...
 *		Alan Cox	:	routing cache support
 *		Alan Cox	:	memzero the socket structure for
 *					compactness.
 *		Matt Day	:	nonblock connect error handler
 *		Alan Cox	:	Allow large numbers of pending sockets
 *					(eg for big web sites), but only if
 *					specifically application requested.
 *		Alan Cox	:	New buffering throughout IP. Used
 *					dumbly.
 *		Alan Cox	:	New buffering now used smartly.
 *		Alan Cox	:	BSD rather than common sense
 *					interpretation of listen.
 *		Germano Caronni	:	Assorted small races.
 *		Alan Cox	:	sendmsg/recvmsg basic support.
 *		Alan Cox	:	Only sendmsg/recvmsg now supported.
 *		Alan Cox	:	Locked down bind (see security list).
 *		Alan Cox	:	Loosened bind a little.
 *		Mike McLagan	:	ADD/DEL DLCI Ioctls
 *		Willy Konynenberg :	Transparent proxying support.
 *		David S. Miller	:	New socket lookup architecture.
 *					Some other random speedups.
 *		Cyrus Durgin	:	Cleaned up file for kmod hacks.
 *		Andi Kleen	:	Fix inet_stream_connect TCP race.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/stat.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/netfilter_ipv4.h>
#include <linux/random.h>

#include <asm/uaccess.h>
#include <asm/system.h>

#include <linux/inet.h>
#include <linux/igmp.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/arp.h>
#include <net/route.h>
#include <net/ip_fib.h>
#include <net/inet_connection_sock.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/udplite.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/raw.h>
#include <net/icmp.h>
#include <net/ipip.h>
#include <net/inet_common.h>
#include <net/xfrm.h>
#ifdef CONFIG_IP_MROUTE
#include <linux/mroute.h>
#endif

DEFINE_SNMP_STAT(struct linux_mib, net_statistics) __read_mostly;

extern void ip_mc_drop_socket(struct sock *sk);

/* The inetsw table contains everything that inet_create needs to
 * build a new socket.
 */
static struct list_head inetsw[SOCK_MAX];
static DEFINE_SPINLOCK(inetsw_lock);

struct ipv4_config ipv4_config;

EXPORT_SYMBOL(ipv4_config);

/* New destruction routine */

void inet_sock_destruct(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);

	__skb_queue_purge(&sk->sk_receive_queue);
	__skb_queue_purge(&sk->sk_error_queue);

	sk_mem_reclaim(sk);

	if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) {
		printk("Attempt to release TCP socket in state %d %p\n",
		       sk->sk_state, sk);
		return;
	}
	if (!sock_flag(sk, SOCK_DEAD)) {
		printk("Attempt to release alive inet socket %p\n", sk);
		return;
	}

	BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
	BUG_TRAP(!sk->sk_wmem_queued);
	BUG_TRAP(!sk->sk_forward_alloc);

	kfree(inet->opt);
	dst_release(sk->sk_dst_cache);
	sk_refcnt_debug_dec(sk);
}

/*
 *	The routines beyond this point handle the behaviour of an AF_INET
 *	socket object. Mostly it punts to the subprotocols of IP to do
 *	the work.
 */

/*
 *	Automatically bind an unbound socket.
 */

static int inet_autobind(struct sock *sk)
{
	struct inet_sock *inet;
	/* We may need to bind the socket. */
	lock_sock(sk);
	inet = inet_sk(sk);
	if (!inet->num) {
		if (sk->sk_prot->get_port(sk, 0)) {
			release_sock(sk);
			return -EAGAIN;
		}
		inet->sport = htons(inet->num);
	}
	release_sock(sk);
	return 0;
}
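/*
 * Illustrative note (not part of the original file): inet_autobind() is what
 * gives an unbound socket its ephemeral local port on first use.  get_port()
 * is called with a port of 0, asking the protocol to pick a free port, and
 * inet->num is then mirrored into inet->sport in network byte order.  A
 * minimal user-space sketch of the behaviour that relies on this, assuming
 * an ordinary UDP socket and a reachable peer address:
 *
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *	sendto(fd, buf, len, 0, (struct sockaddr *)&dst, sizeof(dst));
 *	// no bind() was issued; the kernel autobinds an ephemeral port here
 */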
/*
 *	Move a socket into listening state.
 */
int inet_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != TCP_LISTEN) {
		err = inet_csk_listen_start(sk, backlog);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}
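/*
 * Illustrative note (not part of the original file): calling listen() again
 * on an already listening TCP socket does not restart the listener; only
 * sk_max_ack_backlog is updated, as enforced above.  A hedged user-space
 * sketch of the usual sequence this serves:
 *
 *	int fd = socket(AF_INET, SOCK_STREAM, 0);
 *	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	listen(fd, 128);	// inet_csk_listen_start() runs once
 *	listen(fd, 256);	// a second call merely adjusts the backlog
 */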
u32 inet_ehash_secret __read_mostly;
EXPORT_SYMBOL(inet_ehash_secret);

/*
 * inet_ehash_secret must be set exactly once
 * Instead of using a dedicated spinlock, we (ab)use inetsw_lock
 */
void build_ehash_secret(void)
{
	u32 rnd;
	do {
		get_random_bytes(&rnd, sizeof(rnd));
	} while (rnd == 0);
	spin_lock_bh(&inetsw_lock);
	if (!inet_ehash_secret)
		inet_ehash_secret = rnd;
	spin_unlock_bh(&inetsw_lock);
}
EXPORT_SYMBOL(build_ehash_secret);

static inline int inet_netns_ok(struct net *net, int protocol)
{
	int hash;
	struct net_protocol *ipprot;

	if (net == &init_net)
		return 1;

	hash = protocol & (MAX_INET_PROTOS - 1);
	ipprot = rcu_dereference(inet_protos[hash]);

	if (ipprot == NULL)
		/* raw IP is OK */
		return 1;
	return ipprot->netns_ok;
}

/*
 *	Create an inet socket.
 */

static int inet_create(struct net *net, struct socket *sock, int protocol)
{
	struct sock *sk;
	struct list_head *p;
	struct inet_protosw *answer;
	struct inet_sock *inet;
	struct proto *answer_prot;
	unsigned char answer_flags;
	char answer_no_check;
	int try_loading_module = 0;
	int err;

	if (sock->type != SOCK_RAW &&
	    sock->type != SOCK_DGRAM &&
	    !inet_ehash_secret)
		build_ehash_secret();

	sock->state = SS_UNCONNECTED;

	/* Look for the requested type/protocol pair. */
	answer = NULL;
lookup_protocol:
	err = -ESOCKTNOSUPPORT;
	rcu_read_lock();
	list_for_each_rcu(p, &inetsw[sock->type]) {
		answer = list_entry(p, struct inet_protosw, list);

		/* Check the non-wild match. */
		if (protocol == answer->protocol) {
			if (protocol != IPPROTO_IP)
				break;
		} else {
			/* Check for the two wild cases. */
			if (IPPROTO_IP == protocol) {
				protocol = answer->protocol;
				break;
			}
			if (IPPROTO_IP == answer->protocol)
				break;
		}
		err = -EPROTONOSUPPORT;
		answer = NULL;
	}

	if (unlikely(answer == NULL)) {
		if (try_loading_module < 2) {
			rcu_read_unlock();
			/*
			 * Be more specific, e.g. net-pf-2-proto-132-type-1
			 * (net-pf-PF_INET-proto-IPPROTO_SCTP-type-SOCK_STREAM)
			 */
			if (++try_loading_module == 1)
				request_module("net-pf-%d-proto-%d-type-%d",
					       PF_INET, protocol, sock->type);
			/*
			 * Fall back to generic, e.g. net-pf-2-proto-132
			 * (net-pf-PF_INET-proto-IPPROTO_SCTP)
			 */
			else
				request_module("net-pf-%d-proto-%d",
					       PF_INET, protocol);
			goto lookup_protocol;
		} else
			goto out_rcu_unlock;
	}

	err = -EPERM;
	if (answer->capability > 0 && !capable(answer->capability))
		goto out_rcu_unlock;

	err = -EAFNOSUPPORT;
	if (!inet_netns_ok(net, protocol))
		goto out_rcu_unlock;

	sock->ops = answer->ops;
	answer_prot = answer->prot;
	answer_no_check = answer->no_check;
	answer_flags = answer->flags;
	rcu_read_unlock();

	BUG_TRAP(answer_prot->slab != NULL);

	err = -ENOBUFS;
	sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot);
	if (sk == NULL)
		goto out;

	err = 0;
	sk->sk_no_check = answer_no_check;
	if (INET_PROTOSW_REUSE & answer_flags)
		sk->sk_reuse = 1;

	inet = inet_sk(sk);
	inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;

	if (SOCK_RAW == sock->type) {
		inet->num = protocol;
		if (IPPROTO_RAW == protocol)
			inet->hdrincl = 1;
	}

	if (ipv4_config.no_pmtu_disc)
		inet->pmtudisc = IP_PMTUDISC_DONT;
	else
		inet->pmtudisc = IP_PMTUDISC_WANT;

	inet->id = 0;

	sock_init_data(sock, sk);

	sk->sk_destruct = inet_sock_destruct;
	sk->sk_family = PF_INET;
	sk->sk_protocol = protocol;
	sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;

	inet->uc_ttl = -1;
	inet->mc_loop = 1;
	inet->mc_ttl = 1;
	inet->mc_index = 0;
	inet->mc_list = NULL;

	sk_refcnt_debug_inc(sk);

	if (inet->num) {
		/* It assumes that any protocol which allows
		 * the user to assign a number at socket
		 * creation time automatically
		 * shares.
		 */
		inet->sport = htons(inet->num);
		/* Add to protocol hash chains. */
		sk->sk_prot->hash(sk);
	}

	if (sk->sk_prot->init) {
		err = sk->sk_prot->init(sk);
		if (err)
			sk_common_release(sk);
	}
out:
	return err;
out_rcu_unlock:
	rcu_read_unlock();
	goto out;
}
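/*
 * Illustrative note (not part of the original file): the request_module()
 * strings built in inet_create() correspond to module aliases that a
 * loadable protocol module is expected to advertise.  For example, a modular
 * IPPROTO_SCTP (132) stream transport would typically carry aliases along
 * the lines of (sketch only, exact macros depend on the module):
 *
 *	MODULE_ALIAS("net-pf-2-proto-132-type-1");  // PF_INET, SCTP, SOCK_STREAM
 *	MODULE_ALIAS("net-pf-2-proto-132");         // generic fallback
 *
 * so that an unresolved socket(AF_INET, SOCK_STREAM, IPPROTO_SCTP) call can
 * trigger on-demand module loading before the inetsw lookup is retried.
 */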
/*
 *	The peer socket should always be NULL (or else). When we call this
 *	function we are destroying the object and from then on nobody
 *	should refer to it.
 */
int inet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (sk) {
		long timeout;

		/* Applications forget to leave groups before exiting */
		ip_mc_drop_socket(sk);

		/* If linger is set, we don't return until the close
		 * is complete.  Otherwise we return immediately. The
		 * actually closing is done the same either way.
		 *
		 * If the close is due to the process exiting, we never
		 * linger..
		 */
		timeout = 0;
		if (sock_flag(sk, SOCK_LINGER) &&
		    !(current->flags & PF_EXITING))
			timeout = sk->sk_lingertime;
		sock->sk = NULL;
		sk->sk_prot->close(sk, timeout);
	}
	return 0;
}

/* It is off by default, see below. */
int sysctl_ip_nonlocal_bind __read_mostly;

int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct sock *sk = sock->sk;
	struct inet_sock *inet = inet_sk(sk);
	unsigned short snum;
	int chk_addr_ret;
	int err;

	/* If the socket has its own bind function then use it. (RAW) */
	if (sk->sk_prot->bind) {
		err = sk->sk_prot->bind(sk, uaddr, addr_len);
		goto out;
	}
	err = -EINVAL;
	if (addr_len < sizeof(struct sockaddr_in))
		goto out;

	chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);

	/* Not specified by any standard per-se, however it breaks too
	 * many applications when removed. It is unfortunate since
	 * allowing applications to make a non-local bind solves
	 * several problems with systems using dynamic addressing.
	 * (ie. your servers still start up even if your ISDN link
	 *  is temporarily down)
	 */
	err = -EADDRNOTAVAIL;
	if (!sysctl_ip_nonlocal_bind &&
	    !inet->freebind &&
	    addr->sin_addr.s_addr != htonl(INADDR_ANY) &&
	    chk_addr_ret != RTN_LOCAL &&
	    chk_addr_ret != RTN_MULTICAST &&
	    chk_addr_ret != RTN_BROADCAST)
		goto out;

	snum = ntohs(addr->sin_port);
	err = -EACCES;
	if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
		goto out;

	/* We keep a pair of addresses. rcv_saddr is the one
	 * used by hash lookups, and saddr is used for transmit.
	 *
	 * In the BSD API these are the same except where it
	 * would be illegal to use them (multicast/broadcast) in
	 * which case the sending device address is used.
	 */
	lock_sock(sk);

	/* Check these errors (active socket, double bind). */
	err = -EINVAL;
	if (sk->sk_state != TCP_CLOSE || inet->num)
		goto out_release_sock;

	inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr;
	if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
		inet->saddr = 0;  /* Use device */

	/* Make sure we are allowed to bind here. */
	if (sk->sk_prot->get_port(sk, snum)) {
		inet->saddr = inet->rcv_saddr = 0;
		err = -EADDRINUSE;
		goto out_release_sock;
	}

	if (inet->rcv_saddr)
		sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
	if (snum)
		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
	inet->sport = htons(inet->num);
	inet->daddr = 0;
	inet->dport = 0;
	sk_dst_reset(sk);
	err = 0;
out_release_sock:
	release_sock(sk);
out:
	return err;
}
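/*
 * Illustrative note (not part of the original file): the RTN_LOCAL check in
 * inet_bind() is what makes binding to a currently non-local address fail
 * with EADDRNOTAVAIL unless either the ip_nonlocal_bind sysctl is enabled or
 * the per-socket IP_FREEBIND option (inet->freebind) is set.  A hedged
 * user-space sketch of the per-socket escape hatch:
 *
 *	int one = 1;
 *	setsockopt(fd, IPPROTO_IP, IP_FREEBIND, &one, sizeof(one));
 *	bind(fd, (struct sockaddr *)&not_yet_local_addr, sizeof(addr));
 */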
int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr,
		       int addr_len, int flags)
{
	struct sock *sk = sock->sk;

	if (uaddr->sa_family == AF_UNSPEC)
		return sk->sk_prot->disconnect(sk, flags);

	if (!inet_sk(sk)->num && inet_autobind(sk))
		return -EAGAIN;
	return sk->sk_prot->connect(sk, (struct sockaddr *)uaddr, addr_len);
}

static long inet_wait_for_connect(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);

	/* Basic assumption: if someone sets sk->sk_err, he _must_
	 * change state of the socket from TCP_SYN_*.
	 * Connect() does not allow to get error notifications
	 * without closing the socket.
	 */
	while ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		release_sock(sk);
		timeo = schedule_timeout(timeo);
		lock_sock(sk);
		if (signal_pending(current) || !timeo)
			break;
		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
	}
	finish_wait(sk->sk_sleep, &wait);
	return timeo;
}

/*
 *	Connect to a remote host. There is regrettably still a little
 *	TCP 'magic' in here.
 */
int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			int addr_len, int flags)
{
	struct sock *sk = sock->sk;
	int err;
	long timeo;

	lock_sock(sk);

	if (uaddr->sa_family == AF_UNSPEC) {
		err = sk->sk_prot->disconnect(sk, flags);
		sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
		goto out;
	}

	switch (sock->state) {
	default:
		err = -EINVAL;
		goto out;
	case SS_CONNECTED:
		err = -EISCONN;
		goto out;
	case SS_CONNECTING:
		err = -EALREADY;
		/* Fall out of switch with err, set for this state */
		break;
	case SS_UNCONNECTED:
		err = -EISCONN;
		if (sk->sk_state != TCP_CLOSE)
			goto out;

		err = sk->sk_prot->connect(sk, uaddr, addr_len);
		if (err < 0)
			goto out;

		sock->state = SS_CONNECTING;

		/* Just entered SS_CONNECTING state; the only
		 * difference is that return value in non-blocking
		 * case is EINPROGRESS, rather than EALREADY.
		 */
		err = -EINPROGRESS;
		break;
	}

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		/* Error code is set above */
		if (!timeo || !inet_wait_for_connect(sk, timeo))
			goto out;

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
	}

	/* Connection was closed by RST, timeout, ICMP error
	 * or another process disconnected us.
	 */
	if (sk->sk_state == TCP_CLOSE)
		goto sock_error;

	/* sk->sk_err may be not zero now, if RECVERR was ordered by user
	 * and error was received after socket entered established state.
	 * Hence, it is handled normally after connect() return successfully.
	 */

	sock->state = SS_CONNECTED;
	err = 0;
out:
	release_sock(sk);
	return err;

sock_error:
	err = sock_error(sk) ? : -ECONNABORTED;
	sock->state = SS_UNCONNECTED;
	if (sk->sk_prot->disconnect(sk, flags))
		sock->state = SS_DISCONNECTING;
	goto out;
}
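/*
 * Illustrative note (not part of the original file): the EINPROGRESS /
 * EALREADY split above is what the classic non-blocking connect() pattern
 * in user space relies on.  A hedged sketch:
 *
 *	fcntl(fd, F_SETFL, O_NONBLOCK);
 *	if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) < 0 &&
 *	    errno == EINPROGRESS) {
 *		struct pollfd pfd = { .fd = fd, .events = POLLOUT };
 *		int err;
 *		socklen_t len = sizeof(err);
 *
 *		poll(&pfd, 1, timeout_ms);
 *		getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len);
 *		// err == 0 means the three-way handshake completed
 *	}
 */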
/*
 *	Accept a pending connection. The TCP layer now gives BSD semantics.
 */

int inet_accept(struct socket *sock, struct socket *newsock, int flags)
{
	struct sock *sk1 = sock->sk;
	int err = -EINVAL;
	struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err);

	if (!sk2)
		goto do_err;

	lock_sock(sk2);

	BUG_TRAP((1 << sk2->sk_state) &
		 (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE));

	sock_graft(sk2, newsock);

	newsock->state = SS_CONNECTED;
	err = 0;
	release_sock(sk2);
do_err:
	return err;
}


/*
 *	This does both peername and sockname.
 */
int inet_getname(struct socket *sock, struct sockaddr *uaddr,
		 int *uaddr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct inet_sock *inet = inet_sk(sk);
	struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;

	sin->sin_family = AF_INET;
	if (peer) {
		if (!inet->dport ||
		    (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
		     peer == 1))
			return -ENOTCONN;
		sin->sin_port = inet->dport;
		sin->sin_addr.s_addr = inet->daddr;
	} else {
		__be32 addr = inet->rcv_saddr;
		if (!addr)
			addr = inet->saddr;
		sin->sin_port = inet->sport;
		sin->sin_addr.s_addr = addr;
	}
	memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
	*uaddr_len = sizeof(*sin);
	return 0;
}

int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
		 size_t size)
{
	struct sock *sk = sock->sk;

	/* We may need to bind the socket. */
	if (!inet_sk(sk)->num && inet_autobind(sk))
		return -EAGAIN;

	return sk->sk_prot->sendmsg(iocb, sk, msg, size);
}


static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
			     size_t size, int flags)
{
	struct sock *sk = sock->sk;

	/* We may need to bind the socket. */
	if (!inet_sk(sk)->num && inet_autobind(sk))
		return -EAGAIN;

	if (sk->sk_prot->sendpage)
		return sk->sk_prot->sendpage(sk, page, offset, size, flags);
	return sock_no_sendpage(sock, page, offset, size, flags);
}


int inet_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	int err = 0;

	/* This should really check to make sure
	 * the socket is a TCP socket. (WHY AC...)
	 */
	how++; /* maps 0->1 has the advantage of making bit 1 rcvs and
		  1->2 bit 2 snds.
		  2->3 */
	if ((how & ~SHUTDOWN_MASK) || !how)	/* MAXINT->0 */
		return -EINVAL;

	lock_sock(sk);
	if (sock->state == SS_CONNECTING) {
		if ((1 << sk->sk_state) &
		    (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE))
			sock->state = SS_DISCONNECTING;
		else
			sock->state = SS_CONNECTED;
	}

	switch (sk->sk_state) {
	case TCP_CLOSE:
		err = -ENOTCONN;
		/* Hack to wake up other listeners, who can poll for
		   POLLHUP, even on eg. unconnected UDP sockets -- RR */
	default:
		sk->sk_shutdown |= how;
		if (sk->sk_prot->shutdown)
			sk->sk_prot->shutdown(sk, how);
		break;

	/* Remaining two branches are temporary solution for missing
	 * close() in multithreaded environment. It is _not_ a good idea,
	 * but we have no choice until close() is repaired at VFS level.
	 */
	case TCP_LISTEN:
		if (!(how & RCV_SHUTDOWN))
			break;
		/* Fall through */
	case TCP_SYN_SENT:
		err = sk->sk_prot->disconnect(sk, O_NONBLOCK);
		sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
		break;
	}

	/* Wake up anyone sleeping in poll. */
	sk->sk_state_change(sk);
	release_sock(sk);
	return err;
}
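/*
 * Illustrative note (not part of the original file): the how++ above turns
 * the user-space SHUT_* values into the sk_shutdown bit mask used by the
 * rest of the stack, roughly:
 *
 *	SHUT_RD   (0) -> 1 == RCV_SHUTDOWN
 *	SHUT_WR   (1) -> 2 == SEND_SHUTDOWN
 *	SHUT_RDWR (2) -> 3 == RCV_SHUTDOWN | SEND_SHUTDOWN
 *
 * which is why "how & ~SHUTDOWN_MASK" together with "!how" rejects anything
 * outside that range, including the wrapped-around MAXINT case noted above.
 */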
/*
 *	ioctl() calls you can issue on an INET socket. Most of these are
 *	device configuration and stuff and very rarely used. Some ioctls
 *	pass on to the socket itself.
 *
 *	NOTE: I like the idea of a module for the config stuff. ie ifconfig
 *	loads the devconfigure module does its configuring and unloads it.
 *	There's a good 20K of config code hanging around the kernel.
 */

int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	int err = 0;
	struct net *net = sock_net(sk);

	switch (cmd) {
	case SIOCGSTAMP:
		err = sock_get_timestamp(sk, (struct timeval __user *)arg);
		break;
	case SIOCGSTAMPNS:
		err = sock_get_timestampns(sk, (struct timespec __user *)arg);
		break;
	case SIOCADDRT:
	case SIOCDELRT:
	case SIOCRTMSG:
		err = ip_rt_ioctl(net, cmd, (void __user *)arg);
		break;
	case SIOCDARP:
	case SIOCGARP:
	case SIOCSARP:
		err = arp_ioctl(net, cmd, (void __user *)arg);
		break;
	case SIOCGIFADDR:
	case SIOCSIFADDR:
	case SIOCGIFBRDADDR:
	case SIOCSIFBRDADDR:
	case SIOCGIFNETMASK:
	case SIOCSIFNETMASK:
	case SIOCGIFDSTADDR:
	case SIOCSIFDSTADDR:
	case SIOCSIFPFLAGS:
	case SIOCGIFPFLAGS:
	case SIOCSIFFLAGS:
		err = devinet_ioctl(net, cmd, (void __user *)arg);
		break;
	default:
		if (sk->sk_prot->ioctl)
			err = sk->sk_prot->ioctl(sk, cmd, arg);
		else
			err = -ENOIOCTLCMD;
		break;
	}
	return err;
}
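/*
 * Illustrative note (not part of the original file): SIOCGSTAMP, handled in
 * inet_ioctl() above, is the traditional way for user space to fetch the
 * receive timestamp of the last packet delivered on the socket.  A hedged
 * sketch:
 *
 *	struct timeval tv;
 *
 *	recvfrom(fd, buf, len, 0, NULL, NULL);
 *	ioctl(fd, SIOCGSTAMP, &tv);	// time the datagram was received
 */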
const struct proto_ops inet_stream_ops = {
	.family		   = PF_INET,
	.owner		   = THIS_MODULE,
	.release	   = inet_release,
	.bind		   = inet_bind,
	.connect	   = inet_stream_connect,
	.socketpair	   = sock_no_socketpair,
	.accept		   = inet_accept,
	.getname	   = inet_getname,
	.poll		   = tcp_poll,
	.ioctl		   = inet_ioctl,
	.listen		   = inet_listen,
	.shutdown	   = inet_shutdown,
	.setsockopt	   = sock_common_setsockopt,
	.getsockopt	   = sock_common_getsockopt,
	.sendmsg	   = tcp_sendmsg,
	.recvmsg	   = sock_common_recvmsg,
	.mmap		   = sock_no_mmap,
	.sendpage	   = tcp_sendpage,
	.splice_read	   = tcp_splice_read,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_sock_common_setsockopt,
	.compat_getsockopt = compat_sock_common_getsockopt,
#endif
};

const struct proto_ops inet_dgram_ops = {
	.family		   = PF_INET,
	.owner		   = THIS_MODULE,
	.release	   = inet_release,
	.bind		   = inet_bind,
	.connect	   = inet_dgram_connect,
	.socketpair	   = sock_no_socketpair,
	.accept		   = sock_no_accept,
	.getname	   = inet_getname,
	.poll		   = udp_poll,
	.ioctl		   = inet_ioctl,
	.listen		   = sock_no_listen,
	.shutdown	   = inet_shutdown,
	.setsockopt	   = sock_common_setsockopt,
	.getsockopt	   = sock_common_getsockopt,
	.sendmsg	   = inet_sendmsg,
	.recvmsg	   = sock_common_recvmsg,
	.mmap		   = sock_no_mmap,
	.sendpage	   = inet_sendpage,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_sock_common_setsockopt,
	.compat_getsockopt = compat_sock_common_getsockopt,
#endif
};

/*
 * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without
 * udp_poll
 */
static const struct proto_ops inet_sockraw_ops = {
	.family		   = PF_INET,
	.owner		   = THIS_MODULE,
	.release	   = inet_release,
	.bind		   = inet_bind,
	.connect	   = inet_dgram_connect,
	.socketpair	   = sock_no_socketpair,
	.accept		   = sock_no_accept,
	.getname	   = inet_getname,
	.poll		   = datagram_poll,
	.ioctl		   = inet_ioctl,
	.listen		   = sock_no_listen,
	.shutdown	   = inet_shutdown,
	.setsockopt	   = sock_common_setsockopt,
	.getsockopt	   = sock_common_getsockopt,
	.sendmsg	   = inet_sendmsg,
	.recvmsg	   = sock_common_recvmsg,
	.mmap		   = sock_no_mmap,
	.sendpage	   = inet_sendpage,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_sock_common_setsockopt,
	.compat_getsockopt = compat_sock_common_getsockopt,
#endif
};

static struct net_proto_family inet_family_ops = {
	.family = PF_INET,
	.create = inet_create,
	.owner	= THIS_MODULE,
};

/* Upon startup we insert all the elements in inetsw_array[] into
 * the linked list inetsw.
 */
static struct inet_protosw inetsw_array[] =
{
	{
		.type =       SOCK_STREAM,
		.protocol =   IPPROTO_TCP,
		.prot =       &tcp_prot,
		.ops =        &inet_stream_ops,
		.capability = -1,
		.no_check =   0,
		.flags =      INET_PROTOSW_PERMANENT |
			      INET_PROTOSW_ICSK,
	},

	{
		.type =       SOCK_DGRAM,
		.protocol =   IPPROTO_UDP,
		.prot =       &udp_prot,
		.ops =        &inet_dgram_ops,
		.capability = -1,
		.no_check =   UDP_CSUM_DEFAULT,
		.flags =      INET_PROTOSW_PERMANENT,
	},


	{
		.type =       SOCK_RAW,
		.protocol =   IPPROTO_IP,	/* wild card */
		.prot =       &raw_prot,
		.ops =        &inet_sockraw_ops,
		.capability = CAP_NET_RAW,
		.no_check =   UDP_CSUM_DEFAULT,
		.flags =      INET_PROTOSW_REUSE,
	}
};

#define INETSW_ARRAY_LEN ARRAY_SIZE(inetsw_array)

void inet_register_protosw(struct inet_protosw *p)
{
	struct list_head *lh;
	struct inet_protosw *answer;
	int protocol = p->protocol;
	struct list_head *last_perm;

	spin_lock_bh(&inetsw_lock);

	if (p->type >= SOCK_MAX)
		goto out_illegal;

	/* If we are trying to override a permanent protocol, bail. */
	answer = NULL;
	last_perm = &inetsw[p->type];
	list_for_each(lh, &inetsw[p->type]) {
		answer = list_entry(lh, struct inet_protosw, list);

		/* Check only the non-wild match. */
		if (INET_PROTOSW_PERMANENT & answer->flags) {
			if (protocol == answer->protocol)
				break;
			last_perm = lh;
		}

		answer = NULL;
	}
	if (answer)
		goto out_permanent;

	/* Add the new entry after the last permanent entry if any, so that
	 * the new entry does not override a permanent entry when matched with
	 * a wild-card protocol. But it is allowed to override any existing
	 * non-permanent entry.  This means that when we remove this entry, the
	 * system automatically returns to the old behavior.
	 */
	list_add_rcu(&p->list, last_perm);
out:
	spin_unlock_bh(&inetsw_lock);

	synchronize_net();

	return;

out_permanent:
	printk(KERN_ERR "Attempt to override permanent protocol %d.\n",
	       protocol);
	goto out;

out_illegal:
	printk(KERN_ERR
	       "Ignoring attempt to register invalid socket type %d.\n",
	       p->type);
	goto out;
}

void inet_unregister_protosw(struct inet_protosw *p)
{
	if (INET_PROTOSW_PERMANENT & p->flags) {
		printk(KERN_ERR
		       "Attempt to unregister permanent protocol %d.\n",
		       p->protocol);
	} else {
		spin_lock_bh(&inetsw_lock);
		list_del_rcu(&p->list);
		spin_unlock_bh(&inetsw_lock);

		synchronize_net();
	}
}
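/*
 * Illustrative note (not part of the original file): inet_register_protosw()
 * is also the hook used by modular transports to plug themselves into
 * inetsw[].  A hedged sketch of how a hypothetical module might do so (all
 * names below are made up for illustration; only the exported register and
 * unregister calls are real):
 *
 *	static struct inet_protosw foo_protosw = {
 *		.type       = SOCK_DGRAM,
 *		.protocol   = IPPROTO_FOO,	// hypothetical protocol number
 *		.prot       = &foo_prot,	// hypothetical struct proto
 *		.ops        = &inet_dgram_ops,
 *		.capability = -1,
 *		.no_check   = 0,
 *		.flags      = 0,		// non-permanent, can be overridden
 *	};
 *
 *	inet_register_protosw(&foo_protosw);
 *	...
 *	inet_unregister_protosw(&foo_protosw);	// on module unload
 */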
/*
 *	Shall we try to damage output packets if routing dev changes?
 */

int sysctl_ip_dynaddr __read_mostly;

static int inet_sk_reselect_saddr(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	int err;
	struct rtable *rt;
	__be32 old_saddr = inet->saddr;
	__be32 new_saddr;
	__be32 daddr = inet->daddr;

	if (inet->opt && inet->opt->srr)
		daddr = inet->opt->faddr;

	/* Query new route. */
	err = ip_route_connect(&rt, daddr, 0,
			       RT_CONN_FLAGS(sk),
			       sk->sk_bound_dev_if,
			       sk->sk_protocol,
			       inet->sport, inet->dport, sk, 0);
	if (err)
		return err;

	sk_setup_caps(sk, &rt->u.dst);

	new_saddr = rt->rt_src;

	if (new_saddr == old_saddr)
		return 0;

	if (sysctl_ip_dynaddr > 1) {
		printk(KERN_INFO "%s(): shifting inet->"
		       "saddr from " NIPQUAD_FMT " to " NIPQUAD_FMT "\n",
		       __func__,
		       NIPQUAD(old_saddr),
		       NIPQUAD(new_saddr));
	}

	inet->saddr = inet->rcv_saddr = new_saddr;

	/*
	 * XXX The only one ugly spot where we need to
	 * XXX really change the sockets identity after
	 * XXX it has entered the hashes. -DaveM
	 *
	 * Besides that, it does not check for connection
	 * uniqueness. Wait for troubles.
	 */
	__sk_prot_rehash(sk);
	return 0;
}
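/*
 * Illustrative note (not part of the original file): inet_sk_reselect_saddr()
 * only runs when the ip_dynaddr sysctl is non-zero (see the check in
 * inet_sk_rebuild_header() below); a value greater than 1 additionally logs
 * the source-address rewrite, as the printk above shows.  A hedged
 * administrative example:
 *
 *	# echo 1 > /proc/sys/net/ipv4/ip_dynaddr
 */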
-acme 1145 */ 1146 if (!sysctl_ip_dynaddr || 1147 sk->sk_state != TCP_SYN_SENT || 1148 (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || 1149 (err = inet_sk_reselect_saddr(sk)) != 0) 1150 sk->sk_err_soft = -err; 1151 } 1152 1153 return err; 1154 } 1155 1156 EXPORT_SYMBOL(inet_sk_rebuild_header); 1157 1158 static int inet_gso_send_check(struct sk_buff *skb) 1159 { 1160 struct iphdr *iph; 1161 struct net_protocol *ops; 1162 int proto; 1163 int ihl; 1164 int err = -EINVAL; 1165 1166 if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) 1167 goto out; 1168 1169 iph = ip_hdr(skb); 1170 ihl = iph->ihl * 4; 1171 if (ihl < sizeof(*iph)) 1172 goto out; 1173 1174 if (unlikely(!pskb_may_pull(skb, ihl))) 1175 goto out; 1176 1177 __skb_pull(skb, ihl); 1178 skb_reset_transport_header(skb); 1179 iph = ip_hdr(skb); 1180 proto = iph->protocol & (MAX_INET_PROTOS - 1); 1181 err = -EPROTONOSUPPORT; 1182 1183 rcu_read_lock(); 1184 ops = rcu_dereference(inet_protos[proto]); 1185 if (likely(ops && ops->gso_send_check)) 1186 err = ops->gso_send_check(skb); 1187 rcu_read_unlock(); 1188 1189 out: 1190 return err; 1191 } 1192 1193 static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) 1194 { 1195 struct sk_buff *segs = ERR_PTR(-EINVAL); 1196 struct iphdr *iph; 1197 struct net_protocol *ops; 1198 int proto; 1199 int ihl; 1200 int id; 1201 1202 if (!(features & NETIF_F_V4_CSUM)) 1203 features &= ~NETIF_F_SG; 1204 1205 if (unlikely(skb_shinfo(skb)->gso_type & 1206 ~(SKB_GSO_TCPV4 | 1207 SKB_GSO_UDP | 1208 SKB_GSO_DODGY | 1209 SKB_GSO_TCP_ECN | 1210 0))) 1211 goto out; 1212 1213 if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) 1214 goto out; 1215 1216 iph = ip_hdr(skb); 1217 ihl = iph->ihl * 4; 1218 if (ihl < sizeof(*iph)) 1219 goto out; 1220 1221 if (unlikely(!pskb_may_pull(skb, ihl))) 1222 goto out; 1223 1224 __skb_pull(skb, ihl); 1225 skb_reset_transport_header(skb); 1226 iph = ip_hdr(skb); 1227 id = ntohs(iph->id); 1228 proto = iph->protocol & (MAX_INET_PROTOS - 1); 1229 segs = ERR_PTR(-EPROTONOSUPPORT); 1230 1231 rcu_read_lock(); 1232 ops = rcu_dereference(inet_protos[proto]); 1233 if (likely(ops && ops->gso_segment)) 1234 segs = ops->gso_segment(skb, features); 1235 rcu_read_unlock(); 1236 1237 if (!segs || IS_ERR(segs)) 1238 goto out; 1239 1240 skb = segs; 1241 do { 1242 iph = ip_hdr(skb); 1243 iph->id = htons(id++); 1244 iph->tot_len = htons(skb->len - skb->mac_len); 1245 iph->check = 0; 1246 iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl); 1247 } while ((skb = skb->next)); 1248 1249 out: 1250 return segs; 1251 } 1252 1253 int inet_ctl_sock_create(struct sock **sk, unsigned short family, 1254 unsigned short type, unsigned char protocol, 1255 struct net *net) 1256 { 1257 struct socket *sock; 1258 int rc = sock_create_kern(family, type, protocol, &sock); 1259 1260 if (rc == 0) { 1261 *sk = sock->sk; 1262 (*sk)->sk_allocation = GFP_ATOMIC; 1263 /* 1264 * Unhash it so that IP input processing does not even see it, 1265 * we do not wish this socket to see incoming packets. 
int inet_ctl_sock_create(struct sock **sk, unsigned short family,
			 unsigned short type, unsigned char protocol,
			 struct net *net)
{
	struct socket *sock;
	int rc = sock_create_kern(family, type, protocol, &sock);

	if (rc == 0) {
		*sk = sock->sk;
		(*sk)->sk_allocation = GFP_ATOMIC;
		/*
		 * Unhash it so that IP input processing does not even see it,
		 * we do not wish this socket to see incoming packets.
		 */
		(*sk)->sk_prot->unhash(*sk);

		sk_change_net(*sk, net);
	}
	return rc;
}

EXPORT_SYMBOL_GPL(inet_ctl_sock_create);

unsigned long snmp_fold_field(void *mib[], int offt)
{
	unsigned long res = 0;
	int i;

	for_each_possible_cpu(i) {
		res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
		res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
	}
	return res;
}
EXPORT_SYMBOL_GPL(snmp_fold_field);

int snmp_mib_init(void *ptr[2], size_t mibsize)
{
	BUG_ON(ptr == NULL);
	ptr[0] = __alloc_percpu(mibsize);
	if (!ptr[0])
		goto err0;
	ptr[1] = __alloc_percpu(mibsize);
	if (!ptr[1])
		goto err1;
	return 0;
err1:
	free_percpu(ptr[0]);
	ptr[0] = NULL;
err0:
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(snmp_mib_init);

void snmp_mib_free(void *ptr[2])
{
	BUG_ON(ptr == NULL);
	free_percpu(ptr[0]);
	free_percpu(ptr[1]);
	ptr[0] = ptr[1] = NULL;
}
EXPORT_SYMBOL_GPL(snmp_mib_free);
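/*
 * Illustrative note (not part of the original file): each SNMP MIB is backed
 * by two per-cpu copies, ptr[0] and ptr[1], so counters can be bumped from
 * both process and softirq context without atomics; snmp_fold_field()
 * therefore sums the requested slot across every possible CPU in both
 * copies.  A hedged sketch of the lifecycle, using a made-up MIB name:
 *
 *	DEFINE_SNMP_STAT(struct foo_mib, foo_statistics);	// hypothetical
 *
 *	snmp_mib_init((void **)foo_statistics, sizeof(struct foo_mib));
 *	...
 *	total = snmp_fold_field((void **)foo_statistics, FOO_MIB_SOMEFIELD);
 *	snmp_mib_free((void **)foo_statistics);
 */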
#ifdef CONFIG_IP_MULTICAST
static struct net_protocol igmp_protocol = {
	.handler =	igmp_rcv,
};
#endif

static struct net_protocol tcp_protocol = {
	.handler =	tcp_v4_rcv,
	.err_handler =	tcp_v4_err,
	.gso_send_check = tcp_v4_gso_send_check,
	.gso_segment =	tcp_tso_segment,
	.no_policy =	1,
	.netns_ok =	1,
};

static struct net_protocol udp_protocol = {
	.handler =	udp_rcv,
	.err_handler =	udp_err,
	.no_policy =	1,
	.netns_ok =	1,
};

static struct net_protocol icmp_protocol = {
	.handler =	icmp_rcv,
	.no_policy =	1,
	.netns_ok =	1,
};

static int __init init_ipv4_mibs(void)
{
	if (snmp_mib_init((void **)net_statistics,
			  sizeof(struct linux_mib)) < 0)
		goto err_net_mib;
	if (snmp_mib_init((void **)ip_statistics,
			  sizeof(struct ipstats_mib)) < 0)
		goto err_ip_mib;
	if (snmp_mib_init((void **)icmp_statistics,
			  sizeof(struct icmp_mib)) < 0)
		goto err_icmp_mib;
	if (snmp_mib_init((void **)icmpmsg_statistics,
			  sizeof(struct icmpmsg_mib)) < 0)
		goto err_icmpmsg_mib;
	if (snmp_mib_init((void **)tcp_statistics,
			  sizeof(struct tcp_mib)) < 0)
		goto err_tcp_mib;
	if (snmp_mib_init((void **)udp_statistics,
			  sizeof(struct udp_mib)) < 0)
		goto err_udp_mib;
	if (snmp_mib_init((void **)udplite_statistics,
			  sizeof(struct udp_mib)) < 0)
		goto err_udplite_mib;

	tcp_mib_init();

	return 0;

err_udplite_mib:
	snmp_mib_free((void **)udp_statistics);
err_udp_mib:
	snmp_mib_free((void **)tcp_statistics);
err_tcp_mib:
	snmp_mib_free((void **)icmpmsg_statistics);
err_icmpmsg_mib:
	snmp_mib_free((void **)icmp_statistics);
err_icmp_mib:
	snmp_mib_free((void **)ip_statistics);
err_ip_mib:
	snmp_mib_free((void **)net_statistics);
err_net_mib:
	return -ENOMEM;
}

static int ipv4_proc_init(void);

/*
 *	IP protocol layer initialiser
 */

static struct packet_type ip_packet_type = {
	.type = __constant_htons(ETH_P_IP),
	.func = ip_rcv,
	.gso_send_check = inet_gso_send_check,
	.gso_segment = inet_gso_segment,
};

static int __init inet_init(void)
{
	struct sk_buff *dummy_skb;
	struct inet_protosw *q;
	struct list_head *r;
	int rc = -EINVAL;

	BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb));

	rc = proto_register(&tcp_prot, 1);
	if (rc)
		goto out;

	rc = proto_register(&udp_prot, 1);
	if (rc)
		goto out_unregister_tcp_proto;

	rc = proto_register(&raw_prot, 1);
	if (rc)
		goto out_unregister_udp_proto;

	/*
	 *	Tell SOCKET that we are alive...
	 */

	(void)sock_register(&inet_family_ops);

	/*
	 *	Add all the base protocols.
	 */

	if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add ICMP protocol\n");
	if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add UDP protocol\n");
	if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n");
#ifdef CONFIG_IP_MULTICAST
	if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add IGMP protocol\n");
#endif

	/* Register the socket-side information for inet_create. */
	for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
		INIT_LIST_HEAD(r);

	for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
		inet_register_protosw(q);

	/*
	 *	Set the ARP module up
	 */

	arp_init();

	/*
	 *	Set the IP module up
	 */

	ip_init();

	tcp_v4_init();

	/* Setup TCP slab cache for open requests. */
	tcp_init();

	/* Setup UDP memory threshold */
	udp_init();

	/* Add UDP-Lite (RFC 3828) */
	udplite4_register();

	/*
	 *	Set the ICMP layer up
	 */

	if (icmp_init() < 0)
		panic("Failed to create the ICMP control socket.\n");

	/*
	 *	Initialise the multicast router
	 */
#if defined(CONFIG_IP_MROUTE)
	ip_mr_init();
#endif
	/*
	 *	Initialise per-cpu ipv4 mibs
	 */

	if (init_ipv4_mibs())
		printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n");

	ipv4_proc_init();

	ipfrag_init();

	dev_add_pack(&ip_packet_type);

	rc = 0;
out:
	return rc;
out_unregister_udp_proto:
	proto_unregister(&udp_prot);
out_unregister_tcp_proto:
	proto_unregister(&tcp_prot);
	goto out;
}

fs_initcall(inet_init);
/* ------------------------------------------------------------------------ */

#ifdef CONFIG_PROC_FS
static int __init ipv4_proc_init(void)
{
	int rc = 0;

	if (raw_proc_init())
		goto out_raw;
	if (tcp4_proc_init())
		goto out_tcp;
	if (udp4_proc_init())
		goto out_udp;
	if (ip_misc_proc_init())
		goto out_misc;
out:
	return rc;
out_misc:
	udp4_proc_exit();
out_udp:
	tcp4_proc_exit();
out_tcp:
	raw_proc_exit();
out_raw:
	rc = -ENOMEM;
	goto out;
}

#else /* CONFIG_PROC_FS */
static int __init ipv4_proc_init(void)
{
	return 0;
}
#endif /* CONFIG_PROC_FS */

MODULE_ALIAS_NETPROTO(PF_INET);

EXPORT_SYMBOL(inet_accept);
EXPORT_SYMBOL(inet_bind);
EXPORT_SYMBOL(inet_dgram_connect);
EXPORT_SYMBOL(inet_dgram_ops);
EXPORT_SYMBOL(inet_getname);
EXPORT_SYMBOL(inet_ioctl);
EXPORT_SYMBOL(inet_listen);
EXPORT_SYMBOL(inet_register_protosw);
EXPORT_SYMBOL(inet_release);
EXPORT_SYMBOL(inet_sendmsg);
EXPORT_SYMBOL(inet_shutdown);
EXPORT_SYMBOL(inet_sock_destruct);
EXPORT_SYMBOL(inet_stream_connect);
EXPORT_SYMBOL(inet_stream_ops);
EXPORT_SYMBOL(inet_unregister_protosw);
EXPORT_SYMBOL(net_statistics);
EXPORT_SYMBOL(sysctl_ip_nonlocal_bind);