/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *  This program is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 */

#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/ioctls.h>
#include <asm/semaphore.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
        .lhash_lock  = RW_LOCK_UNLOCKED,
        .lhash_users = ATOMIC_INIT(0),
        .lhash_wait  = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;

void dccp_set_state(struct sock *sk, const int state)
{
        const int oldstate = sk->sk_state;

        dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
                      dccp_state_name(oldstate), dccp_state_name(state));
        WARN_ON(state == oldstate);

        switch (state) {
        case DCCP_OPEN:
                if (oldstate != DCCP_OPEN)
                        DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
                break;

        case DCCP_CLOSED:
                if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
                    oldstate == DCCP_CLOSING)
                        DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

                sk->sk_prot->unhash(sk);
                if (inet_csk(sk)->icsk_bind_hash != NULL &&
                    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
                        inet_put_port(&dccp_hashinfo, sk);
                /* fall through */
        default:
                if (oldstate == DCCP_OPEN)
                        DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
        }

        /* Change state AFTER socket is unhashed to avoid closed
         * socket sitting in hash tables.
         */
        sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);

static void dccp_finish_passive_close(struct sock *sk)
{
        switch (sk->sk_state) {
        case DCCP_PASSIVE_CLOSE:
                /* Node (client or server) has received Close packet. */
                dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
                dccp_set_state(sk, DCCP_CLOSED);
                break;
        case DCCP_PASSIVE_CLOSEREQ:
                /*
                 * Client received CloseReq. We set the `active' flag so that
                 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
                 */
                dccp_send_close(sk, 1);
                dccp_set_state(sk, DCCP_CLOSING);
        }
}

void dccp_done(struct sock *sk)
{
        dccp_set_state(sk, DCCP_CLOSED);
        dccp_clear_xmit_timers(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (!sock_flag(sk, SOCK_DEAD))
                sk->sk_state_change(sk);
        else
                inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);

const char *dccp_packet_name(const int type)
{
        static const char *dccp_packet_names[] = {
                [DCCP_PKT_REQUEST]  = "REQUEST",
                [DCCP_PKT_RESPONSE] = "RESPONSE",
                [DCCP_PKT_DATA]     = "DATA",
                [DCCP_PKT_ACK]      = "ACK",
                [DCCP_PKT_DATAACK]  = "DATAACK",
                [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
                [DCCP_PKT_CLOSE]    = "CLOSE",
                [DCCP_PKT_RESET]    = "RESET",
                [DCCP_PKT_SYNC]     = "SYNC",
                [DCCP_PKT_SYNCACK]  = "SYNCACK",
        };

        if (type >= DCCP_NR_PKT_TYPES)
                return "INVALID";
        else
                return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

const char *dccp_state_name(const int state)
{
        static char *dccp_state_names[] = {
                [DCCP_OPEN]             = "OPEN",
                [DCCP_REQUESTING]       = "REQUESTING",
                [DCCP_PARTOPEN]         = "PARTOPEN",
                [DCCP_LISTEN]           = "LISTEN",
                [DCCP_RESPOND]          = "RESPOND",
                [DCCP_CLOSING]          = "CLOSING",
                [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
                [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
                [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
                [DCCP_TIME_WAIT]        = "TIME_WAIT",
                [DCCP_CLOSED]           = "CLOSED",
        };

        if (state >= DCCP_MAX_STATES)
                return "INVALID STATE!";
        else
                return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);

void dccp_hash(struct sock *sk)
{
        inet_hash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_hash);

void dccp_unhash(struct sock *sk)
{
        inet_unhash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_unhash);

int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_minisock *dmsk = dccp_msk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);

        dccp_minisock_init(&dp->dccps_minisock);

        icsk->icsk_rto         = DCCP_TIMEOUT_INIT;
        icsk->icsk_syn_retries = sysctl_dccp_request_retries;
        sk->sk_state           = DCCP_CLOSED;
        sk->sk_write_space     = dccp_write_space;
        icsk->icsk_sync_mss    = dccp_sync_mss;
        dp->dccps_mss_cache    = 536;
        dp->dccps_rate_last    = jiffies;
        dp->dccps_role         = DCCP_ROLE_UNDEFINED;
        dp->dccps_service      = DCCP_SERVICE_CODE_IS_ABSENT;
        dp->dccps_l_ack_ratio  = dp->dccps_r_ack_ratio = 1;

        dccp_init_xmit_timers(sk);

        /*
         * FIXME: We're hardcoding the CCID, and doing this at this point makes
         * the listening (master) sock get CCID control blocks, which is not
         * necessary, but for now, to not mess with the test userspace apps,
         * let's leave it here; later the real solution is to do this in a
         * setsockopt(CCIDs-I-want/accept). -acme
         */
        if (likely(ctl_sock_initialized)) {
                int rc = dccp_feat_init(dmsk);

                if (rc)
                        return rc;

                if (dmsk->dccpms_send_ack_vector) {
                        dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
                        if (dp->dccps_hc_rx_ackvec == NULL)
                                return -ENOMEM;
                }
                dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
                                                      sk, GFP_KERNEL);
                dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
                                                      sk, GFP_KERNEL);
                if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
                             dp->dccps_hc_tx_ccid == NULL)) {
                        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
                        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
                        if (dmsk->dccpms_send_ack_vector) {
                                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
                                dp->dccps_hc_rx_ackvec = NULL;
                        }
                        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
                        return -ENOMEM;
                }
        } else {
                /* control socket doesn't need feat nego */
                INIT_LIST_HEAD(&dmsk->dccpms_pending);
                INIT_LIST_HEAD(&dmsk->dccpms_conf);
        }

        return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

int dccp_destroy_sock(struct sock *sk)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_minisock *dmsk = dccp_msk(sk);

        /*
         * DCCP doesn't use sk_write_queue, just sk_send_head
         * for retransmissions
         */
        if (sk->sk_send_head != NULL) {
                kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        /* Clean up a referenced DCCP bind bucket. */
        if (inet_csk(sk)->icsk_bind_hash != NULL)
                inet_put_port(&dccp_hashinfo, sk);

        kfree(dp->dccps_service_list);
        dp->dccps_service_list = NULL;

        if (dmsk->dccpms_send_ack_vector) {
                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
                dp->dccps_hc_rx_ackvec = NULL;
        }
        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

        /* clean up feature negotiation state */
        dccp_feat_clean(dmsk);

        return 0;
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

static inline int dccp_listen_start(struct sock *sk, int backlog)
{
        struct dccp_sock *dp = dccp_sk(sk);

        dp->dccps_role = DCCP_ROLE_LISTEN;
        return inet_csk_listen_start(sk, backlog);
}

static inline int dccp_need_reset(int state)
{
        return state != DCCP_CLOSED && state != DCCP_LISTEN &&
               state != DCCP_REQUESTING;
}

int dccp_disconnect(struct sock *sk, int flags)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_sock *inet = inet_sk(sk);
        int err = 0;
        const int old_state = sk->sk_state;

        if (old_state != DCCP_CLOSED)
                dccp_set_state(sk, DCCP_CLOSED);

        /*
         * This corresponds to the ABORT function of RFC 793, sec. 3.8.
         * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
         */
        if (old_state == DCCP_LISTEN) {
                inet_csk_listen_stop(sk);
        } else if (dccp_need_reset(old_state)) {
                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
                sk->sk_err = ECONNRESET;
        } else if (old_state == DCCP_REQUESTING)
                sk->sk_err = ECONNRESET;

        dccp_clear_xmit_timers(sk);
        __skb_queue_purge(&sk->sk_receive_queue);
        if (sk->sk_send_head != NULL) {
                __kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        inet->dport = 0;

        if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
                inet_reset_saddr(sk);

        sk->sk_shutdown = 0;
        sock_reset_flag(sk, SOCK_DONE);

        icsk->icsk_backoff = 0;
        inet_csk_delack_init(sk);
        __sk_dst_reset(sk);

        BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

        sk->sk_error_report(sk);
        return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 * Wait for a DCCP event.
 *
 * Note that we don't need to lock the socket, as the upper poll layers
 * take care of normal races (between the test and the event) and we don't
 * go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
                       poll_table *wait)
{
        unsigned int mask;
        struct sock *sk = sock->sk;

        poll_wait(file, sk->sk_sleep, wait);
        if (sk->sk_state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);

        /* Socket is not locked. We are protected from async events
         * by poll logic and correct handling of state changes
         * made by other threads is impossible in any case.
         */

        mask = 0;
        if (sk->sk_err)
                mask = POLLERR;

        if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
                mask |= POLLHUP;
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= POLLIN | POLLRDNORM | POLLRDHUP;

        /* Connected? */
        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
                if (atomic_read(&sk->sk_rmem_alloc) > 0)
                        mask |= POLLIN | POLLRDNORM;

                if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
                        if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
                                mask |= POLLOUT | POLLWRNORM;
                        } else { /* send SIGIO later */
                                set_bit(SOCK_ASYNC_NOSPACE,
                                        &sk->sk_socket->flags);
                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

                                /* Race breaker. If space is freed after
                                 * wspace test but before the flags are set,
                                 * IO signal will be lost.
                                 */
                                if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
                                        mask |= POLLOUT | POLLWRNORM;
                        }
                }
        }
        return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);

int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
        int rc = -ENOTCONN;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN)
                goto out;

        switch (cmd) {
        case SIOCINQ: {
                struct sk_buff *skb;
                unsigned long amount = 0;

                skb = skb_peek(&sk->sk_receive_queue);
                if (skb != NULL) {
                        /*
                         * We will only return the amount of this packet
                         * since that is all that will be read.
                         */
                        amount = skb->len;
                }
                rc = put_user(amount, (int __user *)arg);
        }
                break;
        default:
                rc = -ENOIOCTLCMD;
                break;
        }
out:
        release_sock(sk);
        return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);
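
/*
 * Usage sketch (userspace, illustrative only): since a DCCP recvmsg()
 * reads at most one packet, SIOCINQ reports just the length of the
 * packet at the head of the receive queue, not of the whole queue:
 *
 *      int pending;
 *
 *      if (ioctl(fd, SIOCINQ, &pending) == 0)
 *              printf("next packet carries %d bytes\n", pending);
 */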

static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
                                   char __user *optval, int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_service_list *sl = NULL;

        if (service == DCCP_SERVICE_INVALID_VALUE ||
            optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
                return -EINVAL;

        if (optlen > sizeof(service)) {
                sl = kmalloc(optlen, GFP_KERNEL);
                if (sl == NULL)
                        return -ENOMEM;

                sl->dccpsl_nr = optlen / sizeof(u32) - 1;
                if (copy_from_user(sl->dccpsl_list,
                                   optval + sizeof(service),
                                   optlen - sizeof(service)) ||
                    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
                        kfree(sl);
                        return -EFAULT;
                }
        }

        lock_sock(sk);
        dp->dccps_service = service;

        kfree(dp->dccps_service_list);

        dp->dccps_service_list = sl;
        release_sock(sk);
        return 0;
}
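
/*
 * Usage sketch (userspace, illustrative only): the optval for
 * DCCP_SOCKOPT_SERVICE is one __be32 service code, optionally followed
 * by further codes that a listening socket should also accept.  The
 * codes below are made up:
 *
 *      __be32 codes[2] = { htonl(42), htonl(43) };
 *
 *      setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *                 codes, sizeof(codes));
 */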

/* Byte 1 is the feature to change; the rest is the preference list. */
static int dccp_setsockopt_change(struct sock *sk, int type,
                                  struct dccp_so_feat __user *optval)
{
        struct dccp_so_feat opt;
        u8 *val;
        int rc;

        if (copy_from_user(&opt, optval, sizeof(opt)))
                return -EFAULT;

        val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
        if (!val)
                return -ENOMEM;

        if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
                rc = -EFAULT;
                goto out_free_val;
        }

        rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
                              val, opt.dccpsf_len, GFP_KERNEL);
        if (rc)
                goto out_free_val;

out:
        return rc;

out_free_val:
        kfree(val);
        goto out;
}

static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
                              char __user *optval, int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        int val, err = 0;

        if (optlen < sizeof(int))
                return -EINVAL;

        if (get_user(val, (int __user *)optval))
                return -EFAULT;

        if (optname == DCCP_SOCKOPT_SERVICE)
                return dccp_setsockopt_service(sk, val, optval, optlen);

        lock_sock(sk);
        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
                err = 0;
                break;
        case DCCP_SOCKOPT_CHANGE_L:
                if (optlen != sizeof(struct dccp_so_feat))
                        err = -EINVAL;
                else
                        err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
                                                     (struct dccp_so_feat __user *)
                                                     optval);
                break;
        case DCCP_SOCKOPT_CHANGE_R:
                if (optlen != sizeof(struct dccp_so_feat))
                        err = -EINVAL;
                else
                        err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
                                                     (struct dccp_so_feat __user *)
                                                     optval);
                break;
        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
                if (dp->dccps_role != DCCP_ROLE_SERVER)
                        err = -EOPNOTSUPP;
                else
                        dp->dccps_server_timewait = (val != 0);
                break;
        case DCCP_SOCKOPT_SEND_CSCOV:   /* sender side, RFC 4340, sec. 9.2 */
                if (val < 0 || val > 15)
                        err = -EINVAL;
                else
                        dp->dccps_pcslen = val;
                break;
        case DCCP_SOCKOPT_RECV_CSCOV:   /* receiver side, RFC 4340, sec. 9.2.1 */
                if (val < 0 || val > 15)
                        err = -EINVAL;
                else {
                        dp->dccps_pcrlen = val;
                        /* FIXME: add feature negotiation,
                         * ChangeL(MinimumChecksumCoverage, val) */
                }
                break;
        default:
                err = -ENOPROTOOPT;
                break;
        }

        release_sock(sk);
        return err;
}

int dccp_setsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int optlen)
{
        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);
        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int optlen)
{
        if (level != SOL_DCCP)
                return inet_csk_compat_setsockopt(sk, level, optname,
                                                  optval, optlen);
        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif

static int dccp_getsockopt_service(struct sock *sk, int len,
                                   __be32 __user *optval,
                                   int __user *optlen)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const struct dccp_service_list *sl;
        int err = -ENOENT, slen = 0, total_len = sizeof(u32);

        lock_sock(sk);
        if ((sl = dp->dccps_service_list) != NULL) {
                slen = sl->dccpsl_nr * sizeof(u32);
                total_len += slen;
        }

        err = -EINVAL;
        if (total_len > len)
                goto out;

        err = 0;
        if (put_user(total_len, optlen) ||
            put_user(dp->dccps_service, optval) ||
            (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
                err = -EFAULT;
out:
        release_sock(sk);
        return err;
}

static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
                              char __user *optval, int __user *optlen)
{
        struct dccp_sock *dp;
        int val, len;

        if (get_user(len, optlen))
                return -EFAULT;

        if (len < (int)sizeof(int))
                return -EINVAL;

        dp = dccp_sk(sk);

        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
                return 0;
        case DCCP_SOCKOPT_SERVICE:
                return dccp_getsockopt_service(sk, len,
                                               (__be32 __user *)optval, optlen);
        case DCCP_SOCKOPT_GET_CUR_MPS:
                val = dp->dccps_mss_cache;
                break;
        case DCCP_SOCKOPT_SERVER_TIMEWAIT:
                val = dp->dccps_server_timewait;
                break;
        case DCCP_SOCKOPT_SEND_CSCOV:
                val = dp->dccps_pcslen;
                break;
        case DCCP_SOCKOPT_RECV_CSCOV:
                val = dp->dccps_pcrlen;
                break;
        case 128 ... 191:
                return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        case 192 ... 255:
                return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        default:
                return -ENOPROTOOPT;
        }

        len = sizeof(val);
        if (put_user(len, optlen) || copy_to_user(optval, &val, len))
                return -EFAULT;

        return 0;
}
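
/*
 * Usage sketch (userspace, illustrative only): checksum coverage values
 * follow RFC 4340, 9.2 (0 means the checksum covers the whole packet).
 * To request reduced coverage on the sender and accept such packets on
 * the receiver:
 *
 *      int cscov = 10;
 *
 *      setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SEND_CSCOV,
 *                 &cscov, sizeof(cscov));
 *      setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_RECV_CSCOV,
 *                 &cscov, sizeof(cscov));
 */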

int dccp_getsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);
        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int __user *optlen)
{
        if (level != SOL_DCCP)
                return inet_csk_compat_getsockopt(sk, level, optname,
                                                  optval, optlen);
        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif

int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const int flags = msg->msg_flags;
        const int noblock = flags & MSG_DONTWAIT;
        struct sk_buff *skb;
        int rc, size;
        long timeo;

        if (len > dp->dccps_mss_cache)
                return -EMSGSIZE;

        lock_sock(sk);

        if (sysctl_dccp_tx_qlen &&
            (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
                rc = -EAGAIN;
                goto out_release;
        }

        timeo = sock_sndtimeo(sk, noblock);

        /*
         * We have to use sk_stream_wait_connect here to set sk_write_pending,
         * so that the trick in dccp_rcv_request_sent_state_process works.
         */
        /* Wait for a connection to finish. */
        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_release;

        size = sk->sk_prot->max_header + len;
        release_sock(sk);
        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
        lock_sock(sk);
        if (skb == NULL)
                goto out_release;

        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
        if (rc != 0)
                goto out_discard;

        skb_queue_tail(&sk->sk_write_queue, skb);
        dccp_write_xmit(sk, 0);
out_release:
        release_sock(sk);
        return rc ? : len;
out_discard:
        kfree_skb(skb);
        goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);
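
/*
 * Usage sketch (userspace, illustrative only): dccp_sendmsg() rejects
 * anything larger than the current maximum packet size with -EMSGSIZE,
 * so an application can query DCCP_SOCKOPT_GET_CUR_MPS and size its
 * writes to fit:
 *
 *      int mps;
 *      socklen_t olen = sizeof(mps);
 *
 *      if (getsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_GET_CUR_MPS,
 *                     &mps, &olen) == 0 && buflen <= (size_t)mps)
 *              send(fd, buf, buflen, 0);
 */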

int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len, int nonblock, int flags, int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, nonblock);

        do {
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                switch (dh->dccph_type) {
                case DCCP_PKT_DATA:
                case DCCP_PKT_DATAACK:
                        goto found_ok_skb;

                case DCCP_PKT_CLOSE:
                case DCCP_PKT_CLOSEREQ:
                        if (!(flags & MSG_PEEK))
                                dccp_finish_passive_close(sk);
                        /* fall through */
                case DCCP_PKT_RESET:
                        dccp_pr_debug("found fin (%s) ok!\n",
                                      dccp_packet_name(dh->dccph_type));
                        len = 0;
                        goto found_fin_ok;
                default:
                        dccp_pr_debug("packet_type=%s\n",
                                      dccp_packet_name(dh->dccph_type));
                        sk_eat_skb(sk, skb, 0);
                }
verify_sock_status:
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when the user tries to read
                                 * from a socket that was never connected.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                sk_wait_data(sk, &timeo);
                continue;
        found_ok_skb:
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
        found_fin_ok:
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb, 0);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

int inet_dccp_listen(struct socket *sock, int backlog)
{
        struct sock *sk = sock->sk;
        unsigned char old_state;
        int err;

        lock_sock(sk);

        err = -EINVAL;
        if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
                goto out;

        old_state = sk->sk_state;
        if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
                goto out;

        /* Really, if the socket is already in listen state
         * we can only allow the backlog to be adjusted.
         */
        if (old_state != DCCP_LISTEN) {
                /*
                 * FIXME: here it probably should be sk->sk_prot->listen_start
                 * see tcp_listen_start
                 */
                err = dccp_listen_start(sk, backlog);
                if (err)
                        goto out;
        }
        sk->sk_max_ack_backlog = backlog;
        err = 0;

out:
        release_sock(sk);
        return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);
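
/*
 * Usage sketch (userspace, illustrative only): a minimal DCCP server
 * reaches this listen path through the usual sequence (the service code
 * and address are made up):
 *
 *      int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *      __be32 service = htonl(42);
 *
 *      setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *                 &service, sizeof(service));
 *      bind(fd, (struct sockaddr *)&addr, sizeof(addr));
 *      listen(fd, 5);
 */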

static void dccp_terminate_connection(struct sock *sk)
{
        u8 next_state = DCCP_CLOSED;

        switch (sk->sk_state) {
        case DCCP_PASSIVE_CLOSE:
        case DCCP_PASSIVE_CLOSEREQ:
                dccp_finish_passive_close(sk);
                break;
        case DCCP_PARTOPEN:
                dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
                inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
                /* fall through */
        case DCCP_OPEN:
                dccp_send_close(sk, 1);

                if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
                    !dccp_sk(sk)->dccps_server_timewait)
                        next_state = DCCP_ACTIVE_CLOSEREQ;
                else
                        next_state = DCCP_CLOSING;
                /* fall through */
        default:
                dccp_set_state(sk, next_state);
        }
}

void dccp_close(struct sock *sk, long timeout)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct sk_buff *skb;
        u32 data_was_unread = 0;
        int state;

        lock_sock(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (sk->sk_state == DCCP_LISTEN) {
                dccp_set_state(sk, DCCP_CLOSED);

                /* Special case. */
                inet_csk_listen_stop(sk);

                goto adjudge_to_death;
        }

        sk_stop_timer(sk, &dp->dccps_xmit_timer);

        /*
         * We need to flush the recv. buffs. We do this only on the
         * descriptor close, not protocol-sourced closes, because the
         * reader process may not have drained the data yet!
         */
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                data_was_unread += skb->len;
                __kfree_skb(skb);
        }

        if (data_was_unread) {
                /* Unread data was tossed, send an appropriate Reset Code */
                DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
                dccp_set_state(sk, DCCP_CLOSED);
        } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
        } else if (sk->sk_state != DCCP_CLOSED) {
                dccp_terminate_connection(sk);
        }

        sk_stream_wait_close(sk, timeout);

adjudge_to_death:
        state = sk->sk_state;
        sock_hold(sk);
        sock_orphan(sk);
        atomic_inc(sk->sk_prot->orphan_count);

        /*
         * This is the last release_sock in this socket's life;
         * it will drain the backlog.
         */
        release_sock(sk);
        /*
         * Now the socket is owned by the kernel and we acquire the BH lock
         * to finish the close. No need to check for user refs.
         */
        local_bh_disable();
        bh_lock_sock(sk);
        BUG_TRAP(!sock_owned_by_user(sk));

        /* Have we already been destroyed by a softirq or backlog? */
        if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
                goto out;

        if (sk->sk_state == DCCP_CLOSED)
                inet_csk_destroy_sock(sk);

        /* Otherwise, the socket is reprieved until protocol close. */

out:
        bh_unlock_sock(sk);
        local_bh_enable();
        sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static int __init dccp_mib_init(void)
{
        int rc = -ENOMEM;

        dccp_statistics[0] = alloc_percpu(struct dccp_mib);
        if (dccp_statistics[0] == NULL)
                goto out;

        dccp_statistics[1] = alloc_percpu(struct dccp_mib);
        if (dccp_statistics[1] == NULL)
                goto out_free_one;

        rc = 0;
out:
        return rc;
out_free_one:
        free_percpu(dccp_statistics[0]);
        dccp_statistics[0] = NULL;
        goto out;
}

static void dccp_mib_exit(void)
{
        free_percpu(dccp_statistics[0]);
        free_percpu(dccp_statistics[1]);
        dccp_statistics[0] = dccp_statistics[1] = NULL;
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, bool, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
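
/*
 * Usage sketch (illustrative only): both knobs above are read-only (0444)
 * module parameters, so they are given at load time, e.g.:
 *
 *      modprobe dccp thash_entries=8192 dccp_debug=1
 *
 * dccp_debug exists only when CONFIG_IP_DCCP_DEBUG is set.
 */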

static int __init dccp_init(void)
{
        unsigned long goal;
        int ehash_order, bhash_order, i;
        int rc = -ENOBUFS;

        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
                                  SLAB_HWCACHE_ALIGN, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
                goto out;

        /*
         * Size and allocate the main established and bind bucket
         * hash tables.
         *
         * The methodology is similar to that of the buffer cache.
         */
        if (num_physpages >= (128 * 1024))
                goal = num_physpages >> (21 - PAGE_SHIFT);
        else
                goal = num_physpages >> (23 - PAGE_SHIFT);

        if (thash_entries)
                goal = (thash_entries *
                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
                ;
        do {
                dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
                                        sizeof(struct inet_ehash_bucket);
                while (dccp_hashinfo.ehash_size &
                       (dccp_hashinfo.ehash_size - 1))
                        dccp_hashinfo.ehash_size--;
                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
                        __get_free_pages(GFP_ATOMIC, ehash_order);
        } while (!dccp_hashinfo.ehash && --ehash_order > 0);

        if (!dccp_hashinfo.ehash) {
                DCCP_CRIT("Failed to allocate DCCP established hash table");
                goto out_free_bind_bucket_cachep;
        }

        for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
        }

        if (inet_ehash_locks_alloc(&dccp_hashinfo))
                goto out_free_dccp_ehash;

        bhash_order = ehash_order;

        do {
                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
                                        sizeof(struct inet_bind_hashbucket);
                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
                    bhash_order > 0)
                        continue;
                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
                        __get_free_pages(GFP_ATOMIC, bhash_order);
        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);

        if (!dccp_hashinfo.bhash) {
                DCCP_CRIT("Failed to allocate DCCP bind hash table");
                goto out_free_dccp_locks;
        }

        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
        }

        rc = dccp_mib_init();
        if (rc)
                goto out_free_dccp_bhash;

        rc = dccp_ackvec_init();
        if (rc)
                goto out_free_dccp_mib;

        rc = dccp_sysctl_init();
        if (rc)
                goto out_ackvec_exit;

        dccp_timestamping_init();
out:
        return rc;
out_ackvec_exit:
        dccp_ackvec_exit();
out_free_dccp_mib:
        dccp_mib_exit();
out_free_dccp_bhash:
        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
        dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
        inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
        dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_hashinfo.bind_bucket_cachep = NULL;
        goto out;
}

static void __exit dccp_fini(void)
{
        dccp_mib_exit();
        free_pages((unsigned long)dccp_hashinfo.bhash,
                   get_order(dccp_hashinfo.bhash_size *
                             sizeof(struct inet_bind_hashbucket)));
        free_pages((unsigned long)dccp_hashinfo.ehash,
                   get_order(dccp_hashinfo.ehash_size *
                             sizeof(struct inet_ehash_bucket)));
        inet_ehash_locks_free(&dccp_hashinfo);
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_ackvec_exit();
        dccp_sysctl_exit();
}

module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");