1 /* 2 * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 * 32 */ 33 #include <linux/module.h> 34 #include <linux/errno.h> 35 #include <linux/kernel.h> 36 #include <linux/gfp.h> 37 #include <linux/in.h> 38 #include <linux/ipv6.h> 39 #include <linux/poll.h> 40 #include <net/sock.h> 41 42 #include "rds.h" 43 44 /* this is just used for stats gathering :/ */ 45 static DEFINE_SPINLOCK(rds_sock_lock); 46 static unsigned long rds_sock_count; 47 static LIST_HEAD(rds_sock_list); 48 DECLARE_WAIT_QUEUE_HEAD(rds_poll_waitq); 49 50 /* 51 * This is called as the final descriptor referencing this socket is closed. 52 * We have to unbind the socket so that another socket can be bound to the 53 * address it was using. 54 * 55 * We have to be careful about racing with the incoming path. sock_orphan() 56 * sets SOCK_DEAD and we use that as an indicator to the rx path that new 57 * messages shouldn't be queued. 58 */ 59 static int rds_release(struct socket *sock) 60 { 61 struct sock *sk = sock->sk; 62 struct rds_sock *rs; 63 64 if (!sk) 65 goto out; 66 67 rs = rds_sk_to_rs(sk); 68 69 sock_orphan(sk); 70 /* Note - rds_clear_recv_queue grabs rs_recv_lock, so 71 * that ensures the recv path has completed messing 72 * with the socket. */ 73 rds_clear_recv_queue(rs); 74 rds_cong_remove_socket(rs); 75 76 rds_remove_bound(rs); 77 78 rds_send_drop_to(rs, NULL); 79 rds_rdma_drop_keys(rs); 80 rds_notify_queue_get(rs, NULL); 81 rds_notify_msg_zcopy_purge(&rs->rs_zcookie_queue); 82 83 spin_lock_bh(&rds_sock_lock); 84 list_del_init(&rs->rs_item); 85 rds_sock_count--; 86 spin_unlock_bh(&rds_sock_lock); 87 88 rds_trans_put(rs->rs_transport); 89 90 sock->sk = NULL; 91 sock_put(sk); 92 out: 93 return 0; 94 } 95 96 /* 97 * Careful not to race with rds_release -> sock_orphan which clears sk_sleep. 98 * _bh() isn't OK here, we're called from interrupt handlers. It's probably OK 99 * to wake the waitqueue after sk_sleep is clear as we hold a sock ref, but 100 * this seems more conservative. 101 * NB - normally, one would use sk_callback_lock for this, but we can 102 * get here from interrupts, whereas the network code grabs sk_callback_lock 103 * with _lock_bh only - so relying on sk_callback_lock introduces livelocks. 104 */ 105 void rds_wake_sk_sleep(struct rds_sock *rs) 106 { 107 unsigned long flags; 108 109 read_lock_irqsave(&rs->rs_recv_lock, flags); 110 __rds_wake_sk_sleep(rds_rs_to_sk(rs)); 111 read_unlock_irqrestore(&rs->rs_recv_lock, flags); 112 } 113 114 static int rds_getname(struct socket *sock, struct sockaddr *uaddr, 115 int peer) 116 { 117 struct rds_sock *rs = rds_sk_to_rs(sock->sk); 118 struct sockaddr_in6 *sin6; 119 struct sockaddr_in *sin; 120 int uaddr_len; 121 122 /* racey, don't care */ 123 if (peer) { 124 if (ipv6_addr_any(&rs->rs_conn_addr)) 125 return -ENOTCONN; 126 127 if (ipv6_addr_v4mapped(&rs->rs_conn_addr)) { 128 sin = (struct sockaddr_in *)uaddr; 129 memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); 130 sin->sin_family = AF_INET; 131 sin->sin_port = rs->rs_conn_port; 132 sin->sin_addr.s_addr = rs->rs_conn_addr_v4; 133 uaddr_len = sizeof(*sin); 134 } else { 135 sin6 = (struct sockaddr_in6 *)uaddr; 136 sin6->sin6_family = AF_INET6; 137 sin6->sin6_port = rs->rs_conn_port; 138 sin6->sin6_addr = rs->rs_conn_addr; 139 sin6->sin6_flowinfo = 0; 140 /* scope_id is the same as in the bound address. */ 141 sin6->sin6_scope_id = rs->rs_bound_scope_id; 142 uaddr_len = sizeof(*sin6); 143 } 144 } else { 145 /* If socket is not yet bound and the socket is connected, 146 * set the return address family to be the same as the 147 * connected address, but with 0 address value. If it is not 148 * connected, set the family to be AF_UNSPEC (value 0) and 149 * the address size to be that of an IPv4 address. 150 */ 151 if (ipv6_addr_any(&rs->rs_bound_addr)) { 152 if (ipv6_addr_any(&rs->rs_conn_addr)) { 153 sin = (struct sockaddr_in *)uaddr; 154 memset(sin, 0, sizeof(*sin)); 155 sin->sin_family = AF_UNSPEC; 156 return sizeof(*sin); 157 } 158 159 if (ipv6_addr_type(&rs->rs_conn_addr) & 160 IPV6_ADDR_MAPPED) { 161 sin = (struct sockaddr_in *)uaddr; 162 memset(sin, 0, sizeof(*sin)); 163 sin->sin_family = AF_INET; 164 return sizeof(*sin); 165 } 166 167 sin6 = (struct sockaddr_in6 *)uaddr; 168 memset(sin6, 0, sizeof(*sin6)); 169 sin6->sin6_family = AF_INET6; 170 return sizeof(*sin6); 171 } 172 if (ipv6_addr_v4mapped(&rs->rs_bound_addr)) { 173 sin = (struct sockaddr_in *)uaddr; 174 memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); 175 sin->sin_family = AF_INET; 176 sin->sin_port = rs->rs_bound_port; 177 sin->sin_addr.s_addr = rs->rs_bound_addr_v4; 178 uaddr_len = sizeof(*sin); 179 } else { 180 sin6 = (struct sockaddr_in6 *)uaddr; 181 sin6->sin6_family = AF_INET6; 182 sin6->sin6_port = rs->rs_bound_port; 183 sin6->sin6_addr = rs->rs_bound_addr; 184 sin6->sin6_flowinfo = 0; 185 sin6->sin6_scope_id = rs->rs_bound_scope_id; 186 uaddr_len = sizeof(*sin6); 187 } 188 } 189 190 return uaddr_len; 191 } 192 193 /* 194 * RDS' poll is without a doubt the least intuitive part of the interface, 195 * as EPOLLIN and EPOLLOUT do not behave entirely as you would expect from 196 * a network protocol. 197 * 198 * EPOLLIN is asserted if 199 * - there is data on the receive queue. 200 * - to signal that a previously congested destination may have become 201 * uncongested 202 * - A notification has been queued to the socket (this can be a congestion 203 * update, or a RDMA completion, or a MSG_ZEROCOPY completion). 204 * 205 * EPOLLOUT is asserted if there is room on the send queue. This does not mean 206 * however, that the next sendmsg() call will succeed. If the application tries 207 * to send to a congested destination, the system call may still fail (and 208 * return ENOBUFS). 209 */ 210 static __poll_t rds_poll(struct file *file, struct socket *sock, 211 poll_table *wait) 212 { 213 struct sock *sk = sock->sk; 214 struct rds_sock *rs = rds_sk_to_rs(sk); 215 __poll_t mask = 0; 216 unsigned long flags; 217 218 poll_wait(file, sk_sleep(sk), wait); 219 220 if (rs->rs_seen_congestion) 221 poll_wait(file, &rds_poll_waitq, wait); 222 223 read_lock_irqsave(&rs->rs_recv_lock, flags); 224 if (!rs->rs_cong_monitor) { 225 /* When a congestion map was updated, we signal EPOLLIN for 226 * "historical" reasons. Applications can also poll for 227 * WRBAND instead. */ 228 if (rds_cong_updated_since(&rs->rs_cong_track)) 229 mask |= (EPOLLIN | EPOLLRDNORM | EPOLLWRBAND); 230 } else { 231 spin_lock(&rs->rs_lock); 232 if (rs->rs_cong_notify) 233 mask |= (EPOLLIN | EPOLLRDNORM); 234 spin_unlock(&rs->rs_lock); 235 } 236 if (!list_empty(&rs->rs_recv_queue) || 237 !list_empty(&rs->rs_notify_queue) || 238 !list_empty(&rs->rs_zcookie_queue.zcookie_head)) 239 mask |= (EPOLLIN | EPOLLRDNORM); 240 if (rs->rs_snd_bytes < rds_sk_sndbuf(rs)) 241 mask |= (EPOLLOUT | EPOLLWRNORM); 242 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 243 mask |= POLLERR; 244 read_unlock_irqrestore(&rs->rs_recv_lock, flags); 245 246 /* clear state any time we wake a seen-congested socket */ 247 if (mask) 248 rs->rs_seen_congestion = 0; 249 250 return mask; 251 } 252 253 static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 254 { 255 return -ENOIOCTLCMD; 256 } 257 258 static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval, 259 int len) 260 { 261 struct sockaddr_in6 sin6; 262 struct sockaddr_in sin; 263 int ret = 0; 264 265 /* racing with another thread binding seems ok here */ 266 if (ipv6_addr_any(&rs->rs_bound_addr)) { 267 ret = -ENOTCONN; /* XXX not a great errno */ 268 goto out; 269 } 270 271 if (len < sizeof(struct sockaddr_in)) { 272 ret = -EINVAL; 273 goto out; 274 } else if (len < sizeof(struct sockaddr_in6)) { 275 /* Assume IPv4 */ 276 if (copy_from_user(&sin, optval, sizeof(struct sockaddr_in))) { 277 ret = -EFAULT; 278 goto out; 279 } 280 ipv6_addr_set_v4mapped(sin.sin_addr.s_addr, &sin6.sin6_addr); 281 sin6.sin6_port = sin.sin_port; 282 } else { 283 if (copy_from_user(&sin6, optval, 284 sizeof(struct sockaddr_in6))) { 285 ret = -EFAULT; 286 goto out; 287 } 288 } 289 290 rds_send_drop_to(rs, &sin6); 291 out: 292 return ret; 293 } 294 295 static int rds_set_bool_option(unsigned char *optvar, char __user *optval, 296 int optlen) 297 { 298 int value; 299 300 if (optlen < sizeof(int)) 301 return -EINVAL; 302 if (get_user(value, (int __user *) optval)) 303 return -EFAULT; 304 *optvar = !!value; 305 return 0; 306 } 307 308 static int rds_cong_monitor(struct rds_sock *rs, char __user *optval, 309 int optlen) 310 { 311 int ret; 312 313 ret = rds_set_bool_option(&rs->rs_cong_monitor, optval, optlen); 314 if (ret == 0) { 315 if (rs->rs_cong_monitor) { 316 rds_cong_add_socket(rs); 317 } else { 318 rds_cong_remove_socket(rs); 319 rs->rs_cong_mask = 0; 320 rs->rs_cong_notify = 0; 321 } 322 } 323 return ret; 324 } 325 326 static int rds_set_transport(struct rds_sock *rs, char __user *optval, 327 int optlen) 328 { 329 int t_type; 330 331 if (rs->rs_transport) 332 return -EOPNOTSUPP; /* previously attached to transport */ 333 334 if (optlen != sizeof(int)) 335 return -EINVAL; 336 337 if (copy_from_user(&t_type, (int __user *)optval, sizeof(t_type))) 338 return -EFAULT; 339 340 if (t_type < 0 || t_type >= RDS_TRANS_COUNT) 341 return -EINVAL; 342 343 rs->rs_transport = rds_trans_get(t_type); 344 345 return rs->rs_transport ? 0 : -ENOPROTOOPT; 346 } 347 348 static int rds_enable_recvtstamp(struct sock *sk, char __user *optval, 349 int optlen) 350 { 351 int val, valbool; 352 353 if (optlen != sizeof(int)) 354 return -EFAULT; 355 356 if (get_user(val, (int __user *)optval)) 357 return -EFAULT; 358 359 valbool = val ? 1 : 0; 360 361 if (valbool) 362 sock_set_flag(sk, SOCK_RCVTSTAMP); 363 else 364 sock_reset_flag(sk, SOCK_RCVTSTAMP); 365 366 return 0; 367 } 368 369 static int rds_recv_track_latency(struct rds_sock *rs, char __user *optval, 370 int optlen) 371 { 372 struct rds_rx_trace_so trace; 373 int i; 374 375 if (optlen != sizeof(struct rds_rx_trace_so)) 376 return -EFAULT; 377 378 if (copy_from_user(&trace, optval, sizeof(trace))) 379 return -EFAULT; 380 381 if (trace.rx_traces > RDS_MSG_RX_DGRAM_TRACE_MAX) 382 return -EFAULT; 383 384 rs->rs_rx_traces = trace.rx_traces; 385 for (i = 0; i < rs->rs_rx_traces; i++) { 386 if (trace.rx_trace_pos[i] > RDS_MSG_RX_DGRAM_TRACE_MAX) { 387 rs->rs_rx_traces = 0; 388 return -EFAULT; 389 } 390 rs->rs_rx_trace[i] = trace.rx_trace_pos[i]; 391 } 392 393 return 0; 394 } 395 396 static int rds_setsockopt(struct socket *sock, int level, int optname, 397 char __user *optval, unsigned int optlen) 398 { 399 struct rds_sock *rs = rds_sk_to_rs(sock->sk); 400 int ret; 401 402 if (level != SOL_RDS) { 403 ret = -ENOPROTOOPT; 404 goto out; 405 } 406 407 switch (optname) { 408 case RDS_CANCEL_SENT_TO: 409 ret = rds_cancel_sent_to(rs, optval, optlen); 410 break; 411 case RDS_GET_MR: 412 ret = rds_get_mr(rs, optval, optlen); 413 break; 414 case RDS_GET_MR_FOR_DEST: 415 ret = rds_get_mr_for_dest(rs, optval, optlen); 416 break; 417 case RDS_FREE_MR: 418 ret = rds_free_mr(rs, optval, optlen); 419 break; 420 case RDS_RECVERR: 421 ret = rds_set_bool_option(&rs->rs_recverr, optval, optlen); 422 break; 423 case RDS_CONG_MONITOR: 424 ret = rds_cong_monitor(rs, optval, optlen); 425 break; 426 case SO_RDS_TRANSPORT: 427 lock_sock(sock->sk); 428 ret = rds_set_transport(rs, optval, optlen); 429 release_sock(sock->sk); 430 break; 431 case SO_TIMESTAMP: 432 lock_sock(sock->sk); 433 ret = rds_enable_recvtstamp(sock->sk, optval, optlen); 434 release_sock(sock->sk); 435 break; 436 case SO_RDS_MSG_RXPATH_LATENCY: 437 ret = rds_recv_track_latency(rs, optval, optlen); 438 break; 439 default: 440 ret = -ENOPROTOOPT; 441 } 442 out: 443 return ret; 444 } 445 446 static int rds_getsockopt(struct socket *sock, int level, int optname, 447 char __user *optval, int __user *optlen) 448 { 449 struct rds_sock *rs = rds_sk_to_rs(sock->sk); 450 int ret = -ENOPROTOOPT, len; 451 int trans; 452 453 if (level != SOL_RDS) 454 goto out; 455 456 if (get_user(len, optlen)) { 457 ret = -EFAULT; 458 goto out; 459 } 460 461 switch (optname) { 462 case RDS_INFO_FIRST ... RDS_INFO_LAST: 463 ret = rds_info_getsockopt(sock, optname, optval, 464 optlen); 465 break; 466 467 case RDS_RECVERR: 468 if (len < sizeof(int)) 469 ret = -EINVAL; 470 else 471 if (put_user(rs->rs_recverr, (int __user *) optval) || 472 put_user(sizeof(int), optlen)) 473 ret = -EFAULT; 474 else 475 ret = 0; 476 break; 477 case SO_RDS_TRANSPORT: 478 if (len < sizeof(int)) { 479 ret = -EINVAL; 480 break; 481 } 482 trans = (rs->rs_transport ? rs->rs_transport->t_type : 483 RDS_TRANS_NONE); /* unbound */ 484 if (put_user(trans, (int __user *)optval) || 485 put_user(sizeof(int), optlen)) 486 ret = -EFAULT; 487 else 488 ret = 0; 489 break; 490 default: 491 break; 492 } 493 494 out: 495 return ret; 496 497 } 498 499 static int rds_connect(struct socket *sock, struct sockaddr *uaddr, 500 int addr_len, int flags) 501 { 502 struct sock *sk = sock->sk; 503 struct sockaddr_in *sin; 504 struct sockaddr_in6 *sin6; 505 struct rds_sock *rs = rds_sk_to_rs(sk); 506 int addr_type; 507 int ret = 0; 508 509 lock_sock(sk); 510 511 switch (uaddr->sa_family) { 512 case AF_INET: 513 sin = (struct sockaddr_in *)uaddr; 514 if (addr_len < sizeof(struct sockaddr_in)) { 515 ret = -EINVAL; 516 break; 517 } 518 if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) { 519 ret = -EDESTADDRREQ; 520 break; 521 } 522 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) || 523 sin->sin_addr.s_addr == htonl(INADDR_BROADCAST)) { 524 ret = -EINVAL; 525 break; 526 } 527 ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &rs->rs_conn_addr); 528 rs->rs_conn_port = sin->sin_port; 529 break; 530 531 case AF_INET6: 532 sin6 = (struct sockaddr_in6 *)uaddr; 533 if (addr_len < sizeof(struct sockaddr_in6)) { 534 ret = -EINVAL; 535 break; 536 } 537 addr_type = ipv6_addr_type(&sin6->sin6_addr); 538 if (!(addr_type & IPV6_ADDR_UNICAST)) { 539 __be32 addr4; 540 541 if (!(addr_type & IPV6_ADDR_MAPPED)) { 542 ret = -EPROTOTYPE; 543 break; 544 } 545 546 /* It is a mapped address. Need to do some sanity 547 * checks. 548 */ 549 addr4 = sin6->sin6_addr.s6_addr32[3]; 550 if (addr4 == htonl(INADDR_ANY) || 551 addr4 == htonl(INADDR_BROADCAST) || 552 IN_MULTICAST(ntohl(addr4))) { 553 ret = -EPROTOTYPE; 554 break; 555 } 556 } 557 558 if (addr_type & IPV6_ADDR_LINKLOCAL) { 559 /* If socket is arleady bound to a link local address, 560 * the peer address must be on the same link. 561 */ 562 if (sin6->sin6_scope_id == 0 || 563 (!ipv6_addr_any(&rs->rs_bound_addr) && 564 rs->rs_bound_scope_id && 565 sin6->sin6_scope_id != rs->rs_bound_scope_id)) { 566 ret = -EINVAL; 567 break; 568 } 569 /* Remember the connected address scope ID. It will 570 * be checked against the binding local address when 571 * the socket is bound. 572 */ 573 rs->rs_bound_scope_id = sin6->sin6_scope_id; 574 } 575 rs->rs_conn_addr = sin6->sin6_addr; 576 rs->rs_conn_port = sin6->sin6_port; 577 break; 578 579 default: 580 ret = -EAFNOSUPPORT; 581 break; 582 } 583 584 release_sock(sk); 585 return ret; 586 } 587 588 static struct proto rds_proto = { 589 .name = "RDS", 590 .owner = THIS_MODULE, 591 .obj_size = sizeof(struct rds_sock), 592 }; 593 594 static const struct proto_ops rds_proto_ops = { 595 .family = AF_RDS, 596 .owner = THIS_MODULE, 597 .release = rds_release, 598 .bind = rds_bind, 599 .connect = rds_connect, 600 .socketpair = sock_no_socketpair, 601 .accept = sock_no_accept, 602 .getname = rds_getname, 603 .poll = rds_poll, 604 .ioctl = rds_ioctl, 605 .listen = sock_no_listen, 606 .shutdown = sock_no_shutdown, 607 .setsockopt = rds_setsockopt, 608 .getsockopt = rds_getsockopt, 609 .sendmsg = rds_sendmsg, 610 .recvmsg = rds_recvmsg, 611 .mmap = sock_no_mmap, 612 .sendpage = sock_no_sendpage, 613 }; 614 615 static void rds_sock_destruct(struct sock *sk) 616 { 617 struct rds_sock *rs = rds_sk_to_rs(sk); 618 619 WARN_ON((&rs->rs_item != rs->rs_item.next || 620 &rs->rs_item != rs->rs_item.prev)); 621 } 622 623 static int __rds_create(struct socket *sock, struct sock *sk, int protocol) 624 { 625 struct rds_sock *rs; 626 627 sock_init_data(sock, sk); 628 sock->ops = &rds_proto_ops; 629 sk->sk_protocol = protocol; 630 sk->sk_destruct = rds_sock_destruct; 631 632 rs = rds_sk_to_rs(sk); 633 spin_lock_init(&rs->rs_lock); 634 rwlock_init(&rs->rs_recv_lock); 635 INIT_LIST_HEAD(&rs->rs_send_queue); 636 INIT_LIST_HEAD(&rs->rs_recv_queue); 637 INIT_LIST_HEAD(&rs->rs_notify_queue); 638 INIT_LIST_HEAD(&rs->rs_cong_list); 639 rds_message_zcopy_queue_init(&rs->rs_zcookie_queue); 640 spin_lock_init(&rs->rs_rdma_lock); 641 rs->rs_rdma_keys = RB_ROOT; 642 rs->rs_rx_traces = 0; 643 644 spin_lock_bh(&rds_sock_lock); 645 list_add_tail(&rs->rs_item, &rds_sock_list); 646 rds_sock_count++; 647 spin_unlock_bh(&rds_sock_lock); 648 649 return 0; 650 } 651 652 static int rds_create(struct net *net, struct socket *sock, int protocol, 653 int kern) 654 { 655 struct sock *sk; 656 657 if (sock->type != SOCK_SEQPACKET || protocol) 658 return -ESOCKTNOSUPPORT; 659 660 sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto, kern); 661 if (!sk) 662 return -ENOMEM; 663 664 return __rds_create(sock, sk, protocol); 665 } 666 667 void rds_sock_addref(struct rds_sock *rs) 668 { 669 sock_hold(rds_rs_to_sk(rs)); 670 } 671 672 void rds_sock_put(struct rds_sock *rs) 673 { 674 sock_put(rds_rs_to_sk(rs)); 675 } 676 677 static const struct net_proto_family rds_family_ops = { 678 .family = AF_RDS, 679 .create = rds_create, 680 .owner = THIS_MODULE, 681 }; 682 683 static void rds_sock_inc_info(struct socket *sock, unsigned int len, 684 struct rds_info_iterator *iter, 685 struct rds_info_lengths *lens) 686 { 687 struct rds_sock *rs; 688 struct rds_incoming *inc; 689 unsigned int total = 0; 690 691 len /= sizeof(struct rds_info_message); 692 693 spin_lock_bh(&rds_sock_lock); 694 695 list_for_each_entry(rs, &rds_sock_list, rs_item) { 696 read_lock(&rs->rs_recv_lock); 697 698 /* XXX too lazy to maintain counts.. */ 699 list_for_each_entry(inc, &rs->rs_recv_queue, i_item) { 700 total++; 701 if (total <= len) 702 rds_inc_info_copy(inc, iter, 703 inc->i_saddr.s6_addr32[3], 704 rs->rs_bound_addr_v4, 705 1); 706 } 707 708 read_unlock(&rs->rs_recv_lock); 709 } 710 711 spin_unlock_bh(&rds_sock_lock); 712 713 lens->nr = total; 714 lens->each = sizeof(struct rds_info_message); 715 } 716 717 static void rds_sock_info(struct socket *sock, unsigned int len, 718 struct rds_info_iterator *iter, 719 struct rds_info_lengths *lens) 720 { 721 struct rds_info_socket sinfo; 722 struct rds_sock *rs; 723 724 len /= sizeof(struct rds_info_socket); 725 726 spin_lock_bh(&rds_sock_lock); 727 728 if (len < rds_sock_count) 729 goto out; 730 731 list_for_each_entry(rs, &rds_sock_list, rs_item) { 732 sinfo.sndbuf = rds_sk_sndbuf(rs); 733 sinfo.rcvbuf = rds_sk_rcvbuf(rs); 734 sinfo.bound_addr = rs->rs_bound_addr_v4; 735 sinfo.connected_addr = rs->rs_conn_addr_v4; 736 sinfo.bound_port = rs->rs_bound_port; 737 sinfo.connected_port = rs->rs_conn_port; 738 sinfo.inum = sock_i_ino(rds_rs_to_sk(rs)); 739 740 rds_info_copy(iter, &sinfo, sizeof(sinfo)); 741 } 742 743 out: 744 lens->nr = rds_sock_count; 745 lens->each = sizeof(struct rds_info_socket); 746 747 spin_unlock_bh(&rds_sock_lock); 748 } 749 750 static void rds_exit(void) 751 { 752 sock_unregister(rds_family_ops.family); 753 proto_unregister(&rds_proto); 754 rds_conn_exit(); 755 rds_cong_exit(); 756 rds_sysctl_exit(); 757 rds_threads_exit(); 758 rds_stats_exit(); 759 rds_page_exit(); 760 rds_bind_lock_destroy(); 761 rds_info_deregister_func(RDS_INFO_SOCKETS, rds_sock_info); 762 rds_info_deregister_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info); 763 } 764 module_exit(rds_exit); 765 766 u32 rds_gen_num; 767 768 static int rds_init(void) 769 { 770 int ret; 771 772 net_get_random_once(&rds_gen_num, sizeof(rds_gen_num)); 773 774 ret = rds_bind_lock_init(); 775 if (ret) 776 goto out; 777 778 ret = rds_conn_init(); 779 if (ret) 780 goto out_bind; 781 782 ret = rds_threads_init(); 783 if (ret) 784 goto out_conn; 785 ret = rds_sysctl_init(); 786 if (ret) 787 goto out_threads; 788 ret = rds_stats_init(); 789 if (ret) 790 goto out_sysctl; 791 ret = proto_register(&rds_proto, 1); 792 if (ret) 793 goto out_stats; 794 ret = sock_register(&rds_family_ops); 795 if (ret) 796 goto out_proto; 797 798 rds_info_register_func(RDS_INFO_SOCKETS, rds_sock_info); 799 rds_info_register_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info); 800 801 goto out; 802 803 out_proto: 804 proto_unregister(&rds_proto); 805 out_stats: 806 rds_stats_exit(); 807 out_sysctl: 808 rds_sysctl_exit(); 809 out_threads: 810 rds_threads_exit(); 811 out_conn: 812 rds_conn_exit(); 813 rds_cong_exit(); 814 rds_page_exit(); 815 out_bind: 816 rds_bind_lock_destroy(); 817 out: 818 return ret; 819 } 820 module_init(rds_init); 821 822 #define DRV_VERSION "4.0" 823 #define DRV_RELDATE "Feb 12, 2009" 824 825 MODULE_AUTHOR("Oracle Corporation <rds-devel@oss.oracle.com>"); 826 MODULE_DESCRIPTION("RDS: Reliable Datagram Sockets" 827 " v" DRV_VERSION " (" DRV_RELDATE ")"); 828 MODULE_VERSION(DRV_VERSION); 829 MODULE_LICENSE("Dual BSD/GPL"); 830 MODULE_ALIAS_NETPROTO(PF_RDS); 831