/*
 * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/gfp.h>
#include <linux/in.h>
#include <linux/ipv6.h>
#include <linux/poll.h>
#include <net/sock.h>

#include "rds.h"

/* this is just used for stats gathering :/ */
static DEFINE_SPINLOCK(rds_sock_lock);
static unsigned long rds_sock_count;
static LIST_HEAD(rds_sock_list);
DECLARE_WAIT_QUEUE_HEAD(rds_poll_waitq);

/*
 * This is called as the final descriptor referencing this socket is closed.
 * We have to unbind the socket so that another socket can be bound to the
 * address it was using.
 *
 * We have to be careful about racing with the incoming path.  sock_orphan()
 * sets SOCK_DEAD and we use that as an indicator to the rx path that new
 * messages shouldn't be queued.
 */
static int rds_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct rds_sock *rs;

	if (!sk)
		goto out;

	rs = rds_sk_to_rs(sk);

	sock_orphan(sk);
	/* Note - rds_clear_recv_queue grabs rs_recv_lock, so
	 * that ensures the recv path has completed messing
	 * with the socket. */
	rds_clear_recv_queue(rs);
	rds_cong_remove_socket(rs);

	rds_remove_bound(rs);

	rds_send_drop_to(rs, NULL);
	rds_rdma_drop_keys(rs);
	rds_notify_queue_get(rs, NULL);
	rds_notify_msg_zcopy_purge(&rs->rs_zcookie_queue);

	spin_lock_bh(&rds_sock_lock);
	list_del_init(&rs->rs_item);
	rds_sock_count--;
	spin_unlock_bh(&rds_sock_lock);

	rds_trans_put(rs->rs_transport);

	sock->sk = NULL;
	sock_put(sk);
out:
	return 0;
}
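/*
 * Example (userspace sketch, not part of this module): the socket lifecycle
 * that ends in rds_release() above.  The address and port are placeholders;
 * PF_RDS and SOCK_SEQPACKET are checked by rds_create() below.
 *
 *	int fd = socket(PF_RDS, SOCK_SEQPACKET, 0);
 *	struct sockaddr_in sin = {
 *		.sin_family = AF_INET,
 *		.sin_port = htons(4000),		// example RDS port
 *	};
 *
 *	inet_pton(AF_INET, "198.51.100.1", &sin.sin_addr);
 *	bind(fd, (struct sockaddr *)&sin, sizeof(sin));
 *	...
 *	close(fd);	// final close ends up in rds_release()
 */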
/*
 * Careful not to race with rds_release -> sock_orphan which clears sk_sleep.
 * _bh() isn't OK here, we're called from interrupt handlers.  It's probably OK
 * to wake the waitqueue after sk_sleep is clear as we hold a sock ref, but
 * this seems more conservative.
 * NB - normally, one would use sk_callback_lock for this, but we can
 * get here from interrupts, whereas the network code grabs sk_callback_lock
 * with _lock_bh only - so relying on sk_callback_lock introduces livelocks.
 */
void rds_wake_sk_sleep(struct rds_sock *rs)
{
	unsigned long flags;

	read_lock_irqsave(&rs->rs_recv_lock, flags);
	__rds_wake_sk_sleep(rds_rs_to_sk(rs));
	read_unlock_irqrestore(&rs->rs_recv_lock, flags);
}

static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
		       int peer)
{
	struct rds_sock *rs = rds_sk_to_rs(sock->sk);
	struct sockaddr_in6 *sin6;
	struct sockaddr_in *sin;
	int uaddr_len;

	/* racy, don't care */
	if (peer) {
		if (ipv6_addr_any(&rs->rs_conn_addr))
			return -ENOTCONN;

		if (ipv6_addr_v4mapped(&rs->rs_conn_addr)) {
			sin = (struct sockaddr_in *)uaddr;
			memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
			sin->sin_family = AF_INET;
			sin->sin_port = rs->rs_conn_port;
			sin->sin_addr.s_addr = rs->rs_conn_addr_v4;
			uaddr_len = sizeof(*sin);
		} else {
			sin6 = (struct sockaddr_in6 *)uaddr;
			sin6->sin6_family = AF_INET6;
			sin6->sin6_port = rs->rs_conn_port;
			sin6->sin6_addr = rs->rs_conn_addr;
			sin6->sin6_flowinfo = 0;
			/* scope_id is the same as in the bound address. */
			sin6->sin6_scope_id = rs->rs_bound_scope_id;
			uaddr_len = sizeof(*sin6);
		}
	} else {
		/* If socket is not yet bound and the socket is connected,
		 * set the return address family to be the same as the
		 * connected address, but with 0 address value.  If it is not
		 * connected, set the family to be AF_UNSPEC (value 0) and
		 * the address size to be that of an IPv4 address.
		 */
		if (ipv6_addr_any(&rs->rs_bound_addr)) {
			if (ipv6_addr_any(&rs->rs_conn_addr)) {
				sin = (struct sockaddr_in *)uaddr;
				memset(sin, 0, sizeof(*sin));
				sin->sin_family = AF_UNSPEC;
				return sizeof(*sin);
			}

#if IS_ENABLED(CONFIG_IPV6)
			if (!(ipv6_addr_type(&rs->rs_conn_addr) &
			      IPV6_ADDR_MAPPED)) {
				sin6 = (struct sockaddr_in6 *)uaddr;
				memset(sin6, 0, sizeof(*sin6));
				sin6->sin6_family = AF_INET6;
				return sizeof(*sin6);
			}
#endif

			sin = (struct sockaddr_in *)uaddr;
			memset(sin, 0, sizeof(*sin));
			sin->sin_family = AF_INET;
			return sizeof(*sin);
		}
		if (ipv6_addr_v4mapped(&rs->rs_bound_addr)) {
			sin = (struct sockaddr_in *)uaddr;
			memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
			sin->sin_family = AF_INET;
			sin->sin_port = rs->rs_bound_port;
			sin->sin_addr.s_addr = rs->rs_bound_addr_v4;
			uaddr_len = sizeof(*sin);
		} else {
			sin6 = (struct sockaddr_in6 *)uaddr;
			sin6->sin6_family = AF_INET6;
			sin6->sin6_port = rs->rs_bound_port;
			sin6->sin6_addr = rs->rs_bound_addr;
			sin6->sin6_flowinfo = 0;
			sin6->sin6_scope_id = rs->rs_bound_scope_id;
			uaddr_len = sizeof(*sin6);
		}
	}

	return uaddr_len;
}
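/*
 * Example (userspace sketch, hypothetical): per rds_getname() above, a
 * socket that is neither bound nor connected reports AF_UNSPEC, which a
 * caller can use to detect the unbound state.
 *
 *	struct sockaddr_storage ss;
 *	socklen_t slen = sizeof(ss);
 *
 *	getsockname(fd, (struct sockaddr *)&ss, &slen);
 *	if (ss.ss_family == AF_UNSPEC)
 *		;	// not bound and not connected yet
 */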
/*
 * RDS' poll is without a doubt the least intuitive part of the interface,
 * as EPOLLIN and EPOLLOUT do not behave entirely as you would expect from
 * a network protocol.
 *
 * EPOLLIN is asserted if
 *  -	there is data on the receive queue,
 *  -	a previously congested destination may have become uncongested
 *	(EPOLLIN is used to signal this), or
 *  -	a notification has been queued to the socket (this can be a congestion
 *	update, or a RDMA completion, or a MSG_ZEROCOPY completion).
 *
 * EPOLLOUT is asserted if there is room on the send queue.  This does not
 * mean, however, that the next sendmsg() call will succeed.  If the
 * application tries to send to a congested destination, the system call may
 * still fail (and return ENOBUFS).
 */
static __poll_t rds_poll(struct file *file, struct socket *sock,
			 poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct rds_sock *rs = rds_sk_to_rs(sk);
	__poll_t mask = 0;
	unsigned long flags;

	poll_wait(file, sk_sleep(sk), wait);

	if (rs->rs_seen_congestion)
		poll_wait(file, &rds_poll_waitq, wait);

	read_lock_irqsave(&rs->rs_recv_lock, flags);
	if (!rs->rs_cong_monitor) {
		/* When a congestion map was updated, we signal EPOLLIN for
		 * "historical" reasons.  Applications can also poll for
		 * WRBAND instead. */
		if (rds_cong_updated_since(&rs->rs_cong_track))
			mask |= (EPOLLIN | EPOLLRDNORM | EPOLLWRBAND);
	} else {
		spin_lock(&rs->rs_lock);
		if (rs->rs_cong_notify)
			mask |= (EPOLLIN | EPOLLRDNORM);
		spin_unlock(&rs->rs_lock);
	}
	if (!list_empty(&rs->rs_recv_queue) ||
	    !list_empty(&rs->rs_notify_queue) ||
	    !list_empty(&rs->rs_zcookie_queue.zcookie_head))
		mask |= (EPOLLIN | EPOLLRDNORM);
	if (rs->rs_snd_bytes < rds_sk_sndbuf(rs))
		mask |= (EPOLLOUT | EPOLLWRNORM);
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR;
	read_unlock_irqrestore(&rs->rs_recv_lock, flags);

	/* clear state any time we wake a seen-congested socket */
	if (mask)
		rs->rs_seen_congestion = 0;

	return mask;
}

static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -ENOIOCTLCMD;
}
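/*
 * Example (userspace sketch, hypothetical): given the poll semantics
 * documented above, a sender treats POLLOUT as "worth retrying" rather
 * than a guarantee, and handles ENOBUFS from a congested destination.
 * Declarations of msg/inmsg are elided.
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };
 *
 *	for (;;) {
 *		poll(&pfd, 1, -1);
 *		if ((pfd.revents & POLLOUT) &&
 *		    sendmsg(fd, &msg, 0) < 0 && errno == ENOBUFS)
 *			continue;	// destination congested; poll again
 *		if (pfd.revents & POLLIN)
 *			recvmsg(fd, &inmsg, MSG_DONTWAIT);
 *	}
 */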
static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval,
			      int len)
{
	struct sockaddr_in6 sin6;
	struct sockaddr_in sin;
	int ret = 0;

	/* racing with another thread binding seems ok here */
	if (ipv6_addr_any(&rs->rs_bound_addr)) {
		ret = -ENOTCONN; /* XXX not a great errno */
		goto out;
	}

	if (len < sizeof(struct sockaddr_in)) {
		ret = -EINVAL;
		goto out;
	} else if (len < sizeof(struct sockaddr_in6)) {
		/* Assume IPv4 */
		if (copy_from_user(&sin, optval, sizeof(struct sockaddr_in))) {
			ret = -EFAULT;
			goto out;
		}
		ipv6_addr_set_v4mapped(sin.sin_addr.s_addr, &sin6.sin6_addr);
		sin6.sin6_port = sin.sin_port;
	} else {
		if (copy_from_user(&sin6, optval,
				   sizeof(struct sockaddr_in6))) {
			ret = -EFAULT;
			goto out;
		}
	}

	rds_send_drop_to(rs, &sin6);
out:
	return ret;
}

static int rds_set_bool_option(unsigned char *optvar, char __user *optval,
			       int optlen)
{
	int value;

	if (optlen < sizeof(int))
		return -EINVAL;
	if (get_user(value, (int __user *) optval))
		return -EFAULT;
	*optvar = !!value;
	return 0;
}

static int rds_cong_monitor(struct rds_sock *rs, char __user *optval,
			    int optlen)
{
	int ret;

	ret = rds_set_bool_option(&rs->rs_cong_monitor, optval, optlen);
	if (ret == 0) {
		if (rs->rs_cong_monitor) {
			rds_cong_add_socket(rs);
		} else {
			rds_cong_remove_socket(rs);
			rs->rs_cong_mask = 0;
			rs->rs_cong_notify = 0;
		}
	}
	return ret;
}

static int rds_set_transport(struct rds_sock *rs, char __user *optval,
			     int optlen)
{
	int t_type;

	if (rs->rs_transport)
		return -EOPNOTSUPP; /* previously attached to transport */

	if (optlen != sizeof(int))
		return -EINVAL;

	if (copy_from_user(&t_type, (int __user *)optval, sizeof(t_type)))
		return -EFAULT;

	if (t_type < 0 || t_type >= RDS_TRANS_COUNT)
		return -EINVAL;

	rs->rs_transport = rds_trans_get(t_type);

	return rs->rs_transport ? 0 : -ENOPROTOOPT;
}
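/*
 * Example (userspace sketch, hypothetical): pinning the transport before
 * bind().  SOL_RDS, SO_RDS_TRANSPORT and the RDS_TRANS_* values come from
 * <linux/rds.h>; once a transport is attached, rds_set_transport() above
 * rejects further changes with EOPNOTSUPP.
 *
 *	int trans = RDS_TRANS_TCP;
 *
 *	if (setsockopt(fd, SOL_RDS, SO_RDS_TRANSPORT,
 *		       &trans, sizeof(trans)) < 0)
 *		perror("SO_RDS_TRANSPORT");
 */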
static int rds_enable_recvtstamp(struct sock *sk, char __user *optval,
				 int optlen)
{
	int val, valbool;

	if (optlen != sizeof(int))
		return -EFAULT;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	valbool = val ? 1 : 0;

	if (valbool)
		sock_set_flag(sk, SOCK_RCVTSTAMP);
	else
		sock_reset_flag(sk, SOCK_RCVTSTAMP);

	return 0;
}

static int rds_recv_track_latency(struct rds_sock *rs, char __user *optval,
				  int optlen)
{
	struct rds_rx_trace_so trace;
	int i;

	if (optlen != sizeof(struct rds_rx_trace_so))
		return -EFAULT;

	if (copy_from_user(&trace, optval, sizeof(trace)))
		return -EFAULT;

	if (trace.rx_traces > RDS_MSG_RX_DGRAM_TRACE_MAX)
		return -EFAULT;

	rs->rs_rx_traces = trace.rx_traces;
	for (i = 0; i < rs->rs_rx_traces; i++) {
		if (trace.rx_trace_pos[i] > RDS_MSG_RX_DGRAM_TRACE_MAX) {
			rs->rs_rx_traces = 0;
			return -EFAULT;
		}
		rs->rs_rx_trace[i] = trace.rx_trace_pos[i];
	}

	return 0;
}

static int rds_setsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, unsigned int optlen)
{
	struct rds_sock *rs = rds_sk_to_rs(sock->sk);
	int ret;

	if (level != SOL_RDS) {
		ret = -ENOPROTOOPT;
		goto out;
	}

	switch (optname) {
	case RDS_CANCEL_SENT_TO:
		ret = rds_cancel_sent_to(rs, optval, optlen);
		break;
	case RDS_GET_MR:
		ret = rds_get_mr(rs, optval, optlen);
		break;
	case RDS_GET_MR_FOR_DEST:
		ret = rds_get_mr_for_dest(rs, optval, optlen);
		break;
	case RDS_FREE_MR:
		ret = rds_free_mr(rs, optval, optlen);
		break;
	case RDS_RECVERR:
		ret = rds_set_bool_option(&rs->rs_recverr, optval, optlen);
		break;
	case RDS_CONG_MONITOR:
		ret = rds_cong_monitor(rs, optval, optlen);
		break;
	case SO_RDS_TRANSPORT:
		lock_sock(sock->sk);
		ret = rds_set_transport(rs, optval, optlen);
		release_sock(sock->sk);
		break;
	case SO_TIMESTAMP:
		lock_sock(sock->sk);
		ret = rds_enable_recvtstamp(sock->sk, optval, optlen);
		release_sock(sock->sk);
		break;
	case SO_RDS_MSG_RXPATH_LATENCY:
		ret = rds_recv_track_latency(rs, optval, optlen);
		break;
	default:
		ret = -ENOPROTOOPT;
	}
out:
	return ret;
}

static int rds_getsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	struct rds_sock *rs = rds_sk_to_rs(sock->sk);
	int ret = -ENOPROTOOPT, len;
	int trans;

	if (level != SOL_RDS)
		goto out;

	if (get_user(len, optlen)) {
		ret = -EFAULT;
		goto out;
	}

	switch (optname) {
	case RDS_INFO_FIRST ... RDS_INFO_LAST:
		ret = rds_info_getsockopt(sock, optname, optval,
					  optlen);
		break;

	case RDS_RECVERR:
		if (len < sizeof(int))
			ret = -EINVAL;
		else if (put_user(rs->rs_recverr, (int __user *) optval) ||
			 put_user(sizeof(int), optlen))
			ret = -EFAULT;
		else
			ret = 0;
		break;
	case SO_RDS_TRANSPORT:
		if (len < sizeof(int)) {
			ret = -EINVAL;
			break;
		}
		trans = (rs->rs_transport ? rs->rs_transport->t_type :
			 RDS_TRANS_NONE); /* unbound */
		if (put_user(trans, (int __user *)optval) ||
		    put_user(sizeof(int), optlen))
			ret = -EFAULT;
		else
			ret = 0;
		break;
	default:
		break;
	}

out:
	return ret;
}
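/*
 * Example (userspace sketch, hypothetical): rds_setsockopt() above accepts
 * SO_TIMESTAMP at the SOL_RDS level; the timestamp is then typically
 * delivered as an SCM_TIMESTAMP control message on recvmsg().  The cmsg
 * walk below is the generic Linux pattern, not RDS-specific.
 *
 *	int on = 1;
 *	struct cmsghdr *cmsg;
 *	struct timeval tv;
 *
 *	setsockopt(fd, SOL_RDS, SO_TIMESTAMP, &on, sizeof(on));
 *	...
 *	recvmsg(fd, &msg, 0);
 *	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg))
 *		if (cmsg->cmsg_level == SOL_SOCKET &&
 *		    cmsg->cmsg_type == SCM_TIMESTAMP)
 *			memcpy(&tv, CMSG_DATA(cmsg), sizeof(tv));
 */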
static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
		       int addr_len, int flags)
{
	struct sock *sk = sock->sk;
	struct sockaddr_in *sin;
	struct rds_sock *rs = rds_sk_to_rs(sk);
	int ret = 0;

	lock_sock(sk);

	switch (uaddr->sa_family) {
	case AF_INET:
		sin = (struct sockaddr_in *)uaddr;
		if (addr_len < sizeof(struct sockaddr_in)) {
			ret = -EINVAL;
			break;
		}
		if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) {
			ret = -EDESTADDRREQ;
			break;
		}
		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) ||
		    sin->sin_addr.s_addr == htonl(INADDR_BROADCAST)) {
			ret = -EINVAL;
			break;
		}
		ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &rs->rs_conn_addr);
		rs->rs_conn_port = sin->sin_port;
		break;

#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6: {
		struct sockaddr_in6 *sin6;
		int addr_type;

		sin6 = (struct sockaddr_in6 *)uaddr;
		if (addr_len < sizeof(struct sockaddr_in6)) {
			ret = -EINVAL;
			break;
		}
		addr_type = ipv6_addr_type(&sin6->sin6_addr);
		if (!(addr_type & IPV6_ADDR_UNICAST)) {
			__be32 addr4;

			if (!(addr_type & IPV6_ADDR_MAPPED)) {
				ret = -EPROTOTYPE;
				break;
			}

			/* It is a mapped address.  Need to do some sanity
			 * checks.
			 */
			addr4 = sin6->sin6_addr.s6_addr32[3];
			if (addr4 == htonl(INADDR_ANY) ||
			    addr4 == htonl(INADDR_BROADCAST) ||
			    IN_MULTICAST(ntohl(addr4))) {
				ret = -EPROTOTYPE;
				break;
			}
		}

		if (addr_type & IPV6_ADDR_LINKLOCAL) {
			/* If socket is already bound to a link local address,
			 * the peer address must be on the same link.
			 */
			if (sin6->sin6_scope_id == 0 ||
			    (!ipv6_addr_any(&rs->rs_bound_addr) &&
			     rs->rs_bound_scope_id &&
			     sin6->sin6_scope_id != rs->rs_bound_scope_id)) {
				ret = -EINVAL;
				break;
			}
			/* Remember the connected address scope ID.  It will
			 * be checked against the binding local address when
			 * the socket is bound.
			 */
			rs->rs_bound_scope_id = sin6->sin6_scope_id;
		}
		rs->rs_conn_addr = sin6->sin6_addr;
		rs->rs_conn_port = sin6->sin6_port;
		break;
	}
#endif

	default:
		ret = -EAFNOSUPPORT;
		break;
	}

	release_sock(sk);
	return ret;
}

static struct proto rds_proto = {
	.name	  = "RDS",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct rds_sock),
};

static const struct proto_ops rds_proto_ops = {
	.family =	AF_RDS,
	.owner =	THIS_MODULE,
	.release =	rds_release,
	.bind =		rds_bind,
	.connect =	rds_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	rds_getname,
	.poll =		rds_poll,
	.ioctl =	rds_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	rds_setsockopt,
	.getsockopt =	rds_getsockopt,
	.sendmsg =	rds_sendmsg,
	.recvmsg =	rds_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static void rds_sock_destruct(struct sock *sk)
{
	struct rds_sock *rs = rds_sk_to_rs(sk);

	WARN_ON((&rs->rs_item != rs->rs_item.next ||
		 &rs->rs_item != rs->rs_item.prev));
}

static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
{
	struct rds_sock *rs;

	sock_init_data(sock, sk);
	sock->ops = &rds_proto_ops;
	sk->sk_protocol = protocol;
	sk->sk_destruct = rds_sock_destruct;

	rs = rds_sk_to_rs(sk);
	spin_lock_init(&rs->rs_lock);
	rwlock_init(&rs->rs_recv_lock);
	INIT_LIST_HEAD(&rs->rs_send_queue);
	INIT_LIST_HEAD(&rs->rs_recv_queue);
	INIT_LIST_HEAD(&rs->rs_notify_queue);
	INIT_LIST_HEAD(&rs->rs_cong_list);
	rds_message_zcopy_queue_init(&rs->rs_zcookie_queue);
	spin_lock_init(&rs->rs_rdma_lock);
	rs->rs_rdma_keys = RB_ROOT;
	rs->rs_rx_traces = 0;

	spin_lock_bh(&rds_sock_lock);
	list_add_tail(&rs->rs_item, &rds_sock_list);
	rds_sock_count++;
	spin_unlock_bh(&rds_sock_lock);

	return 0;
}

static int rds_create(struct net *net, struct socket *sock, int protocol,
		      int kern)
{
	struct sock *sk;

	if (sock->type != SOCK_SEQPACKET || protocol)
		return -ESOCKTNOSUPPORT;

	sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto, kern);
	if (!sk)
		return -ENOMEM;

	return __rds_create(sock, sk, protocol);
}

void rds_sock_addref(struct rds_sock *rs)
{
	sock_hold(rds_rs_to_sk(rs));
}

void rds_sock_put(struct rds_sock *rs)
{
	sock_put(rds_rs_to_sk(rs));
}

static const struct net_proto_family rds_family_ops = {
	.family =	AF_RDS,
	.create =	rds_create,
	.owner	=	THIS_MODULE,
};
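/*
 * Example (userspace sketch, hypothetical): rds_connect() above only records
 * the default peer; the underlying transport connection is established
 * lazily on the first send.  RDS sockets can also stay unconnected and name
 * the peer per message via sendmsg()'s msg_name.
 *
 *	struct sockaddr_in peer = {
 *		.sin_family = AF_INET,
 *		.sin_port = htons(4000),		// example RDS port
 *	};
 *
 *	inet_pton(AF_INET, "198.51.100.2", &peer.sin_addr);
 *	connect(fd, (struct sockaddr *)&peer, sizeof(peer));
 *	send(fd, buf, len, 0);	// peer now implicit
 */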
static void rds_sock_inc_info(struct socket *sock, unsigned int len,
			      struct rds_info_iterator *iter,
			      struct rds_info_lengths *lens)
{
	struct rds_sock *rs;
	struct rds_incoming *inc;
	unsigned int total = 0;

	len /= sizeof(struct rds_info_message);

	spin_lock_bh(&rds_sock_lock);

	list_for_each_entry(rs, &rds_sock_list, rs_item) {
		read_lock(&rs->rs_recv_lock);

		/* XXX too lazy to maintain counts.. */
		list_for_each_entry(inc, &rs->rs_recv_queue, i_item) {
			total++;
			if (total <= len)
				rds_inc_info_copy(inc, iter,
						  inc->i_saddr.s6_addr32[3],
						  rs->rs_bound_addr_v4,
						  1);
		}

		read_unlock(&rs->rs_recv_lock);
	}

	spin_unlock_bh(&rds_sock_lock);

	lens->nr = total;
	lens->each = sizeof(struct rds_info_message);
}

static void rds_sock_info(struct socket *sock, unsigned int len,
			  struct rds_info_iterator *iter,
			  struct rds_info_lengths *lens)
{
	struct rds_info_socket sinfo;
	struct rds_sock *rs;

	len /= sizeof(struct rds_info_socket);

	spin_lock_bh(&rds_sock_lock);

	if (len < rds_sock_count)
		goto out;

	list_for_each_entry(rs, &rds_sock_list, rs_item) {
		sinfo.sndbuf = rds_sk_sndbuf(rs);
		sinfo.rcvbuf = rds_sk_rcvbuf(rs);
		sinfo.bound_addr = rs->rs_bound_addr_v4;
		sinfo.connected_addr = rs->rs_conn_addr_v4;
		sinfo.bound_port = rs->rs_bound_port;
		sinfo.connected_port = rs->rs_conn_port;
		sinfo.inum = sock_i_ino(rds_rs_to_sk(rs));

		rds_info_copy(iter, &sinfo, sizeof(sinfo));
	}

out:
	lens->nr = rds_sock_count;
	lens->each = sizeof(struct rds_info_socket);

	spin_unlock_bh(&rds_sock_lock);
}

static void rds_exit(void)
{
	sock_unregister(rds_family_ops.family);
	proto_unregister(&rds_proto);
	rds_conn_exit();
	rds_cong_exit();
	rds_sysctl_exit();
	rds_threads_exit();
	rds_stats_exit();
	rds_page_exit();
	rds_bind_lock_destroy();
	rds_info_deregister_func(RDS_INFO_SOCKETS, rds_sock_info);
	rds_info_deregister_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info);
}
module_exit(rds_exit);

u32 rds_gen_num;

static int rds_init(void)
{
	int ret;

	net_get_random_once(&rds_gen_num, sizeof(rds_gen_num));

	ret = rds_bind_lock_init();
	if (ret)
		goto out;

	ret = rds_conn_init();
	if (ret)
		goto out_bind;

	ret = rds_threads_init();
	if (ret)
		goto out_conn;
	ret = rds_sysctl_init();
	if (ret)
		goto out_threads;
	ret = rds_stats_init();
	if (ret)
		goto out_sysctl;
	ret = proto_register(&rds_proto, 1);
	if (ret)
		goto out_stats;
	ret = sock_register(&rds_family_ops);
	if (ret)
		goto out_proto;

	rds_info_register_func(RDS_INFO_SOCKETS, rds_sock_info);
	rds_info_register_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info);

	goto out;

out_proto:
	proto_unregister(&rds_proto);
out_stats:
	rds_stats_exit();
out_sysctl:
	rds_sysctl_exit();
out_threads:
	rds_threads_exit();
out_conn:
	rds_conn_exit();
	rds_cong_exit();
	rds_page_exit();
out_bind:
	rds_bind_lock_destroy();
out:
	return ret;
}
module_init(rds_init);

#define DRV_VERSION	"4.0"
#define DRV_RELDATE	"Feb 12, 2009"

MODULE_AUTHOR("Oracle Corporation <rds-devel@oss.oracle.com>");
MODULE_DESCRIPTION("RDS: Reliable Datagram Sockets"
		   " v" DRV_VERSION " (" DRV_RELDATE ")");
MODULE_VERSION(DRV_VERSION);
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS_NETPROTO(PF_RDS);