1 /* 2 * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 * 32 */ 33 #include <linux/kernel.h> 34 #include <linux/list.h> 35 #include <linux/slab.h> 36 #include <linux/export.h> 37 #include <net/ipv6.h> 38 #include <net/inet6_hashtables.h> 39 #include <net/addrconf.h> 40 41 #include "rds.h" 42 #include "loop.h" 43 44 #define RDS_CONNECTION_HASH_BITS 12 45 #define RDS_CONNECTION_HASH_ENTRIES (1 << RDS_CONNECTION_HASH_BITS) 46 #define RDS_CONNECTION_HASH_MASK (RDS_CONNECTION_HASH_ENTRIES - 1) 47 48 /* converting this to RCU is a chore for another day.. */ 49 static DEFINE_SPINLOCK(rds_conn_lock); 50 static unsigned long rds_conn_count; 51 static struct hlist_head rds_conn_hash[RDS_CONNECTION_HASH_ENTRIES]; 52 static struct kmem_cache *rds_conn_slab; 53 54 static struct hlist_head *rds_conn_bucket(const struct in6_addr *laddr, 55 const struct in6_addr *faddr) 56 { 57 static u32 rds6_hash_secret __read_mostly; 58 static u32 rds_hash_secret __read_mostly; 59 60 u32 lhash, fhash, hash; 61 62 net_get_random_once(&rds_hash_secret, sizeof(rds_hash_secret)); 63 net_get_random_once(&rds6_hash_secret, sizeof(rds6_hash_secret)); 64 65 lhash = (__force u32)laddr->s6_addr32[3]; 66 fhash = __ipv6_addr_jhash(faddr, rds6_hash_secret); 67 hash = __inet6_ehashfn(lhash, 0, fhash, 0, rds_hash_secret); 68 69 return &rds_conn_hash[hash & RDS_CONNECTION_HASH_MASK]; 70 } 71 72 #define rds_conn_info_set(var, test, suffix) do { \ 73 if (test) \ 74 var |= RDS_INFO_CONNECTION_FLAG_##suffix; \ 75 } while (0) 76 77 /* rcu read lock must be held or the connection spinlock */ 78 static struct rds_connection *rds_conn_lookup(struct net *net, 79 struct hlist_head *head, 80 const struct in6_addr *laddr, 81 const struct in6_addr *faddr, 82 struct rds_transport *trans, 83 int dev_if) 84 { 85 struct rds_connection *conn, *ret = NULL; 86 87 hlist_for_each_entry_rcu(conn, head, c_hash_node) { 88 if (ipv6_addr_equal(&conn->c_faddr, faddr) && 89 ipv6_addr_equal(&conn->c_laddr, laddr) && 90 conn->c_trans == trans && 91 net == rds_conn_net(conn) && 92 conn->c_dev_if == dev_if) { 93 ret = conn; 94 break; 95 } 96 } 97 rdsdebug("returning conn %p for %pI6c -> %pI6c\n", ret, 98 laddr, faddr); 99 return ret; 100 } 101 102 /* 103 * This is called by transports as they're bringing down a connection. 104 * It clears partial message state so that the transport can start sending 105 * and receiving over this connection again in the future. It is up to 106 * the transport to have serialized this call with its send and recv. 107 */ 108 static void rds_conn_path_reset(struct rds_conn_path *cp) 109 { 110 struct rds_connection *conn = cp->cp_conn; 111 112 rdsdebug("connection %pI6c to %pI6c reset\n", 113 &conn->c_laddr, &conn->c_faddr); 114 115 rds_stats_inc(s_conn_reset); 116 rds_send_path_reset(cp); 117 cp->cp_flags = 0; 118 119 /* Do not clear next_rx_seq here, else we cannot distinguish 120 * retransmitted packets from new packets, and will hand all 121 * of them to the application. That is not consistent with the 122 * reliability guarantees of RDS. */ 123 } 124 125 static void __rds_conn_path_init(struct rds_connection *conn, 126 struct rds_conn_path *cp, bool is_outgoing) 127 { 128 spin_lock_init(&cp->cp_lock); 129 cp->cp_next_tx_seq = 1; 130 init_waitqueue_head(&cp->cp_waitq); 131 INIT_LIST_HEAD(&cp->cp_send_queue); 132 INIT_LIST_HEAD(&cp->cp_retrans); 133 134 cp->cp_conn = conn; 135 atomic_set(&cp->cp_state, RDS_CONN_DOWN); 136 cp->cp_send_gen = 0; 137 cp->cp_reconnect_jiffies = 0; 138 INIT_DELAYED_WORK(&cp->cp_send_w, rds_send_worker); 139 INIT_DELAYED_WORK(&cp->cp_recv_w, rds_recv_worker); 140 INIT_DELAYED_WORK(&cp->cp_conn_w, rds_connect_worker); 141 INIT_WORK(&cp->cp_down_w, rds_shutdown_worker); 142 mutex_init(&cp->cp_cm_lock); 143 cp->cp_flags = 0; 144 } 145 146 /* 147 * There is only every one 'conn' for a given pair of addresses in the 148 * system at a time. They contain messages to be retransmitted and so 149 * span the lifetime of the actual underlying transport connections. 150 * 151 * For now they are not garbage collected once they're created. They 152 * are torn down as the module is removed, if ever. 153 */ 154 static struct rds_connection *__rds_conn_create(struct net *net, 155 const struct in6_addr *laddr, 156 const struct in6_addr *faddr, 157 struct rds_transport *trans, 158 gfp_t gfp, 159 int is_outgoing, 160 int dev_if) 161 { 162 struct rds_connection *conn, *parent = NULL; 163 struct hlist_head *head = rds_conn_bucket(laddr, faddr); 164 struct rds_transport *loop_trans; 165 unsigned long flags; 166 int ret, i; 167 int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1); 168 169 rcu_read_lock(); 170 conn = rds_conn_lookup(net, head, laddr, faddr, trans, dev_if); 171 if (conn && 172 conn->c_loopback && 173 conn->c_trans != &rds_loop_transport && 174 ipv6_addr_equal(laddr, faddr) && 175 !is_outgoing) { 176 /* This is a looped back IB connection, and we're 177 * called by the code handling the incoming connect. 178 * We need a second connection object into which we 179 * can stick the other QP. */ 180 parent = conn; 181 conn = parent->c_passive; 182 } 183 rcu_read_unlock(); 184 if (conn) 185 goto out; 186 187 conn = kmem_cache_zalloc(rds_conn_slab, gfp); 188 if (!conn) { 189 conn = ERR_PTR(-ENOMEM); 190 goto out; 191 } 192 conn->c_path = kcalloc(npaths, sizeof(struct rds_conn_path), gfp); 193 if (!conn->c_path) { 194 kmem_cache_free(rds_conn_slab, conn); 195 conn = ERR_PTR(-ENOMEM); 196 goto out; 197 } 198 199 INIT_HLIST_NODE(&conn->c_hash_node); 200 conn->c_laddr = *laddr; 201 conn->c_isv6 = !ipv6_addr_v4mapped(laddr); 202 conn->c_faddr = *faddr; 203 conn->c_dev_if = dev_if; 204 /* If the local address is link local, set c_bound_if to be the 205 * index used for this connection. Otherwise, set it to 0 as 206 * the socket is not bound to an interface. c_bound_if is used 207 * to look up a socket when a packet is received 208 */ 209 if (ipv6_addr_type(laddr) & IPV6_ADDR_LINKLOCAL) 210 conn->c_bound_if = dev_if; 211 else 212 conn->c_bound_if = 0; 213 214 rds_conn_net_set(conn, net); 215 216 ret = rds_cong_get_maps(conn); 217 if (ret) { 218 kfree(conn->c_path); 219 kmem_cache_free(rds_conn_slab, conn); 220 conn = ERR_PTR(ret); 221 goto out; 222 } 223 224 /* 225 * This is where a connection becomes loopback. If *any* RDS sockets 226 * can bind to the destination address then we'd rather the messages 227 * flow through loopback rather than either transport. 228 */ 229 loop_trans = rds_trans_get_preferred(net, faddr, conn->c_dev_if); 230 if (loop_trans) { 231 rds_trans_put(loop_trans); 232 conn->c_loopback = 1; 233 if (is_outgoing && trans->t_prefer_loopback) { 234 /* "outgoing" connection - and the transport 235 * says it wants the connection handled by the 236 * loopback transport. This is what TCP does. 237 */ 238 trans = &rds_loop_transport; 239 } 240 } 241 242 conn->c_trans = trans; 243 244 init_waitqueue_head(&conn->c_hs_waitq); 245 for (i = 0; i < npaths; i++) { 246 __rds_conn_path_init(conn, &conn->c_path[i], 247 is_outgoing); 248 conn->c_path[i].cp_index = i; 249 } 250 rcu_read_lock(); 251 if (rds_destroy_pending(conn)) 252 ret = -ENETDOWN; 253 else 254 ret = trans->conn_alloc(conn, GFP_ATOMIC); 255 if (ret) { 256 rcu_read_unlock(); 257 kfree(conn->c_path); 258 kmem_cache_free(rds_conn_slab, conn); 259 conn = ERR_PTR(ret); 260 goto out; 261 } 262 263 rdsdebug("allocated conn %p for %pI6c -> %pI6c over %s %s\n", 264 conn, laddr, faddr, 265 strnlen(trans->t_name, sizeof(trans->t_name)) ? 266 trans->t_name : "[unknown]", is_outgoing ? "(outgoing)" : ""); 267 268 /* 269 * Since we ran without holding the conn lock, someone could 270 * have created the same conn (either normal or passive) in the 271 * interim. We check while holding the lock. If we won, we complete 272 * init and return our conn. If we lost, we rollback and return the 273 * other one. 274 */ 275 spin_lock_irqsave(&rds_conn_lock, flags); 276 if (parent) { 277 /* Creating passive conn */ 278 if (parent->c_passive) { 279 trans->conn_free(conn->c_path[0].cp_transport_data); 280 kfree(conn->c_path); 281 kmem_cache_free(rds_conn_slab, conn); 282 conn = parent->c_passive; 283 } else { 284 parent->c_passive = conn; 285 rds_cong_add_conn(conn); 286 rds_conn_count++; 287 } 288 } else { 289 /* Creating normal conn */ 290 struct rds_connection *found; 291 292 found = rds_conn_lookup(net, head, laddr, faddr, trans, 293 dev_if); 294 if (found) { 295 struct rds_conn_path *cp; 296 int i; 297 298 for (i = 0; i < npaths; i++) { 299 cp = &conn->c_path[i]; 300 /* The ->conn_alloc invocation may have 301 * allocated resource for all paths, so all 302 * of them may have to be freed here. 303 */ 304 if (cp->cp_transport_data) 305 trans->conn_free(cp->cp_transport_data); 306 } 307 kfree(conn->c_path); 308 kmem_cache_free(rds_conn_slab, conn); 309 conn = found; 310 } else { 311 conn->c_my_gen_num = rds_gen_num; 312 conn->c_peer_gen_num = 0; 313 hlist_add_head_rcu(&conn->c_hash_node, head); 314 rds_cong_add_conn(conn); 315 rds_conn_count++; 316 } 317 } 318 spin_unlock_irqrestore(&rds_conn_lock, flags); 319 rcu_read_unlock(); 320 321 out: 322 return conn; 323 } 324 325 struct rds_connection *rds_conn_create(struct net *net, 326 const struct in6_addr *laddr, 327 const struct in6_addr *faddr, 328 struct rds_transport *trans, gfp_t gfp, 329 int dev_if) 330 { 331 return __rds_conn_create(net, laddr, faddr, trans, gfp, 0, dev_if); 332 } 333 EXPORT_SYMBOL_GPL(rds_conn_create); 334 335 struct rds_connection *rds_conn_create_outgoing(struct net *net, 336 const struct in6_addr *laddr, 337 const struct in6_addr *faddr, 338 struct rds_transport *trans, 339 gfp_t gfp, int dev_if) 340 { 341 return __rds_conn_create(net, laddr, faddr, trans, gfp, 1, dev_if); 342 } 343 EXPORT_SYMBOL_GPL(rds_conn_create_outgoing); 344 345 void rds_conn_shutdown(struct rds_conn_path *cp) 346 { 347 struct rds_connection *conn = cp->cp_conn; 348 349 /* shut it down unless it's down already */ 350 if (!rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_DOWN)) { 351 /* 352 * Quiesce the connection mgmt handlers before we start tearing 353 * things down. We don't hold the mutex for the entire 354 * duration of the shutdown operation, else we may be 355 * deadlocking with the CM handler. Instead, the CM event 356 * handler is supposed to check for state DISCONNECTING 357 */ 358 mutex_lock(&cp->cp_cm_lock); 359 if (!rds_conn_path_transition(cp, RDS_CONN_UP, 360 RDS_CONN_DISCONNECTING) && 361 !rds_conn_path_transition(cp, RDS_CONN_ERROR, 362 RDS_CONN_DISCONNECTING)) { 363 rds_conn_path_error(cp, 364 "shutdown called in state %d\n", 365 atomic_read(&cp->cp_state)); 366 mutex_unlock(&cp->cp_cm_lock); 367 return; 368 } 369 mutex_unlock(&cp->cp_cm_lock); 370 371 wait_event(cp->cp_waitq, 372 !test_bit(RDS_IN_XMIT, &cp->cp_flags)); 373 wait_event(cp->cp_waitq, 374 !test_bit(RDS_RECV_REFILL, &cp->cp_flags)); 375 376 conn->c_trans->conn_path_shutdown(cp); 377 rds_conn_path_reset(cp); 378 379 if (!rds_conn_path_transition(cp, RDS_CONN_DISCONNECTING, 380 RDS_CONN_DOWN) && 381 !rds_conn_path_transition(cp, RDS_CONN_ERROR, 382 RDS_CONN_DOWN)) { 383 /* This can happen - eg when we're in the middle of tearing 384 * down the connection, and someone unloads the rds module. 385 * Quite reproducible with loopback connections. 386 * Mostly harmless. 387 * 388 * Note that this also happens with rds-tcp because 389 * we could have triggered rds_conn_path_drop in irq 390 * mode from rds_tcp_state change on the receipt of 391 * a FIN, thus we need to recheck for RDS_CONN_ERROR 392 * here. 393 */ 394 rds_conn_path_error(cp, "%s: failed to transition " 395 "to state DOWN, current state " 396 "is %d\n", __func__, 397 atomic_read(&cp->cp_state)); 398 return; 399 } 400 } 401 402 /* Then reconnect if it's still live. 403 * The passive side of an IB loopback connection is never added 404 * to the conn hash, so we never trigger a reconnect on this 405 * conn - the reconnect is always triggered by the active peer. */ 406 cancel_delayed_work_sync(&cp->cp_conn_w); 407 rcu_read_lock(); 408 if (!hlist_unhashed(&conn->c_hash_node)) { 409 rcu_read_unlock(); 410 rds_queue_reconnect(cp); 411 } else { 412 rcu_read_unlock(); 413 } 414 } 415 416 /* destroy a single rds_conn_path. rds_conn_destroy() iterates over 417 * all paths using rds_conn_path_destroy() 418 */ 419 static void rds_conn_path_destroy(struct rds_conn_path *cp) 420 { 421 struct rds_message *rm, *rtmp; 422 423 if (!cp->cp_transport_data) 424 return; 425 426 /* make sure lingering queued work won't try to ref the conn */ 427 cancel_delayed_work_sync(&cp->cp_send_w); 428 cancel_delayed_work_sync(&cp->cp_recv_w); 429 430 rds_conn_path_drop(cp, true); 431 flush_work(&cp->cp_down_w); 432 433 /* tear down queued messages */ 434 list_for_each_entry_safe(rm, rtmp, 435 &cp->cp_send_queue, 436 m_conn_item) { 437 list_del_init(&rm->m_conn_item); 438 BUG_ON(!list_empty(&rm->m_sock_item)); 439 rds_message_put(rm); 440 } 441 if (cp->cp_xmit_rm) 442 rds_message_put(cp->cp_xmit_rm); 443 444 WARN_ON(delayed_work_pending(&cp->cp_send_w)); 445 WARN_ON(delayed_work_pending(&cp->cp_recv_w)); 446 WARN_ON(delayed_work_pending(&cp->cp_conn_w)); 447 WARN_ON(work_pending(&cp->cp_down_w)); 448 449 cp->cp_conn->c_trans->conn_free(cp->cp_transport_data); 450 } 451 452 /* 453 * Stop and free a connection. 454 * 455 * This can only be used in very limited circumstances. It assumes that once 456 * the conn has been shutdown that no one else is referencing the connection. 457 * We can only ensure this in the rmmod path in the current code. 458 */ 459 void rds_conn_destroy(struct rds_connection *conn) 460 { 461 unsigned long flags; 462 int i; 463 struct rds_conn_path *cp; 464 int npaths = (conn->c_trans->t_mp_capable ? RDS_MPATH_WORKERS : 1); 465 466 rdsdebug("freeing conn %p for %pI4 -> " 467 "%pI4\n", conn, &conn->c_laddr, 468 &conn->c_faddr); 469 470 /* Ensure conn will not be scheduled for reconnect */ 471 spin_lock_irq(&rds_conn_lock); 472 hlist_del_init_rcu(&conn->c_hash_node); 473 spin_unlock_irq(&rds_conn_lock); 474 synchronize_rcu(); 475 476 /* shut the connection down */ 477 for (i = 0; i < npaths; i++) { 478 cp = &conn->c_path[i]; 479 rds_conn_path_destroy(cp); 480 BUG_ON(!list_empty(&cp->cp_retrans)); 481 } 482 483 /* 484 * The congestion maps aren't freed up here. They're 485 * freed by rds_cong_exit() after all the connections 486 * have been freed. 487 */ 488 rds_cong_remove_conn(conn); 489 490 kfree(conn->c_path); 491 kmem_cache_free(rds_conn_slab, conn); 492 493 spin_lock_irqsave(&rds_conn_lock, flags); 494 rds_conn_count--; 495 spin_unlock_irqrestore(&rds_conn_lock, flags); 496 } 497 EXPORT_SYMBOL_GPL(rds_conn_destroy); 498 499 static void __rds_inc_msg_cp(struct rds_incoming *inc, 500 struct rds_info_iterator *iter, 501 void *saddr, void *daddr, int flip, bool isv6) 502 { 503 if (isv6) 504 rds6_inc_info_copy(inc, iter, saddr, daddr, flip); 505 else 506 rds_inc_info_copy(inc, iter, *(__be32 *)saddr, 507 *(__be32 *)daddr, flip); 508 } 509 510 static void rds_conn_message_info_cmn(struct socket *sock, unsigned int len, 511 struct rds_info_iterator *iter, 512 struct rds_info_lengths *lens, 513 int want_send, bool isv6) 514 { 515 struct hlist_head *head; 516 struct list_head *list; 517 struct rds_connection *conn; 518 struct rds_message *rm; 519 unsigned int total = 0; 520 unsigned long flags; 521 size_t i; 522 int j; 523 524 if (isv6) 525 len /= sizeof(struct rds6_info_message); 526 else 527 len /= sizeof(struct rds_info_message); 528 529 rcu_read_lock(); 530 531 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); 532 i++, head++) { 533 hlist_for_each_entry_rcu(conn, head, c_hash_node) { 534 struct rds_conn_path *cp; 535 int npaths; 536 537 if (!isv6 && conn->c_isv6) 538 continue; 539 540 npaths = (conn->c_trans->t_mp_capable ? 541 RDS_MPATH_WORKERS : 1); 542 543 for (j = 0; j < npaths; j++) { 544 cp = &conn->c_path[j]; 545 if (want_send) 546 list = &cp->cp_send_queue; 547 else 548 list = &cp->cp_retrans; 549 550 spin_lock_irqsave(&cp->cp_lock, flags); 551 552 /* XXX too lazy to maintain counts.. */ 553 list_for_each_entry(rm, list, m_conn_item) { 554 total++; 555 if (total <= len) 556 __rds_inc_msg_cp(&rm->m_inc, 557 iter, 558 &conn->c_laddr, 559 &conn->c_faddr, 560 0, isv6); 561 } 562 563 spin_unlock_irqrestore(&cp->cp_lock, flags); 564 } 565 } 566 } 567 rcu_read_unlock(); 568 569 lens->nr = total; 570 if (isv6) 571 lens->each = sizeof(struct rds6_info_message); 572 else 573 lens->each = sizeof(struct rds_info_message); 574 } 575 576 static void rds_conn_message_info(struct socket *sock, unsigned int len, 577 struct rds_info_iterator *iter, 578 struct rds_info_lengths *lens, 579 int want_send) 580 { 581 rds_conn_message_info_cmn(sock, len, iter, lens, want_send, false); 582 } 583 584 static void rds6_conn_message_info(struct socket *sock, unsigned int len, 585 struct rds_info_iterator *iter, 586 struct rds_info_lengths *lens, 587 int want_send) 588 { 589 rds_conn_message_info_cmn(sock, len, iter, lens, want_send, true); 590 } 591 592 static void rds_conn_message_info_send(struct socket *sock, unsigned int len, 593 struct rds_info_iterator *iter, 594 struct rds_info_lengths *lens) 595 { 596 rds_conn_message_info(sock, len, iter, lens, 1); 597 } 598 599 static void rds6_conn_message_info_send(struct socket *sock, unsigned int len, 600 struct rds_info_iterator *iter, 601 struct rds_info_lengths *lens) 602 { 603 rds6_conn_message_info(sock, len, iter, lens, 1); 604 } 605 606 static void rds_conn_message_info_retrans(struct socket *sock, 607 unsigned int len, 608 struct rds_info_iterator *iter, 609 struct rds_info_lengths *lens) 610 { 611 rds_conn_message_info(sock, len, iter, lens, 0); 612 } 613 614 static void rds6_conn_message_info_retrans(struct socket *sock, 615 unsigned int len, 616 struct rds_info_iterator *iter, 617 struct rds_info_lengths *lens) 618 { 619 rds6_conn_message_info(sock, len, iter, lens, 0); 620 } 621 622 void rds_for_each_conn_info(struct socket *sock, unsigned int len, 623 struct rds_info_iterator *iter, 624 struct rds_info_lengths *lens, 625 int (*visitor)(struct rds_connection *, void *), 626 u64 *buffer, 627 size_t item_len) 628 { 629 struct hlist_head *head; 630 struct rds_connection *conn; 631 size_t i; 632 633 rcu_read_lock(); 634 635 lens->nr = 0; 636 lens->each = item_len; 637 638 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); 639 i++, head++) { 640 hlist_for_each_entry_rcu(conn, head, c_hash_node) { 641 642 /* XXX no c_lock usage.. */ 643 if (!visitor(conn, buffer)) 644 continue; 645 646 /* We copy as much as we can fit in the buffer, 647 * but we count all items so that the caller 648 * can resize the buffer. */ 649 if (len >= item_len) { 650 rds_info_copy(iter, buffer, item_len); 651 len -= item_len; 652 } 653 lens->nr++; 654 } 655 } 656 rcu_read_unlock(); 657 } 658 EXPORT_SYMBOL_GPL(rds_for_each_conn_info); 659 660 static void rds_walk_conn_path_info(struct socket *sock, unsigned int len, 661 struct rds_info_iterator *iter, 662 struct rds_info_lengths *lens, 663 int (*visitor)(struct rds_conn_path *, void *), 664 u64 *buffer, 665 size_t item_len) 666 { 667 struct hlist_head *head; 668 struct rds_connection *conn; 669 size_t i; 670 671 rcu_read_lock(); 672 673 lens->nr = 0; 674 lens->each = item_len; 675 676 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); 677 i++, head++) { 678 hlist_for_each_entry_rcu(conn, head, c_hash_node) { 679 struct rds_conn_path *cp; 680 681 /* XXX We only copy the information from the first 682 * path for now. The problem is that if there are 683 * more than one underlying paths, we cannot report 684 * information of all of them using the existing 685 * API. For example, there is only one next_tx_seq, 686 * which path's next_tx_seq should we report? It is 687 * a bug in the design of MPRDS. 688 */ 689 cp = conn->c_path; 690 691 /* XXX no cp_lock usage.. */ 692 if (!visitor(cp, buffer)) 693 continue; 694 695 /* We copy as much as we can fit in the buffer, 696 * but we count all items so that the caller 697 * can resize the buffer. 698 */ 699 if (len >= item_len) { 700 rds_info_copy(iter, buffer, item_len); 701 len -= item_len; 702 } 703 lens->nr++; 704 } 705 } 706 rcu_read_unlock(); 707 } 708 709 static int rds_conn_info_visitor(struct rds_conn_path *cp, void *buffer) 710 { 711 struct rds_info_connection *cinfo = buffer; 712 struct rds_connection *conn = cp->cp_conn; 713 714 if (conn->c_isv6) 715 return 0; 716 717 cinfo->next_tx_seq = cp->cp_next_tx_seq; 718 cinfo->next_rx_seq = cp->cp_next_rx_seq; 719 cinfo->laddr = conn->c_laddr.s6_addr32[3]; 720 cinfo->faddr = conn->c_faddr.s6_addr32[3]; 721 strncpy(cinfo->transport, conn->c_trans->t_name, 722 sizeof(cinfo->transport)); 723 cinfo->flags = 0; 724 725 rds_conn_info_set(cinfo->flags, test_bit(RDS_IN_XMIT, &cp->cp_flags), 726 SENDING); 727 /* XXX Future: return the state rather than these funky bits */ 728 rds_conn_info_set(cinfo->flags, 729 atomic_read(&cp->cp_state) == RDS_CONN_CONNECTING, 730 CONNECTING); 731 rds_conn_info_set(cinfo->flags, 732 atomic_read(&cp->cp_state) == RDS_CONN_UP, 733 CONNECTED); 734 return 1; 735 } 736 737 static int rds6_conn_info_visitor(struct rds_conn_path *cp, void *buffer) 738 { 739 struct rds6_info_connection *cinfo6 = buffer; 740 struct rds_connection *conn = cp->cp_conn; 741 742 cinfo6->next_tx_seq = cp->cp_next_tx_seq; 743 cinfo6->next_rx_seq = cp->cp_next_rx_seq; 744 cinfo6->laddr = conn->c_laddr; 745 cinfo6->faddr = conn->c_faddr; 746 strncpy(cinfo6->transport, conn->c_trans->t_name, 747 sizeof(cinfo6->transport)); 748 cinfo6->flags = 0; 749 750 rds_conn_info_set(cinfo6->flags, test_bit(RDS_IN_XMIT, &cp->cp_flags), 751 SENDING); 752 /* XXX Future: return the state rather than these funky bits */ 753 rds_conn_info_set(cinfo6->flags, 754 atomic_read(&cp->cp_state) == RDS_CONN_CONNECTING, 755 CONNECTING); 756 rds_conn_info_set(cinfo6->flags, 757 atomic_read(&cp->cp_state) == RDS_CONN_UP, 758 CONNECTED); 759 /* Just return 1 as there is no error case. This is a helper function 760 * for rds_walk_conn_path_info() and it wants a return value. 761 */ 762 return 1; 763 } 764 765 static void rds_conn_info(struct socket *sock, unsigned int len, 766 struct rds_info_iterator *iter, 767 struct rds_info_lengths *lens) 768 { 769 u64 buffer[(sizeof(struct rds_info_connection) + 7) / 8]; 770 771 rds_walk_conn_path_info(sock, len, iter, lens, 772 rds_conn_info_visitor, 773 buffer, 774 sizeof(struct rds_info_connection)); 775 } 776 777 static void rds6_conn_info(struct socket *sock, unsigned int len, 778 struct rds_info_iterator *iter, 779 struct rds_info_lengths *lens) 780 { 781 u64 buffer[(sizeof(struct rds6_info_connection) + 7) / 8]; 782 783 rds_walk_conn_path_info(sock, len, iter, lens, 784 rds6_conn_info_visitor, 785 buffer, 786 sizeof(struct rds6_info_connection)); 787 } 788 789 int rds_conn_init(void) 790 { 791 int ret; 792 793 ret = rds_loop_net_init(); /* register pernet callback */ 794 if (ret) 795 return ret; 796 797 rds_conn_slab = kmem_cache_create("rds_connection", 798 sizeof(struct rds_connection), 799 0, 0, NULL); 800 if (!rds_conn_slab) { 801 rds_loop_net_exit(); 802 return -ENOMEM; 803 } 804 805 rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info); 806 rds_info_register_func(RDS_INFO_SEND_MESSAGES, 807 rds_conn_message_info_send); 808 rds_info_register_func(RDS_INFO_RETRANS_MESSAGES, 809 rds_conn_message_info_retrans); 810 rds_info_register_func(RDS6_INFO_CONNECTIONS, rds6_conn_info); 811 rds_info_register_func(RDS6_INFO_SEND_MESSAGES, 812 rds6_conn_message_info_send); 813 rds_info_register_func(RDS6_INFO_RETRANS_MESSAGES, 814 rds6_conn_message_info_retrans); 815 816 return 0; 817 } 818 819 void rds_conn_exit(void) 820 { 821 rds_loop_net_exit(); /* unregister pernet callback */ 822 rds_loop_exit(); 823 824 WARN_ON(!hlist_empty(rds_conn_hash)); 825 826 kmem_cache_destroy(rds_conn_slab); 827 828 rds_info_deregister_func(RDS_INFO_CONNECTIONS, rds_conn_info); 829 rds_info_deregister_func(RDS_INFO_SEND_MESSAGES, 830 rds_conn_message_info_send); 831 rds_info_deregister_func(RDS_INFO_RETRANS_MESSAGES, 832 rds_conn_message_info_retrans); 833 rds_info_deregister_func(RDS6_INFO_CONNECTIONS, rds6_conn_info); 834 rds_info_deregister_func(RDS6_INFO_SEND_MESSAGES, 835 rds6_conn_message_info_send); 836 rds_info_deregister_func(RDS6_INFO_RETRANS_MESSAGES, 837 rds6_conn_message_info_retrans); 838 } 839 840 /* 841 * Force a disconnect 842 */ 843 void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy) 844 { 845 atomic_set(&cp->cp_state, RDS_CONN_ERROR); 846 847 rcu_read_lock(); 848 if (!destroy && rds_destroy_pending(cp->cp_conn)) { 849 rcu_read_unlock(); 850 return; 851 } 852 queue_work(rds_wq, &cp->cp_down_w); 853 rcu_read_unlock(); 854 } 855 EXPORT_SYMBOL_GPL(rds_conn_path_drop); 856 857 void rds_conn_drop(struct rds_connection *conn) 858 { 859 WARN_ON(conn->c_trans->t_mp_capable); 860 rds_conn_path_drop(&conn->c_path[0], false); 861 } 862 EXPORT_SYMBOL_GPL(rds_conn_drop); 863 864 /* 865 * If the connection is down, trigger a connect. We may have scheduled a 866 * delayed reconnect however - in this case we should not interfere. 867 */ 868 void rds_conn_path_connect_if_down(struct rds_conn_path *cp) 869 { 870 rcu_read_lock(); 871 if (rds_destroy_pending(cp->cp_conn)) { 872 rcu_read_unlock(); 873 return; 874 } 875 if (rds_conn_path_state(cp) == RDS_CONN_DOWN && 876 !test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags)) 877 queue_delayed_work(rds_wq, &cp->cp_conn_w, 0); 878 rcu_read_unlock(); 879 } 880 EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down); 881 882 void rds_conn_connect_if_down(struct rds_connection *conn) 883 { 884 WARN_ON(conn->c_trans->t_mp_capable); 885 rds_conn_path_connect_if_down(&conn->c_path[0]); 886 } 887 EXPORT_SYMBOL_GPL(rds_conn_connect_if_down); 888 889 void 890 __rds_conn_path_error(struct rds_conn_path *cp, const char *fmt, ...) 891 { 892 va_list ap; 893 894 va_start(ap, fmt); 895 vprintk(fmt, ap); 896 va_end(ap); 897 898 rds_conn_path_drop(cp, false); 899 } 900