1 /* 2 * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 * 32 */ 33 #include <linux/kernel.h> 34 #include <linux/list.h> 35 #include <linux/slab.h> 36 #include <linux/export.h> 37 #include <net/ipv6.h> 38 #include <net/inet6_hashtables.h> 39 40 #include "rds.h" 41 #include "loop.h" 42 43 #define RDS_CONNECTION_HASH_BITS 12 44 #define RDS_CONNECTION_HASH_ENTRIES (1 << RDS_CONNECTION_HASH_BITS) 45 #define RDS_CONNECTION_HASH_MASK (RDS_CONNECTION_HASH_ENTRIES - 1) 46 47 /* converting this to RCU is a chore for another day.. */ 48 static DEFINE_SPINLOCK(rds_conn_lock); 49 static unsigned long rds_conn_count; 50 static struct hlist_head rds_conn_hash[RDS_CONNECTION_HASH_ENTRIES]; 51 static struct kmem_cache *rds_conn_slab; 52 53 static struct hlist_head *rds_conn_bucket(const struct in6_addr *laddr, 54 const struct in6_addr *faddr) 55 { 56 static u32 rds6_hash_secret __read_mostly; 57 static u32 rds_hash_secret __read_mostly; 58 59 u32 lhash, fhash, hash; 60 61 net_get_random_once(&rds_hash_secret, sizeof(rds_hash_secret)); 62 net_get_random_once(&rds6_hash_secret, sizeof(rds6_hash_secret)); 63 64 lhash = (__force u32)laddr->s6_addr32[3]; 65 fhash = __ipv6_addr_jhash(faddr, rds6_hash_secret); 66 hash = __inet6_ehashfn(lhash, 0, fhash, 0, rds_hash_secret); 67 68 return &rds_conn_hash[hash & RDS_CONNECTION_HASH_MASK]; 69 } 70 71 #define rds_conn_info_set(var, test, suffix) do { \ 72 if (test) \ 73 var |= RDS_INFO_CONNECTION_FLAG_##suffix; \ 74 } while (0) 75 76 /* rcu read lock must be held or the connection spinlock */ 77 static struct rds_connection *rds_conn_lookup(struct net *net, 78 struct hlist_head *head, 79 const struct in6_addr *laddr, 80 const struct in6_addr *faddr, 81 struct rds_transport *trans, 82 int dev_if) 83 { 84 struct rds_connection *conn, *ret = NULL; 85 86 hlist_for_each_entry_rcu(conn, head, c_hash_node) { 87 if (ipv6_addr_equal(&conn->c_faddr, faddr) && 88 ipv6_addr_equal(&conn->c_laddr, laddr) && 89 conn->c_trans == trans && 90 net == rds_conn_net(conn) && 91 conn->c_dev_if == dev_if) { 92 ret = conn; 93 break; 94 } 95 } 96 rdsdebug("returning conn %p for %pI6c -> %pI6c\n", ret, 97 laddr, faddr); 98 return ret; 99 } 100 101 /* 102 * This is called by transports as they're bringing down a connection. 103 * It clears partial message state so that the transport can start sending 104 * and receiving over this connection again in the future. It is up to 105 * the transport to have serialized this call with its send and recv. 106 */ 107 static void rds_conn_path_reset(struct rds_conn_path *cp) 108 { 109 struct rds_connection *conn = cp->cp_conn; 110 111 rdsdebug("connection %pI6c to %pI6c reset\n", 112 &conn->c_laddr, &conn->c_faddr); 113 114 rds_stats_inc(s_conn_reset); 115 rds_send_path_reset(cp); 116 cp->cp_flags = 0; 117 118 /* Do not clear next_rx_seq here, else we cannot distinguish 119 * retransmitted packets from new packets, and will hand all 120 * of them to the application. That is not consistent with the 121 * reliability guarantees of RDS. */ 122 } 123 124 static void __rds_conn_path_init(struct rds_connection *conn, 125 struct rds_conn_path *cp, bool is_outgoing) 126 { 127 spin_lock_init(&cp->cp_lock); 128 cp->cp_next_tx_seq = 1; 129 init_waitqueue_head(&cp->cp_waitq); 130 INIT_LIST_HEAD(&cp->cp_send_queue); 131 INIT_LIST_HEAD(&cp->cp_retrans); 132 133 cp->cp_conn = conn; 134 atomic_set(&cp->cp_state, RDS_CONN_DOWN); 135 cp->cp_send_gen = 0; 136 cp->cp_reconnect_jiffies = 0; 137 INIT_DELAYED_WORK(&cp->cp_send_w, rds_send_worker); 138 INIT_DELAYED_WORK(&cp->cp_recv_w, rds_recv_worker); 139 INIT_DELAYED_WORK(&cp->cp_conn_w, rds_connect_worker); 140 INIT_WORK(&cp->cp_down_w, rds_shutdown_worker); 141 mutex_init(&cp->cp_cm_lock); 142 cp->cp_flags = 0; 143 } 144 145 /* 146 * There is only every one 'conn' for a given pair of addresses in the 147 * system at a time. They contain messages to be retransmitted and so 148 * span the lifetime of the actual underlying transport connections. 149 * 150 * For now they are not garbage collected once they're created. They 151 * are torn down as the module is removed, if ever. 152 */ 153 static struct rds_connection *__rds_conn_create(struct net *net, 154 const struct in6_addr *laddr, 155 const struct in6_addr *faddr, 156 struct rds_transport *trans, 157 gfp_t gfp, 158 int is_outgoing, 159 int dev_if) 160 { 161 struct rds_connection *conn, *parent = NULL; 162 struct hlist_head *head = rds_conn_bucket(laddr, faddr); 163 struct rds_transport *loop_trans; 164 unsigned long flags; 165 int ret, i; 166 int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1); 167 168 rcu_read_lock(); 169 conn = rds_conn_lookup(net, head, laddr, faddr, trans, dev_if); 170 if (conn && 171 conn->c_loopback && 172 conn->c_trans != &rds_loop_transport && 173 ipv6_addr_equal(laddr, faddr) && 174 !is_outgoing) { 175 /* This is a looped back IB connection, and we're 176 * called by the code handling the incoming connect. 177 * We need a second connection object into which we 178 * can stick the other QP. */ 179 parent = conn; 180 conn = parent->c_passive; 181 } 182 rcu_read_unlock(); 183 if (conn) 184 goto out; 185 186 conn = kmem_cache_zalloc(rds_conn_slab, gfp); 187 if (!conn) { 188 conn = ERR_PTR(-ENOMEM); 189 goto out; 190 } 191 conn->c_path = kcalloc(npaths, sizeof(struct rds_conn_path), gfp); 192 if (!conn->c_path) { 193 kmem_cache_free(rds_conn_slab, conn); 194 conn = ERR_PTR(-ENOMEM); 195 goto out; 196 } 197 198 INIT_HLIST_NODE(&conn->c_hash_node); 199 conn->c_laddr = *laddr; 200 conn->c_isv6 = !ipv6_addr_v4mapped(laddr); 201 conn->c_faddr = *faddr; 202 conn->c_dev_if = dev_if; 203 204 rds_conn_net_set(conn, net); 205 206 ret = rds_cong_get_maps(conn); 207 if (ret) { 208 kfree(conn->c_path); 209 kmem_cache_free(rds_conn_slab, conn); 210 conn = ERR_PTR(ret); 211 goto out; 212 } 213 214 /* 215 * This is where a connection becomes loopback. If *any* RDS sockets 216 * can bind to the destination address then we'd rather the messages 217 * flow through loopback rather than either transport. 218 */ 219 loop_trans = rds_trans_get_preferred(net, faddr, conn->c_dev_if); 220 if (loop_trans) { 221 rds_trans_put(loop_trans); 222 conn->c_loopback = 1; 223 if (is_outgoing && trans->t_prefer_loopback) { 224 /* "outgoing" connection - and the transport 225 * says it wants the connection handled by the 226 * loopback transport. This is what TCP does. 227 */ 228 trans = &rds_loop_transport; 229 } 230 } 231 232 conn->c_trans = trans; 233 234 init_waitqueue_head(&conn->c_hs_waitq); 235 for (i = 0; i < npaths; i++) { 236 __rds_conn_path_init(conn, &conn->c_path[i], 237 is_outgoing); 238 conn->c_path[i].cp_index = i; 239 } 240 rcu_read_lock(); 241 if (rds_destroy_pending(conn)) 242 ret = -ENETDOWN; 243 else 244 ret = trans->conn_alloc(conn, GFP_ATOMIC); 245 if (ret) { 246 rcu_read_unlock(); 247 kfree(conn->c_path); 248 kmem_cache_free(rds_conn_slab, conn); 249 conn = ERR_PTR(ret); 250 goto out; 251 } 252 253 rdsdebug("allocated conn %p for %pI6c -> %pI6c over %s %s\n", 254 conn, laddr, faddr, 255 strnlen(trans->t_name, sizeof(trans->t_name)) ? 256 trans->t_name : "[unknown]", is_outgoing ? "(outgoing)" : ""); 257 258 /* 259 * Since we ran without holding the conn lock, someone could 260 * have created the same conn (either normal or passive) in the 261 * interim. We check while holding the lock. If we won, we complete 262 * init and return our conn. If we lost, we rollback and return the 263 * other one. 264 */ 265 spin_lock_irqsave(&rds_conn_lock, flags); 266 if (parent) { 267 /* Creating passive conn */ 268 if (parent->c_passive) { 269 trans->conn_free(conn->c_path[0].cp_transport_data); 270 kfree(conn->c_path); 271 kmem_cache_free(rds_conn_slab, conn); 272 conn = parent->c_passive; 273 } else { 274 parent->c_passive = conn; 275 rds_cong_add_conn(conn); 276 rds_conn_count++; 277 } 278 } else { 279 /* Creating normal conn */ 280 struct rds_connection *found; 281 282 found = rds_conn_lookup(net, head, laddr, faddr, trans, 283 dev_if); 284 if (found) { 285 struct rds_conn_path *cp; 286 int i; 287 288 for (i = 0; i < npaths; i++) { 289 cp = &conn->c_path[i]; 290 /* The ->conn_alloc invocation may have 291 * allocated resource for all paths, so all 292 * of them may have to be freed here. 293 */ 294 if (cp->cp_transport_data) 295 trans->conn_free(cp->cp_transport_data); 296 } 297 kfree(conn->c_path); 298 kmem_cache_free(rds_conn_slab, conn); 299 conn = found; 300 } else { 301 conn->c_my_gen_num = rds_gen_num; 302 conn->c_peer_gen_num = 0; 303 hlist_add_head_rcu(&conn->c_hash_node, head); 304 rds_cong_add_conn(conn); 305 rds_conn_count++; 306 } 307 } 308 spin_unlock_irqrestore(&rds_conn_lock, flags); 309 rcu_read_unlock(); 310 311 out: 312 return conn; 313 } 314 315 struct rds_connection *rds_conn_create(struct net *net, 316 const struct in6_addr *laddr, 317 const struct in6_addr *faddr, 318 struct rds_transport *trans, gfp_t gfp, 319 int dev_if) 320 { 321 return __rds_conn_create(net, laddr, faddr, trans, gfp, 0, dev_if); 322 } 323 EXPORT_SYMBOL_GPL(rds_conn_create); 324 325 struct rds_connection *rds_conn_create_outgoing(struct net *net, 326 const struct in6_addr *laddr, 327 const struct in6_addr *faddr, 328 struct rds_transport *trans, 329 gfp_t gfp, int dev_if) 330 { 331 return __rds_conn_create(net, laddr, faddr, trans, gfp, 1, dev_if); 332 } 333 EXPORT_SYMBOL_GPL(rds_conn_create_outgoing); 334 335 void rds_conn_shutdown(struct rds_conn_path *cp) 336 { 337 struct rds_connection *conn = cp->cp_conn; 338 339 /* shut it down unless it's down already */ 340 if (!rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_DOWN)) { 341 /* 342 * Quiesce the connection mgmt handlers before we start tearing 343 * things down. We don't hold the mutex for the entire 344 * duration of the shutdown operation, else we may be 345 * deadlocking with the CM handler. Instead, the CM event 346 * handler is supposed to check for state DISCONNECTING 347 */ 348 mutex_lock(&cp->cp_cm_lock); 349 if (!rds_conn_path_transition(cp, RDS_CONN_UP, 350 RDS_CONN_DISCONNECTING) && 351 !rds_conn_path_transition(cp, RDS_CONN_ERROR, 352 RDS_CONN_DISCONNECTING)) { 353 rds_conn_path_error(cp, 354 "shutdown called in state %d\n", 355 atomic_read(&cp->cp_state)); 356 mutex_unlock(&cp->cp_cm_lock); 357 return; 358 } 359 mutex_unlock(&cp->cp_cm_lock); 360 361 wait_event(cp->cp_waitq, 362 !test_bit(RDS_IN_XMIT, &cp->cp_flags)); 363 wait_event(cp->cp_waitq, 364 !test_bit(RDS_RECV_REFILL, &cp->cp_flags)); 365 366 conn->c_trans->conn_path_shutdown(cp); 367 rds_conn_path_reset(cp); 368 369 if (!rds_conn_path_transition(cp, RDS_CONN_DISCONNECTING, 370 RDS_CONN_DOWN) && 371 !rds_conn_path_transition(cp, RDS_CONN_ERROR, 372 RDS_CONN_DOWN)) { 373 /* This can happen - eg when we're in the middle of tearing 374 * down the connection, and someone unloads the rds module. 375 * Quite reproducible with loopback connections. 376 * Mostly harmless. 377 * 378 * Note that this also happens with rds-tcp because 379 * we could have triggered rds_conn_path_drop in irq 380 * mode from rds_tcp_state change on the receipt of 381 * a FIN, thus we need to recheck for RDS_CONN_ERROR 382 * here. 383 */ 384 rds_conn_path_error(cp, "%s: failed to transition " 385 "to state DOWN, current state " 386 "is %d\n", __func__, 387 atomic_read(&cp->cp_state)); 388 return; 389 } 390 } 391 392 /* Then reconnect if it's still live. 393 * The passive side of an IB loopback connection is never added 394 * to the conn hash, so we never trigger a reconnect on this 395 * conn - the reconnect is always triggered by the active peer. */ 396 cancel_delayed_work_sync(&cp->cp_conn_w); 397 rcu_read_lock(); 398 if (!hlist_unhashed(&conn->c_hash_node)) { 399 rcu_read_unlock(); 400 rds_queue_reconnect(cp); 401 } else { 402 rcu_read_unlock(); 403 } 404 } 405 406 /* destroy a single rds_conn_path. rds_conn_destroy() iterates over 407 * all paths using rds_conn_path_destroy() 408 */ 409 static void rds_conn_path_destroy(struct rds_conn_path *cp) 410 { 411 struct rds_message *rm, *rtmp; 412 413 if (!cp->cp_transport_data) 414 return; 415 416 /* make sure lingering queued work won't try to ref the conn */ 417 cancel_delayed_work_sync(&cp->cp_send_w); 418 cancel_delayed_work_sync(&cp->cp_recv_w); 419 420 rds_conn_path_drop(cp, true); 421 flush_work(&cp->cp_down_w); 422 423 /* tear down queued messages */ 424 list_for_each_entry_safe(rm, rtmp, 425 &cp->cp_send_queue, 426 m_conn_item) { 427 list_del_init(&rm->m_conn_item); 428 BUG_ON(!list_empty(&rm->m_sock_item)); 429 rds_message_put(rm); 430 } 431 if (cp->cp_xmit_rm) 432 rds_message_put(cp->cp_xmit_rm); 433 434 WARN_ON(delayed_work_pending(&cp->cp_send_w)); 435 WARN_ON(delayed_work_pending(&cp->cp_recv_w)); 436 WARN_ON(delayed_work_pending(&cp->cp_conn_w)); 437 WARN_ON(work_pending(&cp->cp_down_w)); 438 439 cp->cp_conn->c_trans->conn_free(cp->cp_transport_data); 440 } 441 442 /* 443 * Stop and free a connection. 444 * 445 * This can only be used in very limited circumstances. It assumes that once 446 * the conn has been shutdown that no one else is referencing the connection. 447 * We can only ensure this in the rmmod path in the current code. 448 */ 449 void rds_conn_destroy(struct rds_connection *conn) 450 { 451 unsigned long flags; 452 int i; 453 struct rds_conn_path *cp; 454 int npaths = (conn->c_trans->t_mp_capable ? RDS_MPATH_WORKERS : 1); 455 456 rdsdebug("freeing conn %p for %pI4 -> " 457 "%pI4\n", conn, &conn->c_laddr, 458 &conn->c_faddr); 459 460 /* Ensure conn will not be scheduled for reconnect */ 461 spin_lock_irq(&rds_conn_lock); 462 hlist_del_init_rcu(&conn->c_hash_node); 463 spin_unlock_irq(&rds_conn_lock); 464 synchronize_rcu(); 465 466 /* shut the connection down */ 467 for (i = 0; i < npaths; i++) { 468 cp = &conn->c_path[i]; 469 rds_conn_path_destroy(cp); 470 BUG_ON(!list_empty(&cp->cp_retrans)); 471 } 472 473 /* 474 * The congestion maps aren't freed up here. They're 475 * freed by rds_cong_exit() after all the connections 476 * have been freed. 477 */ 478 rds_cong_remove_conn(conn); 479 480 kfree(conn->c_path); 481 kmem_cache_free(rds_conn_slab, conn); 482 483 spin_lock_irqsave(&rds_conn_lock, flags); 484 rds_conn_count--; 485 spin_unlock_irqrestore(&rds_conn_lock, flags); 486 } 487 EXPORT_SYMBOL_GPL(rds_conn_destroy); 488 489 static void rds_conn_message_info(struct socket *sock, unsigned int len, 490 struct rds_info_iterator *iter, 491 struct rds_info_lengths *lens, 492 int want_send) 493 { 494 struct hlist_head *head; 495 struct list_head *list; 496 struct rds_connection *conn; 497 struct rds_message *rm; 498 unsigned int total = 0; 499 unsigned long flags; 500 size_t i; 501 int j; 502 503 len /= sizeof(struct rds_info_message); 504 505 rcu_read_lock(); 506 507 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); 508 i++, head++) { 509 hlist_for_each_entry_rcu(conn, head, c_hash_node) { 510 struct rds_conn_path *cp; 511 int npaths; 512 513 npaths = (conn->c_trans->t_mp_capable ? 514 RDS_MPATH_WORKERS : 1); 515 516 for (j = 0; j < npaths; j++) { 517 cp = &conn->c_path[j]; 518 if (want_send) 519 list = &cp->cp_send_queue; 520 else 521 list = &cp->cp_retrans; 522 523 spin_lock_irqsave(&cp->cp_lock, flags); 524 525 /* XXX too lazy to maintain counts.. */ 526 list_for_each_entry(rm, list, m_conn_item) { 527 __be32 laddr; 528 __be32 faddr; 529 530 total++; 531 laddr = conn->c_laddr.s6_addr32[3]; 532 faddr = conn->c_faddr.s6_addr32[3]; 533 if (total <= len) 534 rds_inc_info_copy(&rm->m_inc, 535 iter, 536 laddr, 537 faddr, 538 0); 539 } 540 541 spin_unlock_irqrestore(&cp->cp_lock, flags); 542 } 543 } 544 } 545 rcu_read_unlock(); 546 547 lens->nr = total; 548 lens->each = sizeof(struct rds_info_message); 549 } 550 551 static void rds_conn_message_info_send(struct socket *sock, unsigned int len, 552 struct rds_info_iterator *iter, 553 struct rds_info_lengths *lens) 554 { 555 rds_conn_message_info(sock, len, iter, lens, 1); 556 } 557 558 static void rds_conn_message_info_retrans(struct socket *sock, 559 unsigned int len, 560 struct rds_info_iterator *iter, 561 struct rds_info_lengths *lens) 562 { 563 rds_conn_message_info(sock, len, iter, lens, 0); 564 } 565 566 void rds_for_each_conn_info(struct socket *sock, unsigned int len, 567 struct rds_info_iterator *iter, 568 struct rds_info_lengths *lens, 569 int (*visitor)(struct rds_connection *, void *), 570 u64 *buffer, 571 size_t item_len) 572 { 573 struct hlist_head *head; 574 struct rds_connection *conn; 575 size_t i; 576 577 rcu_read_lock(); 578 579 lens->nr = 0; 580 lens->each = item_len; 581 582 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); 583 i++, head++) { 584 hlist_for_each_entry_rcu(conn, head, c_hash_node) { 585 586 /* XXX no c_lock usage.. */ 587 if (!visitor(conn, buffer)) 588 continue; 589 590 /* We copy as much as we can fit in the buffer, 591 * but we count all items so that the caller 592 * can resize the buffer. */ 593 if (len >= item_len) { 594 rds_info_copy(iter, buffer, item_len); 595 len -= item_len; 596 } 597 lens->nr++; 598 } 599 } 600 rcu_read_unlock(); 601 } 602 EXPORT_SYMBOL_GPL(rds_for_each_conn_info); 603 604 static void rds_walk_conn_path_info(struct socket *sock, unsigned int len, 605 struct rds_info_iterator *iter, 606 struct rds_info_lengths *lens, 607 int (*visitor)(struct rds_conn_path *, void *), 608 u64 *buffer, 609 size_t item_len) 610 { 611 struct hlist_head *head; 612 struct rds_connection *conn; 613 size_t i; 614 615 rcu_read_lock(); 616 617 lens->nr = 0; 618 lens->each = item_len; 619 620 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); 621 i++, head++) { 622 hlist_for_each_entry_rcu(conn, head, c_hash_node) { 623 struct rds_conn_path *cp; 624 625 /* XXX We only copy the information from the first 626 * path for now. The problem is that if there are 627 * more than one underlying paths, we cannot report 628 * information of all of them using the existing 629 * API. For example, there is only one next_tx_seq, 630 * which path's next_tx_seq should we report? It is 631 * a bug in the design of MPRDS. 632 */ 633 cp = conn->c_path; 634 635 /* XXX no cp_lock usage.. */ 636 if (!visitor(cp, buffer)) 637 continue; 638 639 /* We copy as much as we can fit in the buffer, 640 * but we count all items so that the caller 641 * can resize the buffer. 642 */ 643 if (len >= item_len) { 644 rds_info_copy(iter, buffer, item_len); 645 len -= item_len; 646 } 647 lens->nr++; 648 } 649 } 650 rcu_read_unlock(); 651 } 652 653 static int rds_conn_info_visitor(struct rds_conn_path *cp, void *buffer) 654 { 655 struct rds_info_connection *cinfo = buffer; 656 struct rds_connection *conn = cp->cp_conn; 657 658 cinfo->next_tx_seq = cp->cp_next_tx_seq; 659 cinfo->next_rx_seq = cp->cp_next_rx_seq; 660 cinfo->laddr = conn->c_laddr.s6_addr32[3]; 661 cinfo->faddr = conn->c_faddr.s6_addr32[3]; 662 strncpy(cinfo->transport, conn->c_trans->t_name, 663 sizeof(cinfo->transport)); 664 cinfo->flags = 0; 665 666 rds_conn_info_set(cinfo->flags, test_bit(RDS_IN_XMIT, &cp->cp_flags), 667 SENDING); 668 /* XXX Future: return the state rather than these funky bits */ 669 rds_conn_info_set(cinfo->flags, 670 atomic_read(&cp->cp_state) == RDS_CONN_CONNECTING, 671 CONNECTING); 672 rds_conn_info_set(cinfo->flags, 673 atomic_read(&cp->cp_state) == RDS_CONN_UP, 674 CONNECTED); 675 return 1; 676 } 677 678 static void rds_conn_info(struct socket *sock, unsigned int len, 679 struct rds_info_iterator *iter, 680 struct rds_info_lengths *lens) 681 { 682 u64 buffer[(sizeof(struct rds_info_connection) + 7) / 8]; 683 684 rds_walk_conn_path_info(sock, len, iter, lens, 685 rds_conn_info_visitor, 686 buffer, 687 sizeof(struct rds_info_connection)); 688 } 689 690 int rds_conn_init(void) 691 { 692 int ret; 693 694 ret = rds_loop_net_init(); /* register pernet callback */ 695 if (ret) 696 return ret; 697 698 rds_conn_slab = kmem_cache_create("rds_connection", 699 sizeof(struct rds_connection), 700 0, 0, NULL); 701 if (!rds_conn_slab) { 702 rds_loop_net_exit(); 703 return -ENOMEM; 704 } 705 706 rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info); 707 rds_info_register_func(RDS_INFO_SEND_MESSAGES, 708 rds_conn_message_info_send); 709 rds_info_register_func(RDS_INFO_RETRANS_MESSAGES, 710 rds_conn_message_info_retrans); 711 712 return 0; 713 } 714 715 void rds_conn_exit(void) 716 { 717 rds_loop_net_exit(); /* unregister pernet callback */ 718 rds_loop_exit(); 719 720 WARN_ON(!hlist_empty(rds_conn_hash)); 721 722 kmem_cache_destroy(rds_conn_slab); 723 724 rds_info_deregister_func(RDS_INFO_CONNECTIONS, rds_conn_info); 725 rds_info_deregister_func(RDS_INFO_SEND_MESSAGES, 726 rds_conn_message_info_send); 727 rds_info_deregister_func(RDS_INFO_RETRANS_MESSAGES, 728 rds_conn_message_info_retrans); 729 } 730 731 /* 732 * Force a disconnect 733 */ 734 void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy) 735 { 736 atomic_set(&cp->cp_state, RDS_CONN_ERROR); 737 738 rcu_read_lock(); 739 if (!destroy && rds_destroy_pending(cp->cp_conn)) { 740 rcu_read_unlock(); 741 return; 742 } 743 queue_work(rds_wq, &cp->cp_down_w); 744 rcu_read_unlock(); 745 } 746 EXPORT_SYMBOL_GPL(rds_conn_path_drop); 747 748 void rds_conn_drop(struct rds_connection *conn) 749 { 750 WARN_ON(conn->c_trans->t_mp_capable); 751 rds_conn_path_drop(&conn->c_path[0], false); 752 } 753 EXPORT_SYMBOL_GPL(rds_conn_drop); 754 755 /* 756 * If the connection is down, trigger a connect. We may have scheduled a 757 * delayed reconnect however - in this case we should not interfere. 758 */ 759 void rds_conn_path_connect_if_down(struct rds_conn_path *cp) 760 { 761 rcu_read_lock(); 762 if (rds_destroy_pending(cp->cp_conn)) { 763 rcu_read_unlock(); 764 return; 765 } 766 if (rds_conn_path_state(cp) == RDS_CONN_DOWN && 767 !test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags)) 768 queue_delayed_work(rds_wq, &cp->cp_conn_w, 0); 769 rcu_read_unlock(); 770 } 771 EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down); 772 773 void rds_conn_connect_if_down(struct rds_connection *conn) 774 { 775 WARN_ON(conn->c_trans->t_mp_capable); 776 rds_conn_path_connect_if_down(&conn->c_path[0]); 777 } 778 EXPORT_SYMBOL_GPL(rds_conn_connect_if_down); 779 780 void 781 __rds_conn_path_error(struct rds_conn_path *cp, const char *fmt, ...) 782 { 783 va_list ap; 784 785 va_start(ap, fmt); 786 vprintk(fmt, ap); 787 va_end(ap); 788 789 rds_conn_path_drop(cp, false); 790 } 791