/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * AF_SMC protocol family socket handler keeping the AF_INET sock address type
 * applies to SOCK_STREAM sockets only
 * offers an alternative communication option for TCP-protocol sockets
 * applicable with RoCE-cards only
 *
 * Initial restrictions:
 *   - non-blocking connect postponed
 *   - IPv6 support postponed
 *   - support for alternate links postponed
 *   - partial support for non-blocking sockets only
 *   - support for urgent data postponed
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 *             based on prototype from Frank Blaschka
 */

#define KMSG_COMPONENT "smc"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/socket.h>
#include <linux/inetdevice.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/sched/signal.h>

#include <net/sock.h>
#include <net/tcp.h>
#include <net/smc.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_pnet.h"
#include "smc_tx.h"
#include "smc_rx.h"
#include "smc_close.h"

static DEFINE_MUTEX(smc_create_lgr_pending);	/* serialize link group
						 * creation
						 */

struct smc_lgr_list smc_lgr_list = {	/* established link groups */
	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
	.list = LIST_HEAD_INIT(smc_lgr_list.list),
};

static void smc_tcp_listen_work(struct work_struct *);

static void smc_set_keepalive(struct sock *sk, int val)
{
	struct smc_sock *smc = smc_sk(sk);

	smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
}

static struct smc_hashinfo smc_v4_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};

int smc_hash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
	struct hlist_head *head;

	head = &h->ht;

	write_lock_bh(&h->lock);
	sk_add_node(sk, head);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	write_unlock_bh(&h->lock);

	return 0;
}
EXPORT_SYMBOL_GPL(smc_hash_sk);

void smc_unhash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;

	write_lock_bh(&h->lock);
	if (sk_del_node_init(sk))
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	write_unlock_bh(&h->lock);
}
EXPORT_SYMBOL_GPL(smc_unhash_sk);

struct proto smc_proto = {
	.name		= "SMC",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v4_hashinfo,
	.slab_flags	= SLAB_DESTROY_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto);

static int smc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = 0;

	if (!sk)
		goto out;

	smc = smc_sk(sk);
	sock_hold(sk);
	if (sk->sk_state == SMC_LISTEN)
		/* smc_close_non_accepted() is called and acquires
		 * sock lock for child sockets again
		 */
		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	else
		lock_sock(sk);

	if (smc->use_fallback) {
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk);
	} else {
		rc = smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	}
	if (smc->clcsock) {
		sock_release(smc->clcsock);
		smc->clcsock = NULL;
	}

	/* detach socket */
	sock_orphan(sk);
	sock->sk = NULL;
	if (smc->use_fallback) {
		schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
	} else if (sk->sk_state == SMC_CLOSED) {
		smc_conn_free(&smc->conn);
		schedule_delayed_work(&smc->sock_put_work,
				      SMC_CLOSE_SOCK_PUT_DELAY);
	}
	sk->sk_prot->unhash(sk);
	release_sock(sk);

	sock_put(sk);
out:
	return rc;
}

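/* sk_destruct callback, invoked from sk_free() once the last reference to
 * the sock is gone; the refcount debug bookkeeping is only done for sockets
 * that are already closed and orphaned
 */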
static void smc_destruct(struct sock *sk)
{
	if (sk->sk_state != SMC_CLOSED)
		return;
	if (!sock_flag(sk, SOCK_DEAD))
		return;

	sk_refcnt_debug_dec(sk);
}

static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
{
	struct smc_sock *smc;
	struct sock *sk;

	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, &smc_proto, 0);
	if (!sk)
		return NULL;

	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
	sk->sk_state = SMC_INIT;
	sk->sk_destruct = smc_destruct;
	sk->sk_protocol = SMCPROTO_SMC;
	smc = smc_sk(sk);
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	INIT_LIST_HEAD(&smc->accept_q);
	spin_lock_init(&smc->accept_q_lock);
	INIT_DELAYED_WORK(&smc->sock_put_work, smc_close_sock_put_work);
	sk->sk_prot->hash(sk);
	sk_refcnt_debug_inc(sk);

	return sk;
}

static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
		    int addr_len)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);

	/* replicate tests from inet_bind(), to be safe wrt. future changes */
	rc = -EINVAL;
	if (addr_len < sizeof(struct sockaddr_in))
		goto out;

	rc = -EAFNOSUPPORT;
	/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
	if ((addr->sin_family != AF_INET) &&
	    ((addr->sin_family != AF_UNSPEC) ||
	     (addr->sin_addr.s_addr != htonl(INADDR_ANY))))
		goto out;

	lock_sock(sk);

	/* Check if socket is already active */
	rc = -EINVAL;
	if (sk->sk_state != SMC_INIT)
		goto out_rel;

	smc->clcsock->sk->sk_reuse = sk->sk_reuse;
	rc = kernel_bind(smc->clcsock, uaddr, addr_len);

out_rel:
	release_sock(sk);
out:
	return rc;
}

static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
				   unsigned long mask)
{
	/* options we don't get control over via setsockopt */
	nsk->sk_type = osk->sk_type;
	nsk->sk_sndbuf = osk->sk_sndbuf;
	nsk->sk_rcvbuf = osk->sk_rcvbuf;
	nsk->sk_sndtimeo = osk->sk_sndtimeo;
	nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
	nsk->sk_mark = osk->sk_mark;
	nsk->sk_priority = osk->sk_priority;
	nsk->sk_rcvlowat = osk->sk_rcvlowat;
	nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
	nsk->sk_err = osk->sk_err;

	nsk->sk_flags &= ~mask;
	nsk->sk_flags |= osk->sk_flags & mask;
}

#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_BROADCAST) | \
			     (1UL << SOCK_TIMESTAMP) | \
			     (1UL << SOCK_DBG) | \
			     (1UL << SOCK_RCVTSTAMP) | \
			     (1UL << SOCK_RCVTSTAMPNS) | \
			     (1UL << SOCK_LOCALROUTE) | \
			     (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
			     (1UL << SOCK_RXQ_OVFL) | \
			     (1UL << SOCK_WIFI_STATUS) | \
			     (1UL << SOCK_NOFCS) | \
			     (1UL << SOCK_FILTER_LOCKED))
/* copy only relevant settings and flags of SOL_SOCKET level from smc to
 * clc socket (since smc is not called for these options from net/core)
 */
static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
	smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
}

#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_DBG))
/* copy only settings and flags relevant for smc from clc to smc socket */
static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
{
	smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}

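/* The result of this lookup feeds the CLC handshake: smc_listen_work()
 * declines with SMC_CLC_DECL_CNFERR when the subnet or prefix length
 * determined here does not match the values carried in the peer's CLC
 * proposal, since this initial SMC-R implementation requires both ends of
 * the internal TCP connection to reside in the same IPv4 subnet.
 */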
/* determine subnet and mask of internal TCP socket */
int smc_netinfo_by_tcpsk(struct socket *clcsock,
			 __be32 *subnet, u8 *prefix_len)
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
	struct sockaddr_in addr;
	int rc = -ENOENT;
	int len;

	if (!dst) {
		rc = -ENOTCONN;
		goto out;
	}
	if (!dst->dev) {
		rc = -ENODEV;
		goto out_rel;
	}

	/* get address to which the internal TCP socket is bound */
	kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len);
	/* analyze IPv4 specific data of net_device belonging to TCP socket */
	for_ifa(dst->dev->ip_ptr) {
		if (ifa->ifa_address != addr.sin_addr.s_addr)
			continue;
		*prefix_len = inet_mask_len(ifa->ifa_mask);
		*subnet = ifa->ifa_address & ifa->ifa_mask;
		rc = 0;
		break;
	} endfor_ifa(dst->dev->ip_ptr);

out_rel:
	dst_release(dst);
out:
	return rc;
}

static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
{
	struct smc_link_group *lgr = smc->conn.lgr;
	struct smc_link *link;
	int rest;
	int rc;

	link = &lgr->lnk[SMC_SINGLE_LINK];
	/* receive CONFIRM LINK request from server over RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(
		&link->llc_confirm,
		SMC_LLC_WAIT_FIRST_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE);
		return rc;
	}

	rc = smc_ib_modify_qp_rts(link);
	if (rc)
		return SMC_CLC_DECL_INTERR;

	smc_wr_remember_qp_attr(link);
	/* send CONFIRM LINK response over RoCE fabric */
	rc = smc_llc_send_confirm_link(link,
				       link->smcibdev->mac[link->ibport - 1],
				       gid, SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TCL;

	return rc;
}

static void smc_conn_save_peer_info(struct smc_sock *smc,
				    struct smc_clc_msg_accept_confirm *clc)
{
	smc->conn.peer_conn_idx = clc->conn_idx;
	smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
	smc->conn.peer_rmbe_size = smc_uncompress_bufsize(clc->rmbe_size);
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
}

static void smc_link_save_peer_info(struct smc_link *link,
				    struct smc_clc_msg_accept_confirm *clc)
{
	link->peer_qpn = ntoh24(clc->qpn);
	memcpy(link->peer_gid, clc->lcl.gid, SMC_GID_SIZE);
	memcpy(link->peer_mac, clc->lcl.mac, sizeof(link->peer_mac));
	link->peer_psn = ntoh24(clc->psn);
	link->peer_mtu = clc->qp_mtu;
}

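/* Client side of the CLC/LLC handshake implemented below (sketch):
 *
 *	client					server
 *	  |------- CLC PROPOSAL ------->|	(internal TCP socket)
 *	  |<------- CLC ACCEPT ---------|
 *	  |------- CLC CONFIRM -------->|
 *	  |<--- LLC CONFIRM LINK req ---|	(first contact only,
 *	  |---- LLC CONFIRM LINK rsp -->|	 over the RoCE fabric)
 *
 * Most setup failures result in a CLC DECLINE and a fallback to plain TCP
 * on the internal socket.
 */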
/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc)
{
	struct sockaddr_in *inaddr = (struct sockaddr_in *)smc->addr;
	struct smc_clc_msg_accept_confirm aclc;
	int local_contact = SMC_FIRST_CONTACT;
	struct smc_ib_device *smcibdev;
	struct smc_link *link;
	u8 srv_first_contact;
	int reason_code = 0;
	int rc = 0;
	u8 ibport;

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(smc)) {
		reason_code = SMC_CLC_DECL_IPSEC;
		goto decline_rdma;
	}

	/* PNET table look up: search active ib_device and port
	 * within same PNETID that also contains the ethernet device
	 * used for the internal TCP socket
	 */
	smc_pnet_find_roce_resource(smc->clcsock->sk, &smcibdev, &ibport);
	if (!smcibdev) {
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
		goto decline_rdma;
	}

	/* do inband token exchange */
	reason_code = smc_clc_send_proposal(smc, smcibdev, ibport);
	if (reason_code < 0) {
		rc = reason_code;
		goto out_err;
	}
	if (reason_code > 0) /* configuration error */
		goto decline_rdma;
	/* receive SMC Accept CLC message */
	reason_code = smc_clc_wait_msg(smc, &aclc, sizeof(aclc),
				       SMC_CLC_ACCEPT);
	if (reason_code < 0) {
		rc = reason_code;
		goto out_err;
	}
	if (reason_code > 0)
		goto decline_rdma;

	srv_first_contact = aclc.hdr.flag;
	mutex_lock(&smc_create_lgr_pending);
	local_contact = smc_conn_create(smc, inaddr->sin_addr.s_addr, smcibdev,
					ibport, &aclc.lcl, srv_first_contact);
	if (local_contact < 0) {
		rc = local_contact;
		if (rc == -ENOMEM)
			reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
		else if (rc == -ENOLINK)
			reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
		goto decline_rdma_unlock;
	}
	link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];

	smc_conn_save_peer_info(smc, &aclc);

	rc = smc_sndbuf_create(smc);
	if (rc) {
		reason_code = SMC_CLC_DECL_MEM;
		goto decline_rdma_unlock;
	}
	rc = smc_rmb_create(smc);
	if (rc) {
		reason_code = SMC_CLC_DECL_MEM;
		goto decline_rdma_unlock;
	}

	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, &aclc);

	rc = smc_rmb_rtoken_handling(&smc->conn, &aclc);
	if (rc) {
		reason_code = SMC_CLC_DECL_INTERR;
		goto decline_rdma_unlock;
	}

	if (local_contact == SMC_FIRST_CONTACT) {
		rc = smc_ib_ready_link(link);
		if (rc) {
			reason_code = SMC_CLC_DECL_INTERR;
			goto decline_rdma_unlock;
		}
	}

	rc = smc_clc_send_confirm(smc);
	if (rc)
		goto out_err_unlock;

	if (local_contact == SMC_FIRST_CONTACT) {
		/* QP confirmation over RoCE fabric */
		reason_code = smc_clnt_conf_first_link(
			smc, &smcibdev->gid[ibport - 1]);
		if (reason_code < 0) {
			rc = reason_code;
			goto out_err_unlock;
		}
		if (reason_code > 0)
			goto decline_rdma_unlock;
	}

	mutex_unlock(&smc_create_lgr_pending);
	smc_tx_init(smc);
	smc_rx_init(smc);

out_connected:
	smc_copy_sock_settings_to_clc(smc);
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return rc ? rc : local_contact;

decline_rdma_unlock:
	mutex_unlock(&smc_create_lgr_pending);
	smc_conn_free(&smc->conn);
decline_rdma:
	/* RDMA setup failed, switch back to TCP */
	smc->use_fallback = true;
	if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
		rc = smc_clc_send_decline(smc, reason_code, 0);
		if (rc < sizeof(struct smc_clc_msg_decline))
			goto out_err;
	}
	goto out_connected;

out_err_unlock:
	mutex_unlock(&smc_create_lgr_pending);
	smc_conn_free(&smc->conn);
out_err:
	return rc;
}

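/* connect the AF_SMC socket: establish the internal TCP connection first,
 * then try to upgrade it to SMC-R via smc_connect_rdma()
 */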
static int smc_connect(struct socket *sock, struct sockaddr *addr,
		       int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;

	smc = smc_sk(sk);

	/* separate smc parameter checking to be safe */
	if (alen < sizeof(addr->sa_family))
		goto out_err;
	if (addr->sa_family != AF_INET)
		goto out_err;
	smc->addr = addr; /* needed for nonblocking connect */

	lock_sock(sk);
	switch (sk->sk_state) {
	default:
		goto out;
	case SMC_ACTIVE:
		rc = -EISCONN;
		goto out;
	case SMC_INIT:
		rc = 0;
		break;
	}

	smc_copy_sock_settings_to_clc(smc);
	rc = kernel_connect(smc->clcsock, addr, alen, flags);
	if (rc)
		goto out;

	/* setup RDMA connection */
	rc = smc_connect_rdma(smc);
	if (rc < 0)
		goto out;
	else
		rc = 0; /* success cases including fallback */

out:
	release_sock(sk);
out_err:
	return rc;
}

static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
{
	struct sock *sk = &lsmc->sk;
	struct socket *new_clcsock;
	struct sock *new_sk;
	int rc;

	release_sock(&lsmc->sk);
	new_sk = smc_sock_alloc(sock_net(sk), NULL);
	if (!new_sk) {
		rc = -ENOMEM;
		lsmc->sk.sk_err = ENOMEM;
		*new_smc = NULL;
		lock_sock(&lsmc->sk);
		goto out;
	}
	*new_smc = smc_sk(new_sk);

	rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
	lock_sock(&lsmc->sk);
	if (rc < 0) {
		lsmc->sk.sk_err = -rc;
		new_sk->sk_state = SMC_CLOSED;
		sock_set_flag(new_sk, SOCK_DEAD);
		sk->sk_prot->unhash(new_sk);
		sock_put(new_sk);
		*new_smc = NULL;
		goto out;
	}
	if (lsmc->sk.sk_state == SMC_CLOSED) {
		if (new_clcsock)
			sock_release(new_clcsock);
		new_sk->sk_state = SMC_CLOSED;
		sock_set_flag(new_sk, SOCK_DEAD);
		sk->sk_prot->unhash(new_sk);
		sock_put(new_sk);
		*new_smc = NULL;
		goto out;
	}

	(*new_smc)->clcsock = new_clcsock;
out:
	return rc;
}

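/* Child sockets created by smc_listen_work() are parked on the listen
 * socket's accept_q (protected by accept_q_lock) until user space picks
 * them up with accept(); smc_accept_dequeue() below removes them again.
 */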
/* add a just created sock to the accept queue of the listen sock as
 * candidate for a following socket accept call from user space
 */
static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
{
	struct smc_sock *par = smc_sk(parent);

	sock_hold(sk);
	spin_lock(&par->accept_q_lock);
	list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_added(parent);
}

/* remove a socket from the accept queue of its parental listening socket */
static void smc_accept_unlink(struct sock *sk)
{
	struct smc_sock *par = smc_sk(sk)->listen_smc;

	spin_lock(&par->accept_q_lock);
	list_del_init(&smc_sk(sk)->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
	sock_put(sk);
}

/* remove a sock from the accept queue to bind it to a new socket created
 * for a socket accept call from user space
 */
struct sock *smc_accept_dequeue(struct sock *parent,
				struct socket *new_sock)
{
	struct smc_sock *isk, *n;
	struct sock *new_sk;

	list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
		new_sk = (struct sock *)isk;

		smc_accept_unlink(new_sk);
		if (new_sk->sk_state == SMC_CLOSED) {
			/* tbd in follow-on patch: close this sock */
			continue;
		}
		if (new_sock)
			sock_graft(new_sk, new_sock);
		return new_sk;
	}
	return NULL;
}

/* clean up for a created but never accepted sock */
void smc_close_non_accepted(struct sock *sk)
{
	struct smc_sock *smc = smc_sk(sk);

	sock_hold(sk);
	lock_sock(sk);
	if (!sk->sk_lingertime)
		/* wait for peer closing */
		sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
	if (!smc->use_fallback)
		smc_close_active(smc);
	if (smc->clcsock) {
		struct socket *tcp;

		tcp = smc->clcsock;
		smc->clcsock = NULL;
		sock_release(tcp);
	}
	sock_set_flag(sk, SOCK_DEAD);
	sk->sk_shutdown |= SHUTDOWN_MASK;
	if (smc->use_fallback) {
		schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
	} else {
		smc_conn_free(&smc->conn);
		schedule_delayed_work(&smc->sock_put_work,
				      SMC_CLOSE_SOCK_PUT_DELAY);
	}
	release_sock(sk);
	sock_put(sk);
}

static int smc_serv_conf_first_link(struct smc_sock *smc)
{
	struct smc_link_group *lgr = smc->conn.lgr;
	struct smc_link *link;
	int rest;
	int rc;

	link = &lgr->lnk[SMC_SINGLE_LINK];
	/* send CONFIRM LINK request to client over the RoCE fabric */
	rc = smc_llc_send_confirm_link(link,
				       link->smcibdev->mac[link->ibport - 1],
				       &link->smcibdev->gid[link->ibport - 1],
				       SMC_LLC_REQ);
	if (rc < 0)
		return SMC_CLC_DECL_TCL;

	/* receive CONFIRM LINK response from client over the RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(
		&link->llc_confirm_resp,
		SMC_LLC_WAIT_FIRST_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE);
	}

	return rc;
}

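/* server-side counterpart of smc_connect_rdma(): runs as a work item per
 * accepted internal TCP connection, performs the CLC handshake (PROPOSAL /
 * ACCEPT / CONFIRM) and, on first contact, confirms the new link over the
 * RoCE fabric before enqueueing the child socket for accept()
 */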
/* setup for RDMA connection of server */
static void smc_listen_work(struct work_struct *work)
{
	struct smc_sock *new_smc = container_of(work, struct smc_sock,
						smc_listen_work);
	struct socket *newclcsock = new_smc->clcsock;
	struct smc_sock *lsmc = new_smc->listen_smc;
	struct smc_clc_msg_accept_confirm cclc;
	int local_contact = SMC_REUSE_CONTACT;
	struct sock *newsmcsk = &new_smc->sk;
	struct smc_clc_msg_proposal pclc;
	struct smc_ib_device *smcibdev;
	struct sockaddr_in peeraddr;
	struct smc_link *link;
	int reason_code = 0;
	int rc = 0, len;
	__be32 subnet;
	u8 prefix_len;
	u8 ibport;

	/* do inband token exchange -
	 * wait for and receive SMC Proposal CLC message
	 */
	reason_code = smc_clc_wait_msg(new_smc, &pclc, sizeof(pclc),
				       SMC_CLC_PROPOSAL);
	if (reason_code < 0)
		goto out_err;
	if (reason_code > 0)
		goto decline_rdma;

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(new_smc)) {
		reason_code = SMC_CLC_DECL_IPSEC;
		goto decline_rdma;
	}

	/* PNET table look up: search active ib_device and port
	 * within same PNETID that also contains the ethernet device
	 * used for the internal TCP socket
	 */
	smc_pnet_find_roce_resource(newclcsock->sk, &smcibdev, &ibport);
	if (!smcibdev) {
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
		goto decline_rdma;
	}

	/* determine subnet and mask from internal TCP socket */
	rc = smc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
	if (rc) {
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
		goto decline_rdma;
	}
	if ((pclc.outgoing_subnet != subnet) ||
	    (pclc.prefix_len != prefix_len)) {
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
		goto decline_rdma;
	}

	/* get address of the peer connected to the internal TCP socket */
	kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr, &len);

	/* allocate connection / link group */
	mutex_lock(&smc_create_lgr_pending);
	local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
					smcibdev, ibport, &pclc.lcl, 0);
	if (local_contact == SMC_REUSE_CONTACT)
		/* lock no longer needed, free it due to following
		 * smc_clc_wait_msg() call
		 */
		mutex_unlock(&smc_create_lgr_pending);
	if (local_contact < 0) {
		rc = local_contact;
		if (rc == -ENOMEM)
			reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
		else if (rc == -ENOLINK)
			reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
		goto decline_rdma;
	}
	link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];

	rc = smc_sndbuf_create(new_smc);
	if (rc) {
		reason_code = SMC_CLC_DECL_MEM;
		goto decline_rdma;
	}
	rc = smc_rmb_create(new_smc);
	if (rc) {
		reason_code = SMC_CLC_DECL_MEM;
		goto decline_rdma;
	}

	rc = smc_clc_send_accept(new_smc, local_contact);
	if (rc)
		goto out_err;

	/* receive SMC Confirm CLC message */
	reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
				       SMC_CLC_CONFIRM);
	if (reason_code < 0)
		goto out_err;
	if (reason_code > 0)
		goto decline_rdma;
	smc_conn_save_peer_info(new_smc, &cclc);
	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, &cclc);

	rc = smc_rmb_rtoken_handling(&new_smc->conn, &cclc);
	if (rc) {
		reason_code = SMC_CLC_DECL_INTERR;
		goto decline_rdma;
	}

	if (local_contact == SMC_FIRST_CONTACT) {
		rc = smc_ib_ready_link(link);
		if (rc) {
			reason_code = SMC_CLC_DECL_INTERR;
			goto decline_rdma;
		}
		/* QP confirmation over RoCE fabric */
		reason_code = smc_serv_conf_first_link(new_smc);
		if (reason_code < 0) {
			/* peer is not aware of a problem */
			rc = reason_code;
			goto out_err;
		}
		if (reason_code > 0)
			goto decline_rdma;
	}

	smc_tx_init(new_smc);
	smc_rx_init(new_smc);

out_connected:
	sk_refcnt_debug_inc(newsmcsk);
	if (newsmcsk->sk_state == SMC_INIT)
		newsmcsk->sk_state = SMC_ACTIVE;
enqueue:
	if (local_contact == SMC_FIRST_CONTACT)
		mutex_unlock(&smc_create_lgr_pending);
	lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
	if (lsmc->sk.sk_state == SMC_LISTEN) {
		smc_accept_enqueue(&lsmc->sk, newsmcsk);
	} else { /* no longer listening */
		smc_close_non_accepted(newsmcsk);
	}
	release_sock(&lsmc->sk);

	/* Wake up accept */
	lsmc->sk.sk_data_ready(&lsmc->sk);
	sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
	return;

decline_rdma:
	/* RDMA setup failed, switch back to TCP */
	smc_conn_free(&new_smc->conn);
	new_smc->use_fallback = true;
	if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
		rc = smc_clc_send_decline(new_smc, reason_code, 0);
		if (rc < sizeof(struct smc_clc_msg_decline))
			goto out_err;
	}
	goto out_connected;

out_err:
	newsmcsk->sk_state = SMC_CLOSED;
	smc_conn_free(&new_smc->conn);
	goto enqueue; /* queue new sock with sk_err set */
}

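/* listen worker: accepts connections on the internal TCP listen socket as
 * long as the SMC socket stays in SMC_LISTEN state and schedules an
 * smc_listen_work() instance for each new child
 */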
static void smc_tcp_listen_work(struct work_struct *work)
{
	struct smc_sock *lsmc = container_of(work, struct smc_sock,
					     tcp_listen_work);
	struct smc_sock *new_smc;
	int rc = 0;

	lock_sock(&lsmc->sk);
	while (lsmc->sk.sk_state == SMC_LISTEN) {
		rc = smc_clcsock_accept(lsmc, &new_smc);
		if (rc)
			goto out;
		if (!new_smc)
			continue;

		new_smc->listen_smc = lsmc;
		new_smc->use_fallback = false; /* assume rdma capability first*/
		sock_hold(&lsmc->sk); /* sock_put in smc_listen_work */
		INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
		smc_copy_sock_settings_to_smc(new_smc);
		schedule_work(&new_smc->smc_listen_work);
	}

out:
	release_sock(&lsmc->sk);
	lsmc->sk.sk_data_ready(&lsmc->sk); /* no more listening, wake accept */
}

static int smc_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);
	lock_sock(sk);

	rc = -EINVAL;
	if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN))
		goto out;

	rc = 0;
	if (sk->sk_state == SMC_LISTEN) {
		sk->sk_max_ack_backlog = backlog;
		goto out;
	}
	/* some socket options are handled in core, so we could not apply
	 * them to the clc socket -- copy smc socket options to clc socket
	 */
	smc_copy_sock_settings_to_clc(smc);

	rc = kernel_listen(smc->clcsock, backlog);
	if (rc)
		goto out;
	sk->sk_max_ack_backlog = backlog;
	sk->sk_ack_backlog = 0;
	sk->sk_state = SMC_LISTEN;
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	schedule_work(&smc->tcp_listen_work);

out:
	release_sock(sk);
	return rc;
}

static int smc_accept(struct socket *sock, struct socket *new_sock,
		      int flags)
{
	struct sock *sk = sock->sk, *nsk;
	DECLARE_WAITQUEUE(wait, current);
	struct smc_sock *lsmc;
	long timeo;
	int rc = 0;

	lsmc = smc_sk(sk);
	lock_sock(sk);

	if (lsmc->sk.sk_state != SMC_LISTEN) {
		rc = -EINVAL;
		goto out;
	}

	/* Wait for an incoming connection */
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	add_wait_queue_exclusive(sk_sleep(sk), &wait);
	while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (!timeo) {
			rc = -EAGAIN;
			break;
		}
		release_sock(sk);
		timeo = schedule_timeout(timeo);
		/* wakeup by sk_data_ready in smc_listen_work() */
		sched_annotate_sleep();
		lock_sock(sk);
		if (signal_pending(current)) {
			rc = sock_intr_errno(timeo);
			break;
		}
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(sk_sleep(sk), &wait);

	if (!rc)
		rc = sock_error(nsk);

out:
	release_sock(sk);
	return rc;
}

static int smc_getname(struct socket *sock, struct sockaddr *addr,
		       int *len, int peer)
{
	struct smc_sock *smc;

	if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
	    (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
		return -ENOTCONN;

	smc = smc_sk(sock->sk);

	return smc->clcsock->ops->getname(smc->clcsock, addr, len, peer);
}

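/* data path: after the handshake, sendmsg()/recvmsg() either go through the
 * SMC tx/rx code over RDMA or, after fallback, are simply forwarded to the
 * internal TCP socket
 */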
static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_INIT))
		goto out;
	if (smc->use_fallback)
		rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
	else
		rc = smc_tx_sendmsg(smc, msg, len);
out:
	release_sock(sk);
	return rc;
}

static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
		       int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state == SMC_INIT) ||
	    (sk->sk_state == SMC_LISTEN) ||
	    (sk->sk_state == SMC_CLOSED))
		goto out;

	if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
		rc = 0;
		goto out;
	}

	if (smc->use_fallback)
		rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
	else
		rc = smc_rx_recvmsg(smc, msg, len, flags);

out:
	release_sock(sk);
	return rc;
}

static unsigned int smc_accept_poll(struct sock *parent)
{
	struct smc_sock *isk;
	struct sock *sk;

	lock_sock(parent);
	list_for_each_entry(isk, &smc_sk(parent)->accept_q, accept_q) {
		sk = (struct sock *)isk;

		if (sk->sk_state == SMC_ACTIVE) {
			release_sock(parent);
			return POLLIN | POLLRDNORM;
		}
	}
	release_sock(parent);

	return 0;
}

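/* While in SMC_INIT state or after fallback, poll() is delegated to the
 * internal TCP socket. This is also where a deferred non-blocking connect()
 * is completed: once the clcsock reports POLLOUT, smc_connect_rdma() is
 * called to perform the SMC handshake.
 */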
static unsigned int smc_poll(struct file *file, struct socket *sock,
			     poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask = 0;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sock->sk);
	if ((sk->sk_state == SMC_INIT) || smc->use_fallback) {
		/* delegate to CLC child sock */
		mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
		/* if non-blocking connect finished ... */
		lock_sock(sk);
		if ((sk->sk_state == SMC_INIT) && (mask & POLLOUT)) {
			sk->sk_err = smc->clcsock->sk->sk_err;
			if (sk->sk_err) {
				mask |= POLLERR;
			} else {
				rc = smc_connect_rdma(smc);
				if (rc < 0)
					mask |= POLLERR;
				else
					/* success cases including fallback */
					mask |= POLLOUT | POLLWRNORM;
			}
		}
		release_sock(sk);
	} else {
		sock_poll_wait(file, sk_sleep(sk), wait);
		if (sk->sk_state == SMC_LISTEN)
			/* woken up by sk_data_ready in smc_listen_work() */
			mask |= smc_accept_poll(sk);
		if (sk->sk_err)
			mask |= POLLERR;
		if (atomic_read(&smc->conn.sndbuf_space) ||
		    (sk->sk_shutdown & SEND_SHUTDOWN)) {
			mask |= POLLOUT | POLLWRNORM;
		} else {
			sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		}
		if (atomic_read(&smc->conn.bytes_to_rcv))
			mask |= POLLIN | POLLRDNORM;
		if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
		    (sk->sk_state == SMC_CLOSED))
			mask |= POLLHUP;
		if (sk->sk_shutdown & RCV_SHUTDOWN)
			mask |= POLLIN | POLLRDNORM | POLLRDHUP;
		if (sk->sk_state == SMC_APPCLOSEWAIT1)
			mask |= POLLIN;
	}

	return mask;
}

static int smc_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;
	int rc1 = 0;

	smc = smc_sk(sk);

	if ((how < SHUT_RD) || (how > SHUT_RDWR))
		return rc;

	lock_sock(sk);

	rc = -ENOTCONN;
	if ((sk->sk_state != SMC_LISTEN) &&
	    (sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPFINCLOSEWAIT))
		goto out;
	if (smc->use_fallback) {
		rc = kernel_sock_shutdown(smc->clcsock, how);
		sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
		if (sk->sk_shutdown == SHUTDOWN_MASK)
			sk->sk_state = SMC_CLOSED;
		goto out;
	}
	switch (how) {
	case SHUT_RDWR:		/* shutdown in both directions */
		rc = smc_close_active(smc);
		break;
	case SHUT_WR:
		rc = smc_close_shutdown_write(smc);
		break;
	case SHUT_RD:
		if (sk->sk_state == SMC_LISTEN)
			rc = smc_close_active(smc);
		else
			rc = 0;
			/* nothing more to do because peer is not involved */
		break;
	}
	rc1 = kernel_sock_shutdown(smc->clcsock, how);
	/* map sock_shutdown_cmd constants to sk_shutdown value range */
	sk->sk_shutdown |= how + 1;

out:
	release_sock(sk);
	return rc ? rc : rc1;
}

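/* Socket options are not interpreted by SMC itself; e.g. TCP_NODELAY set
 * on an AF_SMC socket ends up on the internal TCP socket through the
 * pass-through below.
 */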
static int smc_setsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;

	smc = smc_sk(sk);

	/* generic setsockopts reaching us here always apply to the
	 * CLC socket
	 */
	return smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
					     optval, optlen);
}

static int smc_getsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	struct smc_sock *smc;

	smc = smc_sk(sock->sk);
	/* socket options apply to the CLC socket */
	return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
					     optval, optlen);
}

static int smc_ioctl(struct socket *sock, unsigned int cmd,
		     unsigned long arg)
{
	struct smc_sock *smc;

	smc = smc_sk(sock->sk);
	if (smc->use_fallback)
		return smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
	else
		return sock_no_ioctl(sock, cmd, arg);
}

static ssize_t smc_sendpage(struct socket *sock, struct page *page,
			    int offset, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state != SMC_ACTIVE)
		goto out;
	if (smc->use_fallback)
		rc = kernel_sendpage(smc->clcsock, page, offset,
				     size, flags);
	else
		rc = sock_no_sendpage(sock, page, offset, size, flags);

out:
	release_sock(sk);
	return rc;
}

static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
			       struct pipe_inode_info *pipe, size_t len,
			       unsigned int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_CLOSED))
		goto out;
	if (smc->use_fallback) {
		rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
						    pipe, len, flags);
	} else {
		rc = -EOPNOTSUPP;
	}
out:
	release_sock(sk);
	return rc;
}

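/* User space view (illustration only): an AF_SMC socket is created and used
 * like a TCP socket, only the address family differs; the address passed to
 * bind()/connect() remains a struct sockaddr_in:
 *
 *	sd = socket(AF_SMC, SOCK_STREAM, 0);
 *	connect(sd, (struct sockaddr *)&sin, sizeof(sin));
 *	write(sd, buf, buflen);
 */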
/* must look like tcp */
static const struct proto_ops smc_sock_ops = {
	.family		= PF_SMC,
	.owner		= THIS_MODULE,
	.release	= smc_release,
	.bind		= smc_bind,
	.connect	= smc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= smc_accept,
	.getname	= smc_getname,
	.poll		= smc_poll,
	.ioctl		= smc_ioctl,
	.listen		= smc_listen,
	.shutdown	= smc_shutdown,
	.setsockopt	= smc_setsockopt,
	.getsockopt	= smc_getsockopt,
	.sendmsg	= smc_sendmsg,
	.recvmsg	= smc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= smc_sendpage,
	.splice_read	= smc_splice_read,
};

static int smc_create(struct net *net, struct socket *sock, int protocol,
		      int kern)
{
	struct smc_sock *smc;
	struct sock *sk;
	int rc;

	rc = -ESOCKTNOSUPPORT;
	if (sock->type != SOCK_STREAM)
		goto out;

	rc = -EPROTONOSUPPORT;
	if ((protocol != IPPROTO_IP) && (protocol != IPPROTO_TCP))
		goto out;

	rc = -ENOBUFS;
	sock->ops = &smc_sock_ops;
	sk = smc_sock_alloc(net, sock);
	if (!sk)
		goto out;

	/* create internal TCP socket for CLC handshake and fallback */
	smc = smc_sk(sk);
	smc->use_fallback = false; /* assume rdma capability first */
	rc = sock_create_kern(net, PF_INET, SOCK_STREAM,
			      IPPROTO_TCP, &smc->clcsock);
	if (rc) {
		sk_common_release(sk);
		goto out; /* sk is released; smc->clcsock was never set */
	}
	smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
	smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);

out:
	return rc;
}

static const struct net_proto_family smc_sock_family_ops = {
	.family	= PF_SMC,
	.owner	= THIS_MODULE,
	.create	= smc_create,
};

static int __init smc_init(void)
{
	int rc;

	rc = smc_pnet_init();
	if (rc)
		return rc;

	rc = smc_llc_init();
	if (rc) {
		pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = smc_cdc_init();
	if (rc) {
		pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = proto_register(&smc_proto, 1);
	if (rc) {
		pr_err("%s: proto_register fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = sock_register(&smc_sock_family_ops);
	if (rc) {
		pr_err("%s: sock_register fails with %d\n", __func__, rc);
		goto out_proto;
	}
	INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);

	rc = smc_ib_register_client();
	if (rc) {
		pr_err("%s: ib_register fails with %d\n", __func__, rc);
		goto out_sock;
	}

	return 0;

out_sock:
	sock_unregister(PF_SMC);
out_proto:
	proto_unregister(&smc_proto);
out_pnet:
	smc_pnet_exit();
	return rc;
}

static void __exit smc_exit(void)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_freeing_list);

	spin_lock_bh(&smc_lgr_list.lock);
	if (!list_empty(&smc_lgr_list.list))
		list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
	spin_unlock_bh(&smc_lgr_list.lock);
	list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
		list_del_init(&lgr->list);
		smc_lgr_free(lgr); /* free link group */
	}
	smc_ib_unregister_client();
	sock_unregister(PF_SMC);
	proto_unregister(&smc_proto);
	smc_pnet_exit();
}

module_init(smc_init);
module_exit(smc_exit);

MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("smc socket address family");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_SMC);