/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * AF_SMC protocol family socket handler keeping the AF_INET sock address type
 * applies to SOCK_STREAM sockets only
 * offers an alternative communication option for TCP-protocol sockets
 * applicable with RoCE-cards only
 *
 * Initial restrictions:
 *   - non-blocking connect postponed
 *   - IPv6 support postponed
 *   - support for alternate links postponed
 *   - partial support for non-blocking sockets only
 *   - support for urgent data postponed
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 *             based on prototype from Frank Blaschka
 */

#define KMSG_COMPONENT "smc"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/socket.h>
#include <linux/inetdevice.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/sched/signal.h>

#include <net/sock.h>
#include <net/tcp.h>
#include <net/smc.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_pnet.h"
#include "smc_tx.h"
#include "smc_rx.h"
#include "smc_close.h"

static DEFINE_MUTEX(smc_create_lgr_pending);	/* serialize link group
						 * creation
						 */

struct smc_lgr_list smc_lgr_list = {	/* established link groups */
	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
	.list = LIST_HEAD_INIT(smc_lgr_list.list),
};

static void smc_tcp_listen_work(struct work_struct *);

static void smc_set_keepalive(struct sock *sk, int val)
{
	struct smc_sock *smc = smc_sk(sk);

	smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
}

static struct smc_hashinfo smc_v4_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};

int smc_hash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
	struct hlist_head *head;

	head = &h->ht;

	write_lock_bh(&h->lock);
	sk_add_node(sk, head);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	write_unlock_bh(&h->lock);

	return 0;
}
EXPORT_SYMBOL_GPL(smc_hash_sk);

void smc_unhash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;

	write_lock_bh(&h->lock);
	if (sk_del_node_init(sk))
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	write_unlock_bh(&h->lock);
}
EXPORT_SYMBOL_GPL(smc_unhash_sk);

struct proto smc_proto = {
	.name		= "SMC",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v4_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto);
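/* Socket release path (informational summary of the code below):
 * smc_release() moves the socket toward SMC_CLOSED (directly for TCP
 * fallback sockets, via smc_close_active() otherwise), releases the
 * internal CLC/TCP socket, detaches the smc sock from its struct socket,
 * and defers the final sock_put() through the delayed sock_put_work
 * (after TCP_TIMEWAIT_LEN for fallback sockets, SMC_CLOSE_SOCK_PUT_DELAY
 * otherwise).
 */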
static int smc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = 0;

	if (!sk)
		goto out;

	smc = smc_sk(sk);
	sock_hold(sk);
	if (sk->sk_state == SMC_LISTEN)
		/* smc_close_non_accepted() is called and acquires
		 * sock lock for child sockets again
		 */
		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	else
		lock_sock(sk);

	if (smc->use_fallback) {
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk);
	} else {
		rc = smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	}
	if (smc->clcsock) {
		sock_release(smc->clcsock);
		smc->clcsock = NULL;
	}

	/* detach socket */
	sock_orphan(sk);
	sock->sk = NULL;
	if (smc->use_fallback) {
		schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
	} else if (sk->sk_state == SMC_CLOSED) {
		smc_conn_free(&smc->conn);
		schedule_delayed_work(&smc->sock_put_work,
				      SMC_CLOSE_SOCK_PUT_DELAY);
	}
	release_sock(sk);

	sock_put(sk);
out:
	return rc;
}

static void smc_destruct(struct sock *sk)
{
	if (sk->sk_state != SMC_CLOSED)
		return;
	if (!sock_flag(sk, SOCK_DEAD))
		return;

	sk_refcnt_debug_dec(sk);
}

static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
{
	struct smc_sock *smc;
	struct sock *sk;

	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, &smc_proto, 0);
	if (!sk)
		return NULL;

	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
	sk->sk_state = SMC_INIT;
	sk->sk_destruct = smc_destruct;
	sk->sk_protocol = SMCPROTO_SMC;
	smc = smc_sk(sk);
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	INIT_LIST_HEAD(&smc->accept_q);
	spin_lock_init(&smc->accept_q_lock);
	INIT_DELAYED_WORK(&smc->sock_put_work, smc_close_sock_put_work);
	sk->sk_prot->hash(sk);
	sk_refcnt_debug_inc(sk);

	return sk;
}

static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
		    int addr_len)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);

	/* replicate tests from inet_bind(), to be safe wrt. future changes */
	rc = -EINVAL;
	if (addr_len < sizeof(struct sockaddr_in))
		goto out;

	rc = -EAFNOSUPPORT;
	/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
	if ((addr->sin_family != AF_INET) &&
	    ((addr->sin_family != AF_UNSPEC) ||
	     (addr->sin_addr.s_addr != htonl(INADDR_ANY))))
		goto out;

	lock_sock(sk);

	/* Check if socket is already active */
	rc = -EINVAL;
	if (sk->sk_state != SMC_INIT)
		goto out_rel;

	smc->clcsock->sk->sk_reuse = sk->sk_reuse;
	rc = kernel_bind(smc->clcsock, uaddr, addr_len);

out_rel:
	release_sock(sk);
out:
	return rc;
}

static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
				   unsigned long mask)
{
	/* options we don't get control of via setsockopt */
	nsk->sk_type = osk->sk_type;
	nsk->sk_sndbuf = osk->sk_sndbuf;
	nsk->sk_rcvbuf = osk->sk_rcvbuf;
	nsk->sk_sndtimeo = osk->sk_sndtimeo;
	nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
	nsk->sk_mark = osk->sk_mark;
	nsk->sk_priority = osk->sk_priority;
	nsk->sk_rcvlowat = osk->sk_rcvlowat;
	nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
	nsk->sk_err = osk->sk_err;

	nsk->sk_flags &= ~mask;
	nsk->sk_flags |= osk->sk_flags & mask;
}

#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_BROADCAST) | \
			     (1UL << SOCK_TIMESTAMP) | \
			     (1UL << SOCK_DBG) | \
			     (1UL << SOCK_RCVTSTAMP) | \
			     (1UL << SOCK_RCVTSTAMPNS) | \
			     (1UL << SOCK_LOCALROUTE) | \
			     (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
			     (1UL << SOCK_RXQ_OVFL) | \
			     (1UL << SOCK_WIFI_STATUS) | \
			     (1UL << SOCK_NOFCS) | \
			     (1UL << SOCK_FILTER_LOCKED))
/* copy only relevant settings and flags of SOL_SOCKET level from smc to
 * clc socket (since smc is not called for these options from net/core)
 */
static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
	smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
}

#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_DBG))
/* copy only settings and flags relevant for smc from clc to smc socket */
static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
{
	smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}

/* determine subnet and mask of internal TCP socket */
int smc_netinfo_by_tcpsk(struct socket *clcsock,
			 __be32 *subnet, u8 *prefix_len)
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
	struct sockaddr_in addr;
	int rc = -ENOENT;
	int len;

	if (!dst) {
		rc = -ENOTCONN;
		goto out;
	}
	if (!dst->dev) {
		rc = -ENODEV;
		goto out_rel;
	}

	/* get address to which the internal TCP socket is bound */
	kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len);
	/* analyze IPv4 specific data of net_device belonging to TCP socket */
	for_ifa(dst->dev->ip_ptr) {
		if (ifa->ifa_address != addr.sin_addr.s_addr)
			continue;
		*prefix_len = inet_mask_len(ifa->ifa_mask);
		*subnet = ifa->ifa_address & ifa->ifa_mask;
		rc = 0;
		break;
	} endfor_ifa(dst->dev->ip_ptr);

out_rel:
	dst_release(dst);
out:
	return rc;
}

static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
{
	struct smc_link_group *lgr = smc->conn.lgr;
	struct smc_link *link;
	int rest;
	int rc;

	link = &lgr->lnk[SMC_SINGLE_LINK];
	/* receive CONFIRM LINK request from server over RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(
		&link->llc_confirm,
		SMC_LLC_WAIT_FIRST_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE);
		return rc;
	}

	rc = smc_ib_modify_qp_rts(link);
	if (rc)
		return SMC_CLC_DECL_INTERR;

	smc_wr_remember_qp_attr(link);
	/* send CONFIRM LINK response over RoCE fabric */
	rc = smc_llc_send_confirm_link(link,
				       link->smcibdev->mac[link->ibport - 1],
				       gid, SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TCL;

	return rc;
}

static void smc_conn_save_peer_info(struct smc_sock *smc,
				    struct smc_clc_msg_accept_confirm *clc)
{
	smc->conn.peer_conn_idx = clc->conn_idx;
	smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
	smc->conn.peer_rmbe_size = smc_uncompress_bufsize(clc->rmbe_size);
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
}

static void smc_link_save_peer_info(struct smc_link *link,
				    struct smc_clc_msg_accept_confirm *clc)
{
	link->peer_qpn = ntoh24(clc->qpn);
	memcpy(link->peer_gid, clc->lcl.gid, SMC_GID_SIZE);
	memcpy(link->peer_mac, clc->lcl.mac, sizeof(link->peer_mac));
	link->peer_psn = ntoh24(clc->psn);
	link->peer_mtu = clc->qp_mtu;
}
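/* Sketch of the client-side CLC handshake as implemented by
 * smc_connect_rdma() below (informational summary derived from the code
 * in this file, not a normative protocol description):
 *
 *	client                               server
 *	------                               ------
 *	SMC Proposal  ---------------------->
 *	             <---------------------- SMC Accept (or Decline)
 *	SMC Confirm   ---------------------->
 *	             <-- LLC CONFIRM LINK req   (first contact only)
 *	LLC CONFIRM LINK resp -------------->
 *
 * A failure after the proposal is turned into an SMC Decline (unless the
 * peer already declined), and the connection falls back to plain TCP over
 * the internal clcsock.
 */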
/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc)
{
	struct sockaddr_in *inaddr = (struct sockaddr_in *)smc->addr;
	struct smc_clc_msg_accept_confirm aclc;
	int local_contact = SMC_FIRST_CONTACT;
	struct smc_ib_device *smcibdev;
	struct smc_link *link;
	u8 srv_first_contact;
	int reason_code = 0;
	int rc = 0;
	u8 ibport;

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(smc)) {
		reason_code = SMC_CLC_DECL_IPSEC;
		goto decline_rdma;
	}

	/* PNET table look up: search active ib_device and port
	 * within same PNETID that also contains the ethernet device
	 * used for the internal TCP socket
	 */
	smc_pnet_find_roce_resource(smc->clcsock->sk, &smcibdev, &ibport);
	if (!smcibdev) {
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
		goto decline_rdma;
	}

	/* do inband token exchange */
	reason_code = smc_clc_send_proposal(smc, smcibdev, ibport);
	if (reason_code < 0) {
		rc = reason_code;
		goto out_err;
	}
	if (reason_code > 0) /* configuration error */
		goto decline_rdma;
	/* receive SMC Accept CLC message */
	reason_code = smc_clc_wait_msg(smc, &aclc, sizeof(aclc),
				       SMC_CLC_ACCEPT);
	if (reason_code < 0) {
		rc = reason_code;
		goto out_err;
	}
	if (reason_code > 0)
		goto decline_rdma;

	srv_first_contact = aclc.hdr.flag;
	mutex_lock(&smc_create_lgr_pending);
	local_contact = smc_conn_create(smc, inaddr->sin_addr.s_addr, smcibdev,
					ibport, &aclc.lcl, srv_first_contact);
	if (local_contact < 0) {
		rc = local_contact;
		if (rc == -ENOMEM)
			reason_code = SMC_CLC_DECL_MEM; /* insufficient memory */
		else if (rc == -ENOLINK)
			reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
		goto decline_rdma_unlock;
	}
	link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];

	smc_conn_save_peer_info(smc, &aclc);

	rc = smc_sndbuf_create(smc);
	if (rc) {
		reason_code = SMC_CLC_DECL_MEM;
		goto decline_rdma_unlock;
	}
	rc = smc_rmb_create(smc);
	if (rc) {
		reason_code = SMC_CLC_DECL_MEM;
		goto decline_rdma_unlock;
	}

	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, &aclc);

	rc = smc_rmb_rtoken_handling(&smc->conn, &aclc);
	if (rc) {
		reason_code = SMC_CLC_DECL_INTERR;
		goto decline_rdma_unlock;
	}

	smc_close_init(smc);
	smc_rx_init(smc);

	if (local_contact == SMC_FIRST_CONTACT) {
		rc = smc_ib_ready_link(link);
		if (rc) {
			reason_code = SMC_CLC_DECL_INTERR;
			goto decline_rdma_unlock;
		}
	}

	rc = smc_clc_send_confirm(smc);
	if (rc)
		goto out_err_unlock;

	if (local_contact == SMC_FIRST_CONTACT) {
		/* QP confirmation over RoCE fabric */
		reason_code = smc_clnt_conf_first_link(
			smc, &smcibdev->gid[ibport - 1]);
		if (reason_code < 0) {
			rc = reason_code;
			goto out_err_unlock;
		}
		if (reason_code > 0)
			goto decline_rdma_unlock;
	}

	mutex_unlock(&smc_create_lgr_pending);
	smc_tx_init(smc);

out_connected:
	smc_copy_sock_settings_to_clc(smc);
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return rc ? rc : local_contact;

decline_rdma_unlock:
	mutex_unlock(&smc_create_lgr_pending);
	smc_conn_free(&smc->conn);
decline_rdma:
	/* RDMA setup failed, switch back to TCP */
	smc->use_fallback = true;
	if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
		rc = smc_clc_send_decline(smc, reason_code, 0);
		if (rc < sizeof(struct smc_clc_msg_decline))
			goto out_err;
	}
	goto out_connected;

out_err_unlock:
	mutex_unlock(&smc_create_lgr_pending);
	smc_conn_free(&smc->conn);
out_err:
	return rc;
}
static int smc_connect(struct socket *sock, struct sockaddr *addr,
		       int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;

	smc = smc_sk(sk);

	/* separate smc parameter checking to be safe */
	if (alen < sizeof(addr->sa_family))
		goto out_err;
	if (addr->sa_family != AF_INET)
		goto out_err;
	smc->addr = addr;	/* needed for nonblocking connect */

	lock_sock(sk);
	switch (sk->sk_state) {
	default:
		goto out;
	case SMC_ACTIVE:
		rc = -EISCONN;
		goto out;
	case SMC_INIT:
		rc = 0;
		break;
	}

	smc_copy_sock_settings_to_clc(smc);
	rc = kernel_connect(smc->clcsock, addr, alen, flags);
	if (rc)
		goto out;

	/* setup RDMA connection */
	rc = smc_connect_rdma(smc);
	if (rc < 0)
		goto out;
	else
		rc = 0; /* success cases including fallback */

out:
	release_sock(sk);
out_err:
	return rc;
}

static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
{
	struct sock *sk = &lsmc->sk;
	struct socket *new_clcsock;
	struct sock *new_sk;
	int rc;

	release_sock(&lsmc->sk);
	new_sk = smc_sock_alloc(sock_net(sk), NULL);
	if (!new_sk) {
		rc = -ENOMEM;
		lsmc->sk.sk_err = ENOMEM;
		*new_smc = NULL;
		lock_sock(&lsmc->sk);
		goto out;
	}
	*new_smc = smc_sk(new_sk);

	rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
	lock_sock(&lsmc->sk);
	if (rc < 0) {
		lsmc->sk.sk_err = -rc;
		new_sk->sk_state = SMC_CLOSED;
		sock_set_flag(new_sk, SOCK_DEAD);
		sk->sk_prot->unhash(new_sk);
		sock_put(new_sk);
		*new_smc = NULL;
		goto out;
	}
	if (lsmc->sk.sk_state == SMC_CLOSED) {
		if (new_clcsock)
			sock_release(new_clcsock);
		new_sk->sk_state = SMC_CLOSED;
		sock_set_flag(new_sk, SOCK_DEAD);
		sk->sk_prot->unhash(new_sk);
		sock_put(new_sk);
		*new_smc = NULL;
		goto out;
	}

	(*new_smc)->clcsock = new_clcsock;
out:
	return rc;
}

/* add a just created sock to the accept queue of the listen sock as
 * candidate for a following socket accept call from user space
 */
static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
{
	struct smc_sock *par = smc_sk(parent);

	sock_hold(sk);
	spin_lock(&par->accept_q_lock);
	list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_added(parent);
}

/* remove a socket from the accept queue of its parent listening socket */
static void smc_accept_unlink(struct sock *sk)
{
	struct smc_sock *par = smc_sk(sk)->listen_smc;

	spin_lock(&par->accept_q_lock);
	list_del_init(&smc_sk(sk)->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
	sock_put(sk);
}
/* remove a sock from the accept queue to bind it to a new socket created
 * for a socket accept call from user space
 */
struct sock *smc_accept_dequeue(struct sock *parent,
				struct socket *new_sock)
{
	struct smc_sock *isk, *n;
	struct sock *new_sk;

	list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
		new_sk = (struct sock *)isk;

		smc_accept_unlink(new_sk);
		if (new_sk->sk_state == SMC_CLOSED) {
			new_sk->sk_prot->unhash(new_sk);
			sock_put(new_sk);
			continue;
		}
		if (new_sock)
			sock_graft(new_sk, new_sock);
		return new_sk;
	}
	return NULL;
}

/* clean up for a created but never accepted sock */
void smc_close_non_accepted(struct sock *sk)
{
	struct smc_sock *smc = smc_sk(sk);

	sock_hold(sk);
	lock_sock(sk);
	if (!sk->sk_lingertime)
		/* wait for peer closing */
		sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
	if (smc->use_fallback) {
		sk->sk_state = SMC_CLOSED;
	} else {
		smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	}
	if (smc->clcsock) {
		struct socket *tcp;

		tcp = smc->clcsock;
		smc->clcsock = NULL;
		sock_release(tcp);
	}
	if (smc->use_fallback) {
		schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
	} else if (sk->sk_state == SMC_CLOSED) {
		smc_conn_free(&smc->conn);
		schedule_delayed_work(&smc->sock_put_work,
				      SMC_CLOSE_SOCK_PUT_DELAY);
	}
	release_sock(sk);
	sock_put(sk);
}

static int smc_serv_conf_first_link(struct smc_sock *smc)
{
	struct smc_link_group *lgr = smc->conn.lgr;
	struct smc_link *link;
	int rest;
	int rc;

	link = &lgr->lnk[SMC_SINGLE_LINK];
	/* send CONFIRM LINK request to client over the RoCE fabric */
	rc = smc_llc_send_confirm_link(link,
				       link->smcibdev->mac[link->ibport - 1],
				       &link->smcibdev->gid[link->ibport - 1],
				       SMC_LLC_REQ);
	if (rc < 0)
		return SMC_CLC_DECL_TCL;

	/* receive CONFIRM LINK response from client over the RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(
		&link->llc_confirm_resp,
		SMC_LLC_WAIT_FIRST_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE);
	}

	return rc;
}
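/* Listen-side flow (informational summary of the code below): smc_listen()
 * puts the internal clcsock into TCP listen state and schedules
 * smc_tcp_listen_work(), which accept()s incoming TCP connections in a
 * loop.  Each new connection gets its own smc_listen_work() instance that
 * runs the server side of the CLC handshake and finally enqueues the new
 * sock on the listen socket's accept queue, where smc_accept() picks it up.
 */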
/* setup for RDMA connection of server */
static void smc_listen_work(struct work_struct *work)
{
	struct smc_sock *new_smc = container_of(work, struct smc_sock,
						smc_listen_work);
	struct socket *newclcsock = new_smc->clcsock;
	struct smc_sock *lsmc = new_smc->listen_smc;
	struct smc_clc_msg_accept_confirm cclc;
	int local_contact = SMC_REUSE_CONTACT;
	struct sock *newsmcsk = &new_smc->sk;
	struct smc_clc_msg_proposal pclc;
	struct smc_ib_device *smcibdev;
	struct sockaddr_in peeraddr;
	struct smc_link *link;
	int reason_code = 0;
	int rc = 0, len;
	__be32 subnet;
	u8 prefix_len;
	u8 ibport;

	/* do inband token exchange -
	 * wait for and receive SMC Proposal CLC message
	 */
	reason_code = smc_clc_wait_msg(new_smc, &pclc, sizeof(pclc),
				       SMC_CLC_PROPOSAL);
	if (reason_code < 0)
		goto out_err;
	if (reason_code > 0)
		goto decline_rdma;

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(new_smc)) {
		reason_code = SMC_CLC_DECL_IPSEC;
		goto decline_rdma;
	}

	/* PNET table look up: search active ib_device and port
	 * within same PNETID that also contains the ethernet device
	 * used for the internal TCP socket
	 */
	smc_pnet_find_roce_resource(newclcsock->sk, &smcibdev, &ibport);
	if (!smcibdev) {
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
		goto decline_rdma;
	}

	/* determine subnet and mask from internal TCP socket */
	rc = smc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
	if (rc) {
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
		goto decline_rdma;
	}
	if ((pclc.outgoing_subnet != subnet) ||
	    (pclc.prefix_len != prefix_len)) {
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
		goto decline_rdma;
	}

	/* get address of the peer connected to the internal TCP socket */
	kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr, &len);

	/* allocate connection / link group */
	mutex_lock(&smc_create_lgr_pending);
	local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
					smcibdev, ibport, &pclc.lcl, 0);
	if (local_contact == SMC_REUSE_CONTACT)
		/* lock no longer needed, free it due to following
		 * smc_clc_wait_msg() call
		 */
		mutex_unlock(&smc_create_lgr_pending);
	if (local_contact < 0) {
		rc = local_contact;
		if (rc == -ENOMEM)
			reason_code = SMC_CLC_DECL_MEM; /* insufficient memory */
		else if (rc == -ENOLINK)
			reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
		goto decline_rdma;
	}
	link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];

	rc = smc_sndbuf_create(new_smc);
	if (rc) {
		reason_code = SMC_CLC_DECL_MEM;
		goto decline_rdma;
	}
	rc = smc_rmb_create(new_smc);
	if (rc) {
		reason_code = SMC_CLC_DECL_MEM;
		goto decline_rdma;
	}

	smc_close_init(new_smc);
	smc_rx_init(new_smc);

	rc = smc_clc_send_accept(new_smc, local_contact);
	if (rc)
		goto out_err;

	/* receive SMC Confirm CLC message */
	reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
				       SMC_CLC_CONFIRM);
	if (reason_code < 0)
		goto out_err;
	if (reason_code > 0)
		goto decline_rdma;
	smc_conn_save_peer_info(new_smc, &cclc);
	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, &cclc);

	rc = smc_rmb_rtoken_handling(&new_smc->conn, &cclc);
	if (rc) {
		reason_code = SMC_CLC_DECL_INTERR;
		goto decline_rdma;
	}

	if (local_contact == SMC_FIRST_CONTACT) {
		rc = smc_ib_ready_link(link);
		if (rc) {
			reason_code = SMC_CLC_DECL_INTERR;
			goto decline_rdma;
		}
		/* QP confirmation over RoCE fabric */
		reason_code = smc_serv_conf_first_link(new_smc);
		if (reason_code < 0) {
			/* peer is not aware of a problem */
			rc = reason_code;
			goto out_err;
		}
		if (reason_code > 0)
			goto decline_rdma;
	}

	smc_tx_init(new_smc);

out_connected:
	sk_refcnt_debug_inc(newsmcsk);
	if (newsmcsk->sk_state == SMC_INIT)
		newsmcsk->sk_state = SMC_ACTIVE;
enqueue:
	if (local_contact == SMC_FIRST_CONTACT)
		mutex_unlock(&smc_create_lgr_pending);
	lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
	if (lsmc->sk.sk_state == SMC_LISTEN) {
		smc_accept_enqueue(&lsmc->sk, newsmcsk);
	} else { /* no longer listening */
		smc_close_non_accepted(newsmcsk);
	}
	release_sock(&lsmc->sk);

	/* Wake up accept */
	lsmc->sk.sk_data_ready(&lsmc->sk);
	sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
	return;

decline_rdma:
	/* RDMA setup failed, switch back to TCP */
	smc_conn_free(&new_smc->conn);
	new_smc->use_fallback = true;
	if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
		rc = smc_clc_send_decline(new_smc, reason_code, 0);
		if (rc < sizeof(struct smc_clc_msg_decline))
			goto out_err;
	}
	goto out_connected;

out_err:
	newsmcsk->sk_state = SMC_CLOSED;
	smc_conn_free(&new_smc->conn);
	goto enqueue; /* queue new sock with sk_err set */
}
static void smc_tcp_listen_work(struct work_struct *work)
{
	struct smc_sock *lsmc = container_of(work, struct smc_sock,
					     tcp_listen_work);
	struct smc_sock *new_smc;
	int rc = 0;

	lock_sock(&lsmc->sk);
	while (lsmc->sk.sk_state == SMC_LISTEN) {
		rc = smc_clcsock_accept(lsmc, &new_smc);
		if (rc)
			goto out;
		if (!new_smc)
			continue;

		new_smc->listen_smc = lsmc;
		new_smc->use_fallback = false; /* assume rdma capability first */
		sock_hold(&lsmc->sk); /* sock_put in smc_listen_work */
		INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
		smc_copy_sock_settings_to_smc(new_smc);
		schedule_work(&new_smc->smc_listen_work);
	}

out:
	release_sock(&lsmc->sk);
	lsmc->sk.sk_data_ready(&lsmc->sk); /* no more listening, wake accept */
}

static int smc_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);
	lock_sock(sk);

	rc = -EINVAL;
	if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN))
		goto out;

	rc = 0;
	if (sk->sk_state == SMC_LISTEN) {
		sk->sk_max_ack_backlog = backlog;
		goto out;
	}
	/* some socket options are handled in core, so we could not apply
	 * them to the clc socket -- copy smc socket options to clc socket
	 */
	smc_copy_sock_settings_to_clc(smc);

	rc = kernel_listen(smc->clcsock, backlog);
	if (rc)
		goto out;
	sk->sk_max_ack_backlog = backlog;
	sk->sk_ack_backlog = 0;
	sk->sk_state = SMC_LISTEN;
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	schedule_work(&smc->tcp_listen_work);

out:
	release_sock(sk);
	return rc;
}

static int smc_accept(struct socket *sock, struct socket *new_sock,
		      int flags, bool kern)
{
	struct sock *sk = sock->sk, *nsk;
	DECLARE_WAITQUEUE(wait, current);
	struct smc_sock *lsmc;
	long timeo;
	int rc = 0;

	lsmc = smc_sk(sk);
	lock_sock(sk);

	if (lsmc->sk.sk_state != SMC_LISTEN) {
		rc = -EINVAL;
		goto out;
	}

	/* Wait for an incoming connection */
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	add_wait_queue_exclusive(sk_sleep(sk), &wait);
	while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (!timeo) {
			rc = -EAGAIN;
			break;
		}
		release_sock(sk);
		timeo = schedule_timeout(timeo);
		/* wakeup by sk_data_ready in smc_listen_work() */
		sched_annotate_sleep();
		lock_sock(sk);
		if (signal_pending(current)) {
			rc = sock_intr_errno(timeo);
			break;
		}
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(sk_sleep(sk), &wait);

	if (!rc)
		rc = sock_error(nsk);

out:
	release_sock(sk);
	return rc;
}
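/* The socket calls below share one pattern: while the connection runs in
 * TCP fallback mode (use_fallback), or where only generic TCP semantics
 * are needed, the operation is delegated to the internal CLC/TCP socket
 * via smc->clcsock->ops; otherwise the SMC-specific smc_tx_*()/smc_rx_*()
 * paths are used.
 */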
static int smc_getname(struct socket *sock, struct sockaddr *addr,
		       int *len, int peer)
{
	struct smc_sock *smc;

	if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
	    (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
		return -ENOTCONN;

	smc = smc_sk(sock->sk);

	return smc->clcsock->ops->getname(smc->clcsock, addr, len, peer);
}

static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_INIT))
		goto out;
	if (smc->use_fallback)
		rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
	else
		rc = smc_tx_sendmsg(smc, msg, len);
out:
	release_sock(sk);
	return rc;
}

static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
		       int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state == SMC_INIT) ||
	    (sk->sk_state == SMC_LISTEN) ||
	    (sk->sk_state == SMC_CLOSED))
		goto out;

	if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
		rc = 0;
		goto out;
	}

	if (smc->use_fallback)
		rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
	else
		rc = smc_rx_recvmsg(smc, msg, len, flags);

out:
	release_sock(sk);
	return rc;
}

static unsigned int smc_accept_poll(struct sock *parent)
{
	struct smc_sock *isk;
	struct sock *sk;

	lock_sock(parent);
	list_for_each_entry(isk, &smc_sk(parent)->accept_q, accept_q) {
		sk = (struct sock *)isk;

		if (sk->sk_state == SMC_ACTIVE) {
			release_sock(parent);
			return POLLIN | POLLRDNORM;
		}
	}
	release_sock(parent);

	return 0;
}
static unsigned int smc_poll(struct file *file, struct socket *sock,
			     poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask = 0;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sock->sk);
	if ((sk->sk_state == SMC_INIT) || smc->use_fallback) {
		/* delegate to CLC child sock */
		mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
		/* if non-blocking connect finished ... */
		lock_sock(sk);
		if ((sk->sk_state == SMC_INIT) && (mask & POLLOUT)) {
			sk->sk_err = smc->clcsock->sk->sk_err;
			if (sk->sk_err) {
				mask |= POLLERR;
			} else {
				rc = smc_connect_rdma(smc);
				if (rc < 0)
					mask |= POLLERR;
				else
					/* success cases including fallback */
					mask |= POLLOUT | POLLWRNORM;
			}
		}
		release_sock(sk);
	} else {
		sock_poll_wait(file, sk_sleep(sk), wait);
		if (sk->sk_state == SMC_LISTEN)
			/* woken up by sk_data_ready in smc_listen_work() */
			mask |= smc_accept_poll(sk);
		if (sk->sk_err)
			mask |= POLLERR;
		if (atomic_read(&smc->conn.sndbuf_space) ||
		    (sk->sk_shutdown & SEND_SHUTDOWN)) {
			mask |= POLLOUT | POLLWRNORM;
		} else {
			sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		}
		if (atomic_read(&smc->conn.bytes_to_rcv))
			mask |= POLLIN | POLLRDNORM;
		if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
		    (sk->sk_state == SMC_CLOSED))
			mask |= POLLHUP;
		if (sk->sk_shutdown & RCV_SHUTDOWN)
			mask |= POLLIN | POLLRDNORM | POLLRDHUP;
		if (sk->sk_state == SMC_APPCLOSEWAIT1)
			mask |= POLLIN;
	}

	return mask;
}

static int smc_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;
	int rc1 = 0;

	smc = smc_sk(sk);

	if ((how < SHUT_RD) || (how > SHUT_RDWR))
		return rc;

	lock_sock(sk);

	rc = -ENOTCONN;
	if ((sk->sk_state != SMC_LISTEN) &&
	    (sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPFINCLOSEWAIT))
		goto out;
	if (smc->use_fallback) {
		rc = kernel_sock_shutdown(smc->clcsock, how);
		sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
		if (sk->sk_shutdown == SHUTDOWN_MASK)
			sk->sk_state = SMC_CLOSED;
		goto out;
	}
	switch (how) {
	case SHUT_RDWR:		/* shutdown in both directions */
		rc = smc_close_active(smc);
		break;
	case SHUT_WR:
		rc = smc_close_shutdown_write(smc);
		break;
	case SHUT_RD:
		if (sk->sk_state == SMC_LISTEN)
			rc = smc_close_active(smc);
		else
			rc = 0;
			/* nothing more to do because peer is not involved */
		break;
	}
	rc1 = kernel_sock_shutdown(smc->clcsock, how);
	/* map sock_shutdown_cmd constants to sk_shutdown value range */
	sk->sk_shutdown |= how + 1;

out:
	release_sock(sk);
	return rc ? rc : rc1;
}
static int smc_setsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;

	smc = smc_sk(sk);

	/* generic setsockopts reaching us here always apply to the
	 * CLC socket
	 */
	return smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
					     optval, optlen);
}

static int smc_getsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	struct smc_sock *smc;

	smc = smc_sk(sock->sk);
	/* socket options apply to the CLC socket */
	return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
					     optval, optlen);
}

static int smc_ioctl(struct socket *sock, unsigned int cmd,
		     unsigned long arg)
{
	struct smc_sock *smc;

	smc = smc_sk(sock->sk);
	if (smc->use_fallback)
		return smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
	else
		return sock_no_ioctl(sock, cmd, arg);
}

static ssize_t smc_sendpage(struct socket *sock, struct page *page,
			    int offset, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state != SMC_ACTIVE)
		goto out;
	if (smc->use_fallback)
		rc = kernel_sendpage(smc->clcsock, page, offset,
				     size, flags);
	else
		rc = sock_no_sendpage(sock, page, offset, size, flags);

out:
	release_sock(sk);
	return rc;
}

static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
			       struct pipe_inode_info *pipe, size_t len,
			       unsigned int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_CLOSED))
		goto out;
	if (smc->use_fallback) {
		rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
						    pipe, len, flags);
	} else {
		rc = -EOPNOTSUPP;
	}
out:
	release_sock(sk);
	return rc;
}

/* must look like tcp */
static const struct proto_ops smc_sock_ops = {
	.family		= PF_SMC,
	.owner		= THIS_MODULE,
	.release	= smc_release,
	.bind		= smc_bind,
	.connect	= smc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= smc_accept,
	.getname	= smc_getname,
	.poll		= smc_poll,
	.ioctl		= smc_ioctl,
	.listen		= smc_listen,
	.shutdown	= smc_shutdown,
	.setsockopt	= smc_setsockopt,
	.getsockopt	= smc_getsockopt,
	.sendmsg	= smc_sendmsg,
	.recvmsg	= smc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= smc_sendpage,
	.splice_read	= smc_splice_read,
};
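/* Usage from user space (illustrative sketch only, assuming the AF_SMC
 * constant is available to the application; the checks in smc_create()
 * below require SOCK_STREAM with IPPROTO_IP or IPPROTO_TCP):
 *
 *	int fd = socket(AF_SMC, SOCK_STREAM, IPPROTO_TCP);
 *
 * From there on the fd behaves like a TCP socket (bind/listen/connect/
 * accept/send/recv); if RDMA setup fails, the connection transparently
 * falls back to the internal TCP socket.
 */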
static int smc_create(struct net *net, struct socket *sock, int protocol,
		      int kern)
{
	struct smc_sock *smc;
	struct sock *sk;
	int rc;

	rc = -ESOCKTNOSUPPORT;
	if (sock->type != SOCK_STREAM)
		goto out;

	rc = -EPROTONOSUPPORT;
	if ((protocol != IPPROTO_IP) && (protocol != IPPROTO_TCP))
		goto out;

	rc = -ENOBUFS;
	sock->ops = &smc_sock_ops;
	sk = smc_sock_alloc(net, sock);
	if (!sk)
		goto out;

	/* create internal TCP socket for CLC handshake and fallback */
	smc = smc_sk(sk);
	smc->use_fallback = false; /* assume rdma capability first */
	rc = sock_create_kern(net, PF_INET, SOCK_STREAM,
			      IPPROTO_TCP, &smc->clcsock);
	if (rc) {
		/* bail out; clcsock was not created, so it must not be
		 * dereferenced below
		 */
		sk_common_release(sk);
		goto out;
	}
	smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
	smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);

out:
	return rc;
}

static const struct net_proto_family smc_sock_family_ops = {
	.family	= PF_SMC,
	.owner	= THIS_MODULE,
	.create	= smc_create,
};

static int __init smc_init(void)
{
	int rc;

	rc = smc_pnet_init();
	if (rc)
		return rc;

	rc = smc_llc_init();
	if (rc) {
		pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = smc_cdc_init();
	if (rc) {
		pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = proto_register(&smc_proto, 1);
	if (rc) {
		pr_err("%s: proto_register fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = sock_register(&smc_sock_family_ops);
	if (rc) {
		pr_err("%s: sock_register fails with %d\n", __func__, rc);
		goto out_proto;
	}
	INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);

	rc = smc_ib_register_client();
	if (rc) {
		pr_err("%s: ib_register fails with %d\n", __func__, rc);
		goto out_sock;
	}

	return 0;

out_sock:
	sock_unregister(PF_SMC);
out_proto:
	proto_unregister(&smc_proto);
out_pnet:
	smc_pnet_exit();
	return rc;
}

static void __exit smc_exit(void)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_freeing_list);

	spin_lock_bh(&smc_lgr_list.lock);
	if (!list_empty(&smc_lgr_list.list))
		list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
	spin_unlock_bh(&smc_lgr_list.lock);
	list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
		list_del_init(&lgr->list);
		smc_lgr_free(lgr); /* free link group */
	}
	smc_ib_unregister_client();
	sock_unregister(PF_SMC);
	proto_unregister(&smc_proto);
	smc_pnet_exit();
}

module_init(smc_init);
module_exit(smc_exit);

MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("smc socket address family");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_SMC);