/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * AF_SMC protocol family socket handler keeping the AF_INET sock address type
 * applies to SOCK_STREAM sockets only
 * offers an alternative communication option for TCP-protocol sockets
 * applicable with RoCE-cards only
 *
 * Initial restrictions:
 * - non-blocking connect postponed
 * - IPv6 support postponed
 * - support for alternate links postponed
 * - partial support for non-blocking sockets only
 * - support for urgent data postponed
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
 *            based on prototype from Frank Blaschka
 */

#define KMSG_COMPONENT "smc"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/socket.h>
#include <linux/inetdevice.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/sched/signal.h>

#include <net/sock.h>
#include <net/tcp.h>
#include <net/smc.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_pnet.h"
#include "smc_tx.h"
#include "smc_rx.h"
#include "smc_close.h"

static DEFINE_MUTEX(smc_create_lgr_pending);	/* serialize link group
						 * creation
						 */

struct smc_lgr_list smc_lgr_list = {	/* established link groups */
	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
	.list = LIST_HEAD_INIT(smc_lgr_list.list),
};

static void smc_tcp_listen_work(struct work_struct *);

static void smc_set_keepalive(struct sock *sk, int val)
{
	struct smc_sock *smc = smc_sk(sk);

	smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
}

static struct smc_hashinfo smc_v4_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};

int smc_hash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
	struct hlist_head *head;

	head = &h->ht;

	write_lock_bh(&h->lock);
	sk_add_node(sk, head);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	write_unlock_bh(&h->lock);

	return 0;
}
EXPORT_SYMBOL_GPL(smc_hash_sk);

void smc_unhash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;

	write_lock_bh(&h->lock);
	if (sk_del_node_init(sk))
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	write_unlock_bh(&h->lock);
}
EXPORT_SYMBOL_GPL(smc_unhash_sk);

struct proto smc_proto = {
	.name		= "SMC",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v4_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto);
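/* release an SMC socket: terminate the SMC side of the connection,
 * detach and release the internal CLC/TCP socket, and defer the final
 * sock_put() via sock_put_work so outstanding close handshaking with
 * the peer can finish first
 */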
static int smc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = 0;

	if (!sk)
		goto out;

	smc = smc_sk(sk);
	sock_hold(sk);
	if (sk->sk_state == SMC_LISTEN)
		/* smc_close_non_accepted() is called and acquires
		 * sock lock for child sockets again
		 */
		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	else
		lock_sock(sk);

	if (smc->use_fallback) {
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk);
	} else {
		rc = smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	}
	if (smc->clcsock) {
		sock_release(smc->clcsock);
		smc->clcsock = NULL;
	}

	/* detach socket */
	sock_orphan(sk);
	sock->sk = NULL;
	if (smc->use_fallback) {
		schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
	} else if (sk->sk_state == SMC_CLOSED) {
		smc_conn_free(&smc->conn);
		schedule_delayed_work(&smc->sock_put_work,
				      SMC_CLOSE_SOCK_PUT_DELAY);
	}
	release_sock(sk);

	sock_put(sk);
out:
	return rc;
}

static void smc_destruct(struct sock *sk)
{
	if (sk->sk_state != SMC_CLOSED)
		return;
	if (!sock_flag(sk, SOCK_DEAD))
		return;

	sk_refcnt_debug_dec(sk);
}

static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
{
	struct smc_sock *smc;
	struct sock *sk;

	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, &smc_proto, 0);
	if (!sk)
		return NULL;

	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
	sk->sk_state = SMC_INIT;
	sk->sk_destruct = smc_destruct;
	sk->sk_protocol = SMCPROTO_SMC;
	smc = smc_sk(sk);
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	INIT_LIST_HEAD(&smc->accept_q);
	spin_lock_init(&smc->accept_q_lock);
	INIT_DELAYED_WORK(&smc->sock_put_work, smc_close_sock_put_work);
	sk->sk_prot->hash(sk);
	sk_refcnt_debug_inc(sk);

	return sk;
}

static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
		    int addr_len)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);

	/* replicate tests from inet_bind(), to be safe wrt. future changes */
	rc = -EINVAL;
	if (addr_len < sizeof(struct sockaddr_in))
		goto out;

	rc = -EAFNOSUPPORT;
	/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
	if ((addr->sin_family != AF_INET) &&
	    ((addr->sin_family != AF_UNSPEC) ||
	     (addr->sin_addr.s_addr != htonl(INADDR_ANY))))
		goto out;

	lock_sock(sk);

	/* Check if socket is already active */
	rc = -EINVAL;
	if (sk->sk_state != SMC_INIT)
		goto out_rel;

	smc->clcsock->sk->sk_reuse = sk->sk_reuse;
	rc = kernel_bind(smc->clcsock, uaddr, addr_len);

out_rel:
	release_sock(sk);
out:
	return rc;
}
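/* An SMC socket mirrors a subset of SOL_SOCKET state between itself and
 * its internal CLC/TCP socket; the two flag masks below select which
 * flags are propagated in each direction.
 */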
static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
				   unsigned long mask)
{
	/* options for which we don't get control via setsockopt */
	nsk->sk_type = osk->sk_type;
	nsk->sk_sndbuf = osk->sk_sndbuf;
	nsk->sk_rcvbuf = osk->sk_rcvbuf;
	nsk->sk_sndtimeo = osk->sk_sndtimeo;
	nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
	nsk->sk_mark = osk->sk_mark;
	nsk->sk_priority = osk->sk_priority;
	nsk->sk_rcvlowat = osk->sk_rcvlowat;
	nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
	nsk->sk_err = osk->sk_err;

	nsk->sk_flags &= ~mask;
	nsk->sk_flags |= osk->sk_flags & mask;
}

#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_BROADCAST) | \
			     (1UL << SOCK_TIMESTAMP) | \
			     (1UL << SOCK_DBG) | \
			     (1UL << SOCK_RCVTSTAMP) | \
			     (1UL << SOCK_RCVTSTAMPNS) | \
			     (1UL << SOCK_LOCALROUTE) | \
			     (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
			     (1UL << SOCK_RXQ_OVFL) | \
			     (1UL << SOCK_WIFI_STATUS) | \
			     (1UL << SOCK_NOFCS) | \
			     (1UL << SOCK_FILTER_LOCKED))
/* copy only relevant settings and flags of SOL_SOCKET level from smc to
 * clc socket (since smc is not called for these options from net/core)
 */
static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
	smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
}

#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_DBG))
/* copy only settings and flags relevant for smc from clc to smc socket */
static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
{
	smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}

/* determine subnet and mask of internal TCP socket */
int smc_netinfo_by_tcpsk(struct socket *clcsock,
			 __be32 *subnet, u8 *prefix_len)
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
	struct sockaddr_in addr;
	int rc = -ENOENT;
	int len;

	if (!dst) {
		rc = -ENOTCONN;
		goto out;
	}
	if (!dst->dev) {
		rc = -ENODEV;
		goto out_rel;
	}

	/* get address to which the internal TCP socket is bound */
	kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len);
	/* analyze IPv4 specific data of net_device belonging to TCP socket */
	for_ifa(dst->dev->ip_ptr) {
		if (ifa->ifa_address != addr.sin_addr.s_addr)
			continue;
		*prefix_len = inet_mask_len(ifa->ifa_mask);
		*subnet = ifa->ifa_address & ifa->ifa_mask;
		rc = 0;
		break;
	} endfor_ifa(dst->dev->ip_ptr);

out_rel:
	dst_release(dst);
out:
	return rc;
}

static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
{
	struct smc_link_group *lgr = smc->conn.lgr;
	struct smc_link *link;
	int rest;
	int rc;

	link = &lgr->lnk[SMC_SINGLE_LINK];
	/* receive CONFIRM LINK request from server over RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(
		&link->llc_confirm,
		SMC_LLC_WAIT_FIRST_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE);
		return rc;
	}

	rc = smc_ib_modify_qp_rts(link);
	if (rc)
		return SMC_CLC_DECL_INTERR;

	smc_wr_remember_qp_attr(link);

	rc = smc_wr_reg_send(link,
			     smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]);
	if (rc)
		return SMC_CLC_DECL_INTERR;

	/* send CONFIRM LINK response over RoCE fabric */
	rc = smc_llc_send_confirm_link(link,
				       link->smcibdev->mac[link->ibport - 1],
				       gid, SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TCL;

	return rc;
}

static void smc_conn_save_peer_info(struct smc_sock *smc,
				    struct smc_clc_msg_accept_confirm *clc)
{
	smc->conn.peer_conn_idx = clc->conn_idx;
	smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
	smc->conn.peer_rmbe_size = smc_uncompress_bufsize(clc->rmbe_size);
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
}

static void smc_link_save_peer_info(struct smc_link *link,
				    struct smc_clc_msg_accept_confirm *clc)
{
	link->peer_qpn = ntoh24(clc->qpn);
	memcpy(link->peer_gid, clc->lcl.gid, SMC_GID_SIZE);
	memcpy(link->peer_mac, clc->lcl.mac, sizeof(link->peer_mac));
	link->peer_psn = ntoh24(clc->psn);
	link->peer_mtu = clc->qp_mtu;
}
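/* Client-side CLC handshake as implemented by smc_connect_rdma() below:
 * send an SMC Proposal over the internal TCP socket, wait for the
 * server's Accept, create the link group, buffers and rtokens, and
 * answer with a Confirm; on first contact the new link is additionally
 * confirmed over the RoCE fabric (CONFIRM LINK exchange).  Any failure
 * leads to a Decline message and fallback to plain TCP.
 */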
/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc)
{
	struct sockaddr_in *inaddr = (struct sockaddr_in *)smc->addr;
	struct smc_clc_msg_accept_confirm aclc;
	int local_contact = SMC_FIRST_CONTACT;
	struct smc_ib_device *smcibdev;
	struct smc_link *link;
	u8 srv_first_contact;
	int reason_code = 0;
	int rc = 0;
	u8 ibport;

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(smc)) {
		reason_code = SMC_CLC_DECL_IPSEC;
		goto decline_rdma;
	}

	/* PNET table lookup: search active ib_device and port
	 * within same PNETID that also contains the ethernet device
	 * used for the internal TCP socket
	 */
	smc_pnet_find_roce_resource(smc->clcsock->sk, &smcibdev, &ibport);
	if (!smcibdev) {
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
		goto decline_rdma;
	}

	/* do inband token exchange */
	reason_code = smc_clc_send_proposal(smc, smcibdev, ibport);
	if (reason_code < 0) {
		rc = reason_code;
		goto out_err;
	}
	if (reason_code > 0) /* configuration error */
		goto decline_rdma;
	/* receive SMC Accept CLC message */
	reason_code = smc_clc_wait_msg(smc, &aclc, sizeof(aclc),
				       SMC_CLC_ACCEPT);
	if (reason_code < 0) {
		rc = reason_code;
		goto out_err;
	}
	if (reason_code > 0)
		goto decline_rdma;

	srv_first_contact = aclc.hdr.flag;
	mutex_lock(&smc_create_lgr_pending);
	local_contact = smc_conn_create(smc, inaddr->sin_addr.s_addr, smcibdev,
					ibport, &aclc.lcl, srv_first_contact);
	if (local_contact < 0) {
		rc = local_contact;
		if (rc == -ENOMEM)
			reason_code = SMC_CLC_DECL_MEM; /* insufficient memory */
		else if (rc == -ENOLINK)
			reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
		goto decline_rdma_unlock;
	}
	link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];

	smc_conn_save_peer_info(smc, &aclc);

	/* create send buffer and rmb */
	rc = smc_buf_create(smc);
	if (rc) {
		reason_code = SMC_CLC_DECL_MEM;
		goto decline_rdma_unlock;
	}

	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, &aclc);

	rc = smc_rmb_rtoken_handling(&smc->conn, &aclc);
	if (rc) {
		reason_code = SMC_CLC_DECL_INTERR;
		goto decline_rdma_unlock;
	}

	smc_close_init(smc);
	smc_rx_init(smc);

	if (local_contact == SMC_FIRST_CONTACT) {
		rc = smc_ib_ready_link(link);
		if (rc) {
			reason_code = SMC_CLC_DECL_INTERR;
			goto decline_rdma_unlock;
		}
	} else {
		struct smc_buf_desc *buf_desc = smc->conn.rmb_desc;

		if (!buf_desc->reused) {
			/* register memory region for new rmb */
			rc = smc_wr_reg_send(link,
					     buf_desc->mr_rx[SMC_SINGLE_LINK]);
			if (rc) {
				reason_code = SMC_CLC_DECL_INTERR;
				goto decline_rdma_unlock;
			}
		}
	}
	smc_rmb_sync_sg_for_device(&smc->conn);

	rc = smc_clc_send_confirm(smc);
	if (rc)
		goto out_err_unlock;

	if (local_contact == SMC_FIRST_CONTACT) {
		/* QP confirmation over RoCE fabric */
		reason_code = smc_clnt_conf_first_link(
			smc, &smcibdev->gid[ibport - 1]);
		if (reason_code < 0) {
			rc = reason_code;
			goto out_err_unlock;
		}
		if (reason_code > 0)
			goto decline_rdma_unlock;
	}

	mutex_unlock(&smc_create_lgr_pending);
	smc_tx_init(smc);

out_connected:
	smc_copy_sock_settings_to_clc(smc);
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return rc ? rc : local_contact;

decline_rdma_unlock:
	mutex_unlock(&smc_create_lgr_pending);
	smc_conn_free(&smc->conn);
decline_rdma:
	/* RDMA setup failed, switch back to TCP */
	smc->use_fallback = true;
	if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
		rc = smc_clc_send_decline(smc, reason_code, 0);
		if (rc < sizeof(struct smc_clc_msg_decline))
			goto out_err;
	}
	goto out_connected;

out_err_unlock:
	mutex_unlock(&smc_create_lgr_pending);
	smc_conn_free(&smc->conn);
out_err:
	return rc;
}
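/* Userspace view (illustrative sketch only, not part of this file):
 * an AF_SMC socket is used like a TCP socket, with AF_INET addresses;
 * the CLC handshake and a possible TCP fallback stay transparent:
 *
 *	int fd = socket(AF_SMC, SOCK_STREAM, 0);
 *	struct sockaddr_in sin = { .sin_family = AF_INET, ... };
 *	connect(fd, (struct sockaddr *)&sin, sizeof(sin));
 *	write(fd, buf, len);
 */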
static int smc_connect(struct socket *sock, struct sockaddr *addr,
		       int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;

	smc = smc_sk(sk);

	/* separate smc parameter checking to be safe */
	if (alen < sizeof(addr->sa_family))
		goto out_err;
	if (addr->sa_family != AF_INET)
		goto out_err;
	smc->addr = addr; /* needed for nonblocking connect */

	lock_sock(sk);
	switch (sk->sk_state) {
	default:
		goto out;
	case SMC_ACTIVE:
		rc = -EISCONN;
		goto out;
	case SMC_INIT:
		rc = 0;
		break;
	}

	smc_copy_sock_settings_to_clc(smc);
	rc = kernel_connect(smc->clcsock, addr, alen, flags);
	if (rc)
		goto out;

	/* setup RDMA connection */
	rc = smc_connect_rdma(smc);
	if (rc < 0)
		goto out;
	else
		rc = 0; /* success cases including fallback */

out:
	release_sock(sk);
out_err:
	return rc;
}

static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
{
	struct sock *sk = &lsmc->sk;
	struct socket *new_clcsock;
	struct sock *new_sk;
	int rc;

	release_sock(&lsmc->sk);
	new_sk = smc_sock_alloc(sock_net(sk), NULL);
	if (!new_sk) {
		rc = -ENOMEM;
		lsmc->sk.sk_err = ENOMEM;
		*new_smc = NULL;
		lock_sock(&lsmc->sk);
		goto out;
	}
	*new_smc = smc_sk(new_sk);

	rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
	lock_sock(&lsmc->sk);
	if (rc < 0) {
		lsmc->sk.sk_err = -rc;
		new_sk->sk_state = SMC_CLOSED;
		sock_set_flag(new_sk, SOCK_DEAD);
		sk->sk_prot->unhash(new_sk);
		sock_put(new_sk);
		*new_smc = NULL;
		goto out;
	}
	if (lsmc->sk.sk_state == SMC_CLOSED) {
		if (new_clcsock)
			sock_release(new_clcsock);
		new_sk->sk_state = SMC_CLOSED;
		sock_set_flag(new_sk, SOCK_DEAD);
		sk->sk_prot->unhash(new_sk);
		sock_put(new_sk);
		*new_smc = NULL;
		goto out;
	}

	(*new_smc)->clcsock = new_clcsock;
out:
	return rc;
}

/* add a just created sock to the accept queue of the listen sock as
 * candidate for a following socket accept call from user space
 */
static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
{
	struct smc_sock *par = smc_sk(parent);

	sock_hold(sk);
	spin_lock(&par->accept_q_lock);
	list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_added(parent);
}

/* remove a socket from the accept queue of its parent listening socket */
static void smc_accept_unlink(struct sock *sk)
{
	struct smc_sock *par = smc_sk(sk)->listen_smc;

	spin_lock(&par->accept_q_lock);
	list_del_init(&smc_sk(sk)->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
	sock_put(sk);
}
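/* The accept queue holds one reference per queued sock: taken via
 * sock_hold() in smc_accept_enqueue() and dropped via sock_put() in
 * smc_accept_unlink().
 */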
/* remove a sock from the accept queue to bind it to a new socket created
 * for a socket accept call from user space
 */
struct sock *smc_accept_dequeue(struct sock *parent,
				struct socket *new_sock)
{
	struct smc_sock *isk, *n;
	struct sock *new_sk;

	list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
		new_sk = (struct sock *)isk;

		smc_accept_unlink(new_sk);
		if (new_sk->sk_state == SMC_CLOSED) {
			new_sk->sk_prot->unhash(new_sk);
			sock_put(new_sk);
			continue;
		}
		if (new_sock)
			sock_graft(new_sk, new_sock);
		return new_sk;
	}
	return NULL;
}

/* clean up for a created but never accepted sock */
void smc_close_non_accepted(struct sock *sk)
{
	struct smc_sock *smc = smc_sk(sk);

	sock_hold(sk);
	lock_sock(sk);
	if (!sk->sk_lingertime)
		/* wait for peer closing */
		sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
	if (smc->use_fallback) {
		sk->sk_state = SMC_CLOSED;
	} else {
		smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	}
	if (smc->clcsock) {
		struct socket *tcp;

		tcp = smc->clcsock;
		smc->clcsock = NULL;
		sock_release(tcp);
	}
	if (smc->use_fallback) {
		schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
	} else if (sk->sk_state == SMC_CLOSED) {
		smc_conn_free(&smc->conn);
		schedule_delayed_work(&smc->sock_put_work,
				      SMC_CLOSE_SOCK_PUT_DELAY);
	}
	release_sock(sk);
	sock_put(sk);
}

static int smc_serv_conf_first_link(struct smc_sock *smc)
{
	struct smc_link_group *lgr = smc->conn.lgr;
	struct smc_link *link;
	int rest;
	int rc;

	link = &lgr->lnk[SMC_SINGLE_LINK];

	rc = smc_wr_reg_send(link,
			     smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]);
	if (rc)
		return SMC_CLC_DECL_INTERR;

	/* send CONFIRM LINK request to client over the RoCE fabric */
	rc = smc_llc_send_confirm_link(link,
				       link->smcibdev->mac[link->ibport - 1],
				       &link->smcibdev->gid[link->ibport - 1],
				       SMC_LLC_REQ);
	if (rc < 0)
		return SMC_CLC_DECL_TCL;

	/* receive CONFIRM LINK response from client over the RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(
		&link->llc_confirm_resp,
		SMC_LLC_WAIT_FIRST_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE);
	}

	return rc;
}
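/* Server-side CLC handshake as implemented by smc_listen_work() below:
 * per accepted CLC/TCP connection, wait for the client's Proposal,
 * verify the PNET/subnet configuration, create the link group and
 * buffers, send an Accept and wait for the client's Confirm; on first
 * contact the CONFIRM LINK exchange over the RoCE fabric follows.  Any
 * failure leads to a Decline message and fallback to plain TCP.
 */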
/* setup for RDMA connection of server */
static void smc_listen_work(struct work_struct *work)
{
	struct smc_sock *new_smc = container_of(work, struct smc_sock,
						smc_listen_work);
	struct socket *newclcsock = new_smc->clcsock;
	struct smc_sock *lsmc = new_smc->listen_smc;
	struct smc_clc_msg_accept_confirm cclc;
	int local_contact = SMC_REUSE_CONTACT;
	struct sock *newsmcsk = &new_smc->sk;
	struct smc_clc_msg_proposal pclc;
	struct smc_ib_device *smcibdev;
	struct sockaddr_in peeraddr;
	struct smc_link *link;
	int reason_code = 0;
	int rc = 0, len;
	__be32 subnet;
	u8 prefix_len;
	u8 ibport;

	/* do inband token exchange -
	 * wait for and receive SMC Proposal CLC message
	 */
	reason_code = smc_clc_wait_msg(new_smc, &pclc, sizeof(pclc),
				       SMC_CLC_PROPOSAL);
	if (reason_code < 0)
		goto out_err;
	if (reason_code > 0)
		goto decline_rdma;

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(new_smc)) {
		reason_code = SMC_CLC_DECL_IPSEC;
		goto decline_rdma;
	}

	/* PNET table lookup: search active ib_device and port
	 * within same PNETID that also contains the ethernet device
	 * used for the internal TCP socket
	 */
	smc_pnet_find_roce_resource(newclcsock->sk, &smcibdev, &ibport);
	if (!smcibdev) {
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
		goto decline_rdma;
	}

	/* determine subnet and mask from internal TCP socket */
	rc = smc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
	if (rc) {
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
		goto decline_rdma;
	}
	if ((pclc.outgoing_subnet != subnet) ||
	    (pclc.prefix_len != prefix_len)) {
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
		goto decline_rdma;
	}

	/* get address of the peer connected to the internal TCP socket */
	kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr, &len);

	/* allocate connection / link group */
	mutex_lock(&smc_create_lgr_pending);
	local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
					smcibdev, ibport, &pclc.lcl, 0);
	if (local_contact < 0) {
		rc = local_contact;
		if (rc == -ENOMEM)
			reason_code = SMC_CLC_DECL_MEM; /* insufficient memory */
		else if (rc == -ENOLINK)
			reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
		goto decline_rdma;
	}
	link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];

	/* create send buffer and rmb */
	rc = smc_buf_create(new_smc);
	if (rc) {
		reason_code = SMC_CLC_DECL_MEM;
		goto decline_rdma;
	}

	smc_close_init(new_smc);
	smc_rx_init(new_smc);

	if (local_contact != SMC_FIRST_CONTACT) {
		struct smc_buf_desc *buf_desc = new_smc->conn.rmb_desc;

		if (!buf_desc->reused) {
			/* register memory region for new rmb */
			rc = smc_wr_reg_send(link,
					     buf_desc->mr_rx[SMC_SINGLE_LINK]);
			if (rc) {
				reason_code = SMC_CLC_DECL_INTERR;
				goto decline_rdma;
			}
		}
	}
	smc_rmb_sync_sg_for_device(&new_smc->conn);

	rc = smc_clc_send_accept(new_smc, local_contact);
	if (rc)
		goto out_err;

	/* receive SMC Confirm CLC message */
	reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
				       SMC_CLC_CONFIRM);
	if (reason_code < 0)
		goto out_err;
	if (reason_code > 0)
		goto decline_rdma;
	smc_conn_save_peer_info(new_smc, &cclc);
	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, &cclc);

	rc = smc_rmb_rtoken_handling(&new_smc->conn, &cclc);
	if (rc) {
		reason_code = SMC_CLC_DECL_INTERR;
		goto decline_rdma;
	}

	if (local_contact == SMC_FIRST_CONTACT) {
		rc = smc_ib_ready_link(link);
		if (rc) {
			reason_code = SMC_CLC_DECL_INTERR;
			goto decline_rdma;
		}
		/* QP confirmation over RoCE fabric */
		reason_code = smc_serv_conf_first_link(new_smc);
		if (reason_code < 0) {
			/* peer is not aware of a problem */
			rc = reason_code;
			goto out_err;
		}
		if (reason_code > 0)
			goto decline_rdma;
	}

	smc_tx_init(new_smc);

out_connected:
	sk_refcnt_debug_inc(newsmcsk);
	if (newsmcsk->sk_state == SMC_INIT)
		newsmcsk->sk_state = SMC_ACTIVE;
enqueue:
	mutex_unlock(&smc_create_lgr_pending);
	lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
	if (lsmc->sk.sk_state == SMC_LISTEN) {
		smc_accept_enqueue(&lsmc->sk, newsmcsk);
	} else { /* no longer listening */
		smc_close_non_accepted(newsmcsk);
	}
	release_sock(&lsmc->sk);

	/* Wake up accept */
	lsmc->sk.sk_data_ready(&lsmc->sk);
	sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
	return;

decline_rdma:
	/* RDMA setup failed, switch back to TCP */
	smc_conn_free(&new_smc->conn);
	new_smc->use_fallback = true;
	if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
		rc = smc_clc_send_decline(new_smc, reason_code, 0);
		if (rc < sizeof(struct smc_clc_msg_decline))
			goto out_err;
	}
	goto out_connected;

out_err:
	newsmcsk->sk_state = SMC_CLOSED;
	smc_conn_free(&new_smc->conn);
	goto enqueue; /* queue new sock with sk_err set */
}
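/* accept new CLC/TCP connections on the internal listen socket and
 * schedule one smc_listen_work instance per accepted connection, for
 * as long as the SMC socket stays in SMC_LISTEN state
 */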
static void smc_tcp_listen_work(struct work_struct *work)
{
	struct smc_sock *lsmc = container_of(work, struct smc_sock,
					     tcp_listen_work);
	struct smc_sock *new_smc;
	int rc = 0;

	lock_sock(&lsmc->sk);
	while (lsmc->sk.sk_state == SMC_LISTEN) {
		rc = smc_clcsock_accept(lsmc, &new_smc);
		if (rc)
			goto out;
		if (!new_smc)
			continue;

		new_smc->listen_smc = lsmc;
		new_smc->use_fallback = false; /* assume rdma capability first */
		sock_hold(&lsmc->sk); /* sock_put in smc_listen_work */
		INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
		smc_copy_sock_settings_to_smc(new_smc);
		schedule_work(&new_smc->smc_listen_work);
	}

out:
	release_sock(&lsmc->sk);
	lsmc->sk.sk_data_ready(&lsmc->sk); /* no more listening, wake accept */
}

static int smc_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);
	lock_sock(sk);

	rc = -EINVAL;
	if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN))
		goto out;

	rc = 0;
	if (sk->sk_state == SMC_LISTEN) {
		sk->sk_max_ack_backlog = backlog;
		goto out;
	}
	/* some socket options are handled in core, so we could not apply
	 * them to the clc socket -- copy smc socket options to clc socket
	 */
	smc_copy_sock_settings_to_clc(smc);

	rc = kernel_listen(smc->clcsock, backlog);
	if (rc)
		goto out;
	sk->sk_max_ack_backlog = backlog;
	sk->sk_ack_backlog = 0;
	sk->sk_state = SMC_LISTEN;
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	schedule_work(&smc->tcp_listen_work);

out:
	release_sock(sk);
	return rc;
}
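/* wait for a connection on the accept queue of the listening SMC socket;
 * sleeps until smc_listen_work() wakes us via sk_data_ready(), the
 * receive timeout expires, or a signal arrives
 */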
static int smc_accept(struct socket *sock, struct socket *new_sock,
		      int flags, bool kern)
{
	struct sock *sk = sock->sk, *nsk;
	DECLARE_WAITQUEUE(wait, current);
	struct smc_sock *lsmc;
	long timeo;
	int rc = 0;

	lsmc = smc_sk(sk);
	lock_sock(sk);

	if (lsmc->sk.sk_state != SMC_LISTEN) {
		rc = -EINVAL;
		goto out;
	}

	/* Wait for an incoming connection */
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	add_wait_queue_exclusive(sk_sleep(sk), &wait);
	while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (!timeo) {
			rc = -EAGAIN;
			break;
		}
		release_sock(sk);
		timeo = schedule_timeout(timeo);
		/* wakeup by sk_data_ready in smc_listen_work() */
		sched_annotate_sleep();
		lock_sock(sk);
		if (signal_pending(current)) {
			rc = sock_intr_errno(timeo);
			break;
		}
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(sk_sleep(sk), &wait);

	if (!rc)
		rc = sock_error(nsk);

out:
	release_sock(sk);
	return rc;
}

static int smc_getname(struct socket *sock, struct sockaddr *addr,
		       int *len, int peer)
{
	struct smc_sock *smc;

	if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
	    (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
		return -ENOTCONN;

	smc = smc_sk(sock->sk);

	return smc->clcsock->ops->getname(smc->clcsock, addr, len, peer);
}

static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_INIT))
		goto out;
	if (smc->use_fallback)
		rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
	else
		rc = smc_tx_sendmsg(smc, msg, len);
out:
	release_sock(sk);
	return rc;
}

static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
		       int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state == SMC_INIT) ||
	    (sk->sk_state == SMC_LISTEN) ||
	    (sk->sk_state == SMC_CLOSED))
		goto out;

	if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
		rc = 0;
		goto out;
	}

	if (smc->use_fallback)
		rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
	else
		rc = smc_rx_recvmsg(smc, msg, len, flags);

out:
	release_sock(sk);
	return rc;
}

static unsigned int smc_accept_poll(struct sock *parent)
{
	struct smc_sock *isk;
	struct sock *sk;

	lock_sock(parent);
	list_for_each_entry(isk, &smc_sk(parent)->accept_q, accept_q) {
		sk = (struct sock *)isk;

		if (sk->sk_state == SMC_ACTIVE) {
			release_sock(parent);
			return POLLIN | POLLRDNORM;
		}
	}
	release_sock(parent);

	return 0;
}
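/* While the socket is in SMC_INIT state or runs in TCP fallback mode,
 * polling is delegated to the internal CLC socket; a POLLOUT wakeup in
 * SMC_INIT state means a non-blocking connect() on the CLC socket has
 * finished, so the deferred SMC handshake is triggered from here.
 */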
static unsigned int smc_poll(struct file *file, struct socket *sock,
			     poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask = 0;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sock->sk);
	if ((sk->sk_state == SMC_INIT) || smc->use_fallback) {
		/* delegate to CLC child sock */
		mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
		/* if non-blocking connect finished ... */
		lock_sock(sk);
		if ((sk->sk_state == SMC_INIT) && (mask & POLLOUT)) {
			sk->sk_err = smc->clcsock->sk->sk_err;
			if (sk->sk_err) {
				mask |= POLLERR;
			} else {
				rc = smc_connect_rdma(smc);
				if (rc < 0)
					mask |= POLLERR;
				else
					/* success cases including fallback */
					mask |= POLLOUT | POLLWRNORM;
			}
		}
		release_sock(sk);
	} else {
		sock_poll_wait(file, sk_sleep(sk), wait);
		if (sk->sk_state == SMC_LISTEN)
			/* woken up by sk_data_ready in smc_listen_work() */
			mask |= smc_accept_poll(sk);
		if (sk->sk_err)
			mask |= POLLERR;
		if (atomic_read(&smc->conn.sndbuf_space) ||
		    (sk->sk_shutdown & SEND_SHUTDOWN)) {
			mask |= POLLOUT | POLLWRNORM;
		} else {
			sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		}
		if (atomic_read(&smc->conn.bytes_to_rcv))
			mask |= POLLIN | POLLRDNORM;
		if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
		    (sk->sk_state == SMC_CLOSED))
			mask |= POLLHUP;
		if (sk->sk_shutdown & RCV_SHUTDOWN)
			mask |= POLLIN | POLLRDNORM | POLLRDHUP;
		if (sk->sk_state == SMC_APPCLOSEWAIT1)
			mask |= POLLIN;
	}

	return mask;
}

static int smc_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;
	int rc1 = 0;

	smc = smc_sk(sk);

	if ((how < SHUT_RD) || (how > SHUT_RDWR))
		return rc;

	lock_sock(sk);

	rc = -ENOTCONN;
	if ((sk->sk_state != SMC_LISTEN) &&
	    (sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPFINCLOSEWAIT))
		goto out;
	if (smc->use_fallback) {
		rc = kernel_sock_shutdown(smc->clcsock, how);
		sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
		if (sk->sk_shutdown == SHUTDOWN_MASK)
			sk->sk_state = SMC_CLOSED;
		goto out;
	}
	switch (how) {
	case SHUT_RDWR:		/* shutdown in both directions */
		rc = smc_close_active(smc);
		break;
	case SHUT_WR:
		rc = smc_close_shutdown_write(smc);
		break;
	case SHUT_RD:
		if (sk->sk_state == SMC_LISTEN)
			rc = smc_close_active(smc);
		else
			rc = 0;
			/* nothing more to do because peer is not involved */
		break;
	}
	rc1 = kernel_sock_shutdown(smc->clcsock, how);
	/* map sock_shutdown_cmd constants to sk_shutdown value range */
	sk->sk_shutdown |= how + 1;

out:
	release_sock(sk);
	return rc ? rc : rc1;
}
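/* Socket options are passed through to the internal CLC socket in both
 * directions, so standard TCP options keep working if the connection
 * ends up in TCP fallback mode.
 */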
static int smc_setsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;

	smc = smc_sk(sk);

	/* generic setsockopts reaching us here always apply to the
	 * CLC socket
	 */
	return smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
					     optval, optlen);
}

static int smc_getsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	struct smc_sock *smc;

	smc = smc_sk(sock->sk);
	/* socket options apply to the CLC socket */
	return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
					     optval, optlen);
}

static int smc_ioctl(struct socket *sock, unsigned int cmd,
		     unsigned long arg)
{
	struct smc_sock *smc;

	smc = smc_sk(sock->sk);
	if (smc->use_fallback)
		return smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
	else
		return sock_no_ioctl(sock, cmd, arg);
}

static ssize_t smc_sendpage(struct socket *sock, struct page *page,
			    int offset, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state != SMC_ACTIVE)
		goto out;
	if (smc->use_fallback)
		rc = kernel_sendpage(smc->clcsock, page, offset,
				     size, flags);
	else
		rc = sock_no_sendpage(sock, page, offset, size, flags);

out:
	release_sock(sk);
	return rc;
}

static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
			       struct pipe_inode_info *pipe, size_t len,
			       unsigned int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_CLOSED))
		goto out;
	if (smc->use_fallback) {
		rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
						    pipe, len, flags);
	} else {
		rc = -EOPNOTSUPP;
	}
out:
	release_sock(sk);
	return rc;
}

/* must look like tcp */
static const struct proto_ops smc_sock_ops = {
	.family		= PF_SMC,
	.owner		= THIS_MODULE,
	.release	= smc_release,
	.bind		= smc_bind,
	.connect	= smc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= smc_accept,
	.getname	= smc_getname,
	.poll		= smc_poll,
	.ioctl		= smc_ioctl,
	.listen		= smc_listen,
	.shutdown	= smc_shutdown,
	.setsockopt	= smc_setsockopt,
	.getsockopt	= smc_getsockopt,
	.sendmsg	= smc_sendmsg,
	.recvmsg	= smc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= smc_sendpage,
	.splice_read	= smc_splice_read,
};
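/* create a new SMC socket together with its internal CLC/TCP companion
 * socket, which carries the CLC handshake and serves as the data path
 * if the connection falls back to plain TCP
 */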
static int smc_create(struct net *net, struct socket *sock, int protocol,
		      int kern)
{
	struct smc_sock *smc;
	struct sock *sk;
	int rc;

	rc = -ESOCKTNOSUPPORT;
	if (sock->type != SOCK_STREAM)
		goto out;

	rc = -EPROTONOSUPPORT;
	if ((protocol != IPPROTO_IP) && (protocol != IPPROTO_TCP))
		goto out;

	rc = -ENOBUFS;
	sock->ops = &smc_sock_ops;
	sk = smc_sock_alloc(net, sock);
	if (!sk)
		goto out;

	/* create internal TCP socket for CLC handshake and fallback */
	smc = smc_sk(sk);
	smc->use_fallback = false; /* assume rdma capability first */
	rc = sock_create_kern(net, PF_INET, SOCK_STREAM,
			      IPPROTO_TCP, &smc->clcsock);
	if (rc) {
		sk_common_release(sk);
		goto out;	/* sk is released, don't touch smc->clcsock */
	}
	smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
	smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);

out:
	return rc;
}

static const struct net_proto_family smc_sock_family_ops = {
	.family	= PF_SMC,
	.owner	= THIS_MODULE,
	.create	= smc_create,
};

static int __init smc_init(void)
{
	int rc;

	rc = smc_pnet_init();
	if (rc)
		return rc;

	rc = smc_llc_init();
	if (rc) {
		pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = smc_cdc_init();
	if (rc) {
		pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = proto_register(&smc_proto, 1);
	if (rc) {
		pr_err("%s: proto_register fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = sock_register(&smc_sock_family_ops);
	if (rc) {
		pr_err("%s: sock_register fails with %d\n", __func__, rc);
		goto out_proto;
	}
	INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);

	rc = smc_ib_register_client();
	if (rc) {
		pr_err("%s: ib_register fails with %d\n", __func__, rc);
		goto out_sock;
	}

	return 0;

out_sock:
	sock_unregister(PF_SMC);
out_proto:
	proto_unregister(&smc_proto);
out_pnet:
	smc_pnet_exit();
	return rc;
}

static void __exit smc_exit(void)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_freeing_list);

	spin_lock_bh(&smc_lgr_list.lock);
	if (!list_empty(&smc_lgr_list.list))
		list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
	spin_unlock_bh(&smc_lgr_list.lock);
	list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
		list_del_init(&lgr->list);
		smc_lgr_free(lgr); /* free link group */
	}
	smc_ib_unregister_client();
	sock_unregister(PF_SMC);
	proto_unregister(&smc_proto);
	smc_pnet_exit();
}

module_init(smc_init);
module_exit(smc_exit);

MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("smc socket address family");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_SMC);