/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * AF_SMC protocol family socket handler keeping the AF_INET sock address type
 * applies to SOCK_STREAM sockets only
 * offers an alternative communication option for TCP-protocol sockets
 * applicable with RoCE-cards only
 *
 * Initial restrictions:
 *   - non-blocking connect postponed
 *   - IPv6 support postponed
 *   - support for alternate links postponed
 *   - partial support for non-blocking sockets only
 *   - support for urgent data postponed
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 *             based on prototype from Frank Blaschka
 */

#define KMSG_COMPONENT "smc"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/socket.h>
#include <linux/inetdevice.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/sched/signal.h>

#include <net/sock.h>
#include <net/tcp.h>
#include <net/smc.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_pnet.h"
#include "smc_tx.h"
#include "smc_rx.h"
#include "smc_close.h"

static DEFINE_MUTEX(smc_create_lgr_pending);	/* serialize link group
						 * creation
						 */

struct smc_lgr_list smc_lgr_list = {	/* established link groups */
	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
	.list = LIST_HEAD_INIT(smc_lgr_list.list),
};

static void smc_tcp_listen_work(struct work_struct *);

static void smc_set_keepalive(struct sock *sk, int val)
{
	struct smc_sock *smc = smc_sk(sk);

	smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
}

static struct smc_hashinfo smc_v4_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};

int smc_hash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
	struct hlist_head *head;

	head = &h->ht;

	write_lock_bh(&h->lock);
	sk_add_node(sk, head);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	write_unlock_bh(&h->lock);

	return 0;
}
EXPORT_SYMBOL_GPL(smc_hash_sk);

void smc_unhash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;

	write_lock_bh(&h->lock);
	if (sk_del_node_init(sk))
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	write_unlock_bh(&h->lock);
}
EXPORT_SYMBOL_GPL(smc_unhash_sk);

struct proto smc_proto = {
	.name		= "SMC",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v4_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto);
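
/* Illustrative sketch, not part of this file: because AF_SMC keeps the
 * AF_INET sockaddr format (see header comment above), user space drives an
 * SMC socket exactly like a TCP one, assuming the libc headers expose
 * AF_SMC:
 *
 *	int fd = socket(AF_SMC, SOCK_STREAM, 0);
 *	struct sockaddr_in sa = {
 *		.sin_family = AF_INET,
 *		.sin_port = htons(12345),
 *		.sin_addr.s_addr = htonl(INADDR_ANY),
 *	};
 *	bind(fd, (struct sockaddr *)&sa, sizeof(sa));
 *	listen(fd, 128);
 */
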
static int smc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = 0;

	if (!sk)
		goto out;

	smc = smc_sk(sk);
	sock_hold(sk);
	if (sk->sk_state == SMC_LISTEN)
		/* smc_close_non_accepted() is called and acquires
		 * sock lock for child sockets again
		 */
		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	else
		lock_sock(sk);

	if (smc->use_fallback) {
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk);
	} else {
		rc = smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	}
	if (smc->clcsock) {
		sock_release(smc->clcsock);
		smc->clcsock = NULL;
	}

	/* detach socket */
	sock_orphan(sk);
	sock->sk = NULL;
	if (smc->use_fallback) {
		schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
	} else if (sk->sk_state == SMC_CLOSED) {
		smc_conn_free(&smc->conn);
		schedule_delayed_work(&smc->sock_put_work,
				      SMC_CLOSE_SOCK_PUT_DELAY);
	}
	release_sock(sk);

	sock_put(sk);
out:
	return rc;
}

static void smc_destruct(struct sock *sk)
{
	if (sk->sk_state != SMC_CLOSED)
		return;
	if (!sock_flag(sk, SOCK_DEAD))
		return;

	sk_refcnt_debug_dec(sk);
}

static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
{
	struct smc_sock *smc;
	struct sock *sk;

	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, &smc_proto, 0);
	if (!sk)
		return NULL;

	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
	sk->sk_state = SMC_INIT;
	sk->sk_destruct = smc_destruct;
	sk->sk_protocol = SMCPROTO_SMC;
	smc = smc_sk(sk);
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	INIT_LIST_HEAD(&smc->accept_q);
	spin_lock_init(&smc->accept_q_lock);
	INIT_DELAYED_WORK(&smc->sock_put_work, smc_close_sock_put_work);
	sk->sk_prot->hash(sk);
	sk_refcnt_debug_inc(sk);

	return sk;
}

static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
		    int addr_len)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);

	/* replicate tests from inet_bind(), to be safe wrt. future changes */
	rc = -EINVAL;
	if (addr_len < sizeof(struct sockaddr_in))
		goto out;

	rc = -EAFNOSUPPORT;
	/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
	if ((addr->sin_family != AF_INET) &&
	    ((addr->sin_family != AF_UNSPEC) ||
	     (addr->sin_addr.s_addr != htonl(INADDR_ANY))))
		goto out;

	lock_sock(sk);

	/* Check if socket is already active */
	rc = -EINVAL;
	if (sk->sk_state != SMC_INIT)
		goto out_rel;

	smc->clcsock->sk->sk_reuse = sk->sk_reuse;
	rc = kernel_bind(smc->clcsock, uaddr, addr_len);

out_rel:
	release_sock(sk);
out:
	return rc;
}

static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
				   unsigned long mask)
{
	/* options we don't get control via setsockopt for */
	nsk->sk_type = osk->sk_type;
	nsk->sk_sndbuf = osk->sk_sndbuf;
	nsk->sk_rcvbuf = osk->sk_rcvbuf;
	nsk->sk_sndtimeo = osk->sk_sndtimeo;
	nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
	nsk->sk_mark = osk->sk_mark;
	nsk->sk_priority = osk->sk_priority;
	nsk->sk_rcvlowat = osk->sk_rcvlowat;
	nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
	nsk->sk_err = osk->sk_err;

	nsk->sk_flags &= ~mask;
	nsk->sk_flags |= osk->sk_flags & mask;
}

#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_BROADCAST) | \
			     (1UL << SOCK_TIMESTAMP) | \
			     (1UL << SOCK_DBG) | \
			     (1UL << SOCK_RCVTSTAMP) | \
			     (1UL << SOCK_RCVTSTAMPNS) | \
			     (1UL << SOCK_LOCALROUTE) | \
			     (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
			     (1UL << SOCK_RXQ_OVFL) | \
			     (1UL << SOCK_WIFI_STATUS) | \
			     (1UL << SOCK_NOFCS) | \
			     (1UL << SOCK_FILTER_LOCKED))
/* copy only relevant settings and flags of SOL_SOCKET level from smc to
 * clc socket (since smc is not called for these options from net/core)
 */
static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
	smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
}

#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_DBG))
/* copy only settings and flags relevant for smc from clc to smc socket */
static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
{
	smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}

/* determine subnet and mask of internal TCP socket */
int smc_netinfo_by_tcpsk(struct socket *clcsock,
			 __be32 *subnet, u8 *prefix_len)
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
	struct in_device *in_dev;
	struct sockaddr_in addr;
	int rc = -ENOENT;
	int len;

	if (!dst) {
		rc = -ENOTCONN;
		goto out;
	}
	if (!dst->dev) {
		rc = -ENODEV;
		goto out_rel;
	}

	/* get address to which the internal TCP socket is bound */
	kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len);
	/* analyze IPv4 specific data of net_device belonging to TCP socket */
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(dst->dev);
	for_ifa(in_dev) {
		if (!inet_ifa_match(addr.sin_addr.s_addr, ifa))
			continue;
		*prefix_len = inet_mask_len(ifa->ifa_mask);
		*subnet = ifa->ifa_address & ifa->ifa_mask;
		rc = 0;
		break;
	} endfor_ifa(in_dev);
	rcu_read_unlock();

out_rel:
	dst_release(dst);
out:
	return rc;
}
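
/* Worked example for smc_netinfo_by_tcpsk(): if the clcsock is bound to
 * 192.168.1.5 on a device whose matching ifa has mask 255.255.255.0, then
 * inet_mask_len() yields a prefix_len of 24 and the reported subnet is
 * 192.168.1.0 (address ANDed with the mask). The server code below compares
 * these values against the ones carried in the peer's CLC proposal.
 */
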
static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid)
{
	struct smc_link_group *lgr = smc->conn.lgr;
	struct smc_link *link;
	int rest;
	int rc;

	link = &lgr->lnk[SMC_SINGLE_LINK];
	/* receive CONFIRM LINK request from server over RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(
		&link->llc_confirm,
		SMC_LLC_WAIT_FIRST_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE);
		return rc;
	}

	rc = smc_ib_modify_qp_rts(link);
	if (rc)
		return SMC_CLC_DECL_INTERR;

	smc_wr_remember_qp_attr(link);

	rc = smc_wr_reg_send(link,
			     smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]);
	if (rc)
		return SMC_CLC_DECL_INTERR;

	/* send CONFIRM LINK response over RoCE fabric */
	rc = smc_llc_send_confirm_link(link,
				       link->smcibdev->mac[link->ibport - 1],
				       gid, SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TCL;

	return rc;
}

static void smc_conn_save_peer_info(struct smc_sock *smc,
				    struct smc_clc_msg_accept_confirm *clc)
{
	smc->conn.peer_conn_idx = clc->conn_idx;
	smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
	smc->conn.peer_rmbe_size = smc_uncompress_bufsize(clc->rmbe_size);
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
}

static void smc_link_save_peer_info(struct smc_link *link,
				    struct smc_clc_msg_accept_confirm *clc)
{
	link->peer_qpn = ntoh24(clc->qpn);
	memcpy(link->peer_gid, clc->lcl.gid, SMC_GID_SIZE);
	memcpy(link->peer_mac, clc->lcl.mac, sizeof(link->peer_mac));
	link->peer_psn = ntoh24(clc->psn);
	link->peer_mtu = clc->qp_mtu;
}
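
/* Note on the conversions above: qpn and psn are 3-byte fields in the CLC
 * accept/confirm message; ntoh24() (a helper in smc.h) widens them to a
 * host-order u32, e.g. the wire bytes 0x01 0x02 0x03 become 0x00010203.
 */
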
/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc)
{
	struct sockaddr_in *inaddr = (struct sockaddr_in *)smc->addr;
	struct smc_clc_msg_accept_confirm aclc;
	int local_contact = SMC_FIRST_CONTACT;
	struct smc_ib_device *smcibdev;
	struct smc_link *link;
	u8 srv_first_contact;
	int reason_code = 0;
	int rc = 0;
	u8 ibport;

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(smc)) {
		reason_code = SMC_CLC_DECL_IPSEC;
		goto decline_rdma;
	}

	/* PNET table look up: search active ib_device and port
	 * within same PNETID that also contains the ethernet device
	 * used for the internal TCP socket
	 */
	smc_pnet_find_roce_resource(smc->clcsock->sk, &smcibdev, &ibport);
	if (!smcibdev) {
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
		goto decline_rdma;
	}

	/* do inband token exchange */
	reason_code = smc_clc_send_proposal(smc, smcibdev, ibport);
	if (reason_code < 0) {
		rc = reason_code;
		goto out_err;
	}
	if (reason_code > 0) /* configuration error */
		goto decline_rdma;
	/* receive SMC Accept CLC message */
	reason_code = smc_clc_wait_msg(smc, &aclc, sizeof(aclc),
				       SMC_CLC_ACCEPT);
	if (reason_code < 0) {
		rc = reason_code;
		goto out_err;
	}
	if (reason_code > 0)
		goto decline_rdma;

	srv_first_contact = aclc.hdr.flag;
	mutex_lock(&smc_create_lgr_pending);
	local_contact = smc_conn_create(smc, inaddr->sin_addr.s_addr, smcibdev,
					ibport, &aclc.lcl, srv_first_contact);
	if (local_contact < 0) {
		rc = local_contact;
		if (rc == -ENOMEM)
			reason_code = SMC_CLC_DECL_MEM; /* insufficient memory */
		else if (rc == -ENOLINK)
			reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
		goto decline_rdma_unlock;
	}
	link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];

	smc_conn_save_peer_info(smc, &aclc);

	/* create send buffer and rmb */
	rc = smc_buf_create(smc);
	if (rc) {
		reason_code = SMC_CLC_DECL_MEM;
		goto decline_rdma_unlock;
	}

	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, &aclc);

	rc = smc_rmb_rtoken_handling(&smc->conn, &aclc);
	if (rc) {
		reason_code = SMC_CLC_DECL_INTERR;
		goto decline_rdma_unlock;
	}

	smc_close_init(smc);
	smc_rx_init(smc);

	if (local_contact == SMC_FIRST_CONTACT) {
		rc = smc_ib_ready_link(link);
		if (rc) {
			reason_code = SMC_CLC_DECL_INTERR;
			goto decline_rdma_unlock;
		}
	} else {
		struct smc_buf_desc *buf_desc = smc->conn.rmb_desc;

		if (!buf_desc->reused) {
			/* register memory region for new rmb */
			rc = smc_wr_reg_send(link,
					     buf_desc->mr_rx[SMC_SINGLE_LINK]);
			if (rc) {
				reason_code = SMC_CLC_DECL_INTERR;
				goto decline_rdma_unlock;
			}
		}
	}
	smc_rmb_sync_sg_for_device(&smc->conn);

	rc = smc_clc_send_confirm(smc);
	if (rc)
		goto out_err_unlock;

	if (local_contact == SMC_FIRST_CONTACT) {
		/* QP confirmation over RoCE fabric */
		reason_code = smc_clnt_conf_first_link(
			smc, &smcibdev->gid[ibport - 1]);
		if (reason_code < 0) {
			rc = reason_code;
			goto out_err_unlock;
		}
		if (reason_code > 0)
			goto decline_rdma_unlock;
	}

	mutex_unlock(&smc_create_lgr_pending);
	smc_tx_init(smc);

out_connected:
	smc_copy_sock_settings_to_clc(smc);
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return rc ? rc : local_contact;

decline_rdma_unlock:
	mutex_unlock(&smc_create_lgr_pending);
	smc_conn_free(&smc->conn);
decline_rdma:
	/* RDMA setup failed, switch back to TCP */
	smc->use_fallback = true;
	if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
		rc = smc_clc_send_decline(smc, reason_code);
		if (rc < sizeof(struct smc_clc_msg_decline))
			goto out_err;
	}
	goto out_connected;

out_err_unlock:
	mutex_unlock(&smc_create_lgr_pending);
	smc_conn_free(&smc->conn);
out_err:
	return rc;
}
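
/* Summary of the client handshake driven by smc_connect_rdma() above:
 * CLC PROPOSAL out, CLC ACCEPT in, link group create/reuse (serialized by
 * smc_create_lgr_pending), buffer and rtoken setup, CLC CONFIRM out, and,
 * for a first contact only, LLC CONFIRM LINK exchanged over the new RoCE
 * link. RDMA-side failures fall back to TCP via smc_clc_send_decline()
 * unless the peer itself already declined (SMC_CLC_DECL_REPLY).
 */
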
static int smc_connect(struct socket *sock, struct sockaddr *addr,
		       int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;

	smc = smc_sk(sk);

	/* separate smc parameter checking to be safe */
	if (alen < sizeof(addr->sa_family))
		goto out_err;
	if (addr->sa_family != AF_INET)
		goto out_err;
	smc->addr = addr;	/* needed for nonblocking connect */

	lock_sock(sk);
	switch (sk->sk_state) {
	default:
		goto out;
	case SMC_ACTIVE:
		rc = -EISCONN;
		goto out;
	case SMC_INIT:
		rc = 0;
		break;
	}

	smc_copy_sock_settings_to_clc(smc);
	rc = kernel_connect(smc->clcsock, addr, alen, flags);
	if (rc)
		goto out;

	/* setup RDMA connection */
	rc = smc_connect_rdma(smc);
	if (rc < 0)
		goto out;
	else
		rc = 0; /* success cases including fallback */

out:
	release_sock(sk);
out_err:
	return rc;
}

static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
{
	struct sock *sk = &lsmc->sk;
	struct socket *new_clcsock;
	struct sock *new_sk;
	int rc;

	release_sock(&lsmc->sk);
	new_sk = smc_sock_alloc(sock_net(sk), NULL);
	if (!new_sk) {
		rc = -ENOMEM;
		lsmc->sk.sk_err = ENOMEM;
		*new_smc = NULL;
		lock_sock(&lsmc->sk);
		goto out;
	}
	*new_smc = smc_sk(new_sk);

	rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
	lock_sock(&lsmc->sk);
	if (rc < 0) {
		lsmc->sk.sk_err = -rc;
		new_sk->sk_state = SMC_CLOSED;
		sock_set_flag(new_sk, SOCK_DEAD);
		sk->sk_prot->unhash(new_sk);
		sock_put(new_sk);
		*new_smc = NULL;
		goto out;
	}
	if (lsmc->sk.sk_state == SMC_CLOSED) {
		if (new_clcsock)
			sock_release(new_clcsock);
		new_sk->sk_state = SMC_CLOSED;
		sock_set_flag(new_sk, SOCK_DEAD);
		sk->sk_prot->unhash(new_sk);
		sock_put(new_sk);
		*new_smc = NULL;
		goto out;
	}

	(*new_smc)->clcsock = new_clcsock;
out:
	return rc;
}

/* add a just created sock to the accept queue of the listen sock as
 * candidate for a following socket accept call from user space
 */
static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
{
	struct smc_sock *par = smc_sk(parent);

	sock_hold(sk);
	spin_lock(&par->accept_q_lock);
	list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_added(parent);
}

/* remove a socket from the accept queue of its parental listening socket */
static void smc_accept_unlink(struct sock *sk)
{
	struct smc_sock *par = smc_sk(sk)->listen_smc;

	spin_lock(&par->accept_q_lock);
	list_del_init(&smc_sk(sk)->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
	sock_put(sk);
}

/* remove a sock from the accept queue to bind it to a new socket created
 * for a socket accept call from user space
 */
struct sock *smc_accept_dequeue(struct sock *parent,
				struct socket *new_sock)
{
	struct smc_sock *isk, *n;
	struct sock *new_sk;

	list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
		new_sk = (struct sock *)isk;

		smc_accept_unlink(new_sk);
		if (new_sk->sk_state == SMC_CLOSED) {
			new_sk->sk_prot->unhash(new_sk);
			sock_put(new_sk);
			continue;
		}
		if (new_sock)
			sock_graft(new_sk, new_sock);
		return new_sk;
	}
	return NULL;
}
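
/* Reference counting across the accept queue: smc_accept_enqueue() takes a
 * sock_hold() that smc_accept_unlink() drops again, so a child sock keeps
 * a reference for the time it sits on the queue waiting for accept().
 */
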
/* clean up for a created but never accepted sock */
void smc_close_non_accepted(struct sock *sk)
{
	struct smc_sock *smc = smc_sk(sk);

	sock_hold(sk);
	lock_sock(sk);
	if (!sk->sk_lingertime)
		/* wait for peer closing */
		sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
	if (smc->use_fallback) {
		sk->sk_state = SMC_CLOSED;
	} else {
		smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	}
	if (smc->clcsock) {
		struct socket *tcp;

		tcp = smc->clcsock;
		smc->clcsock = NULL;
		sock_release(tcp);
	}
	if (smc->use_fallback) {
		schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
	} else if (sk->sk_state == SMC_CLOSED) {
		smc_conn_free(&smc->conn);
		schedule_delayed_work(&smc->sock_put_work,
				      SMC_CLOSE_SOCK_PUT_DELAY);
	}
	release_sock(sk);
	sock_put(sk);
}

static int smc_serv_conf_first_link(struct smc_sock *smc)
{
	struct smc_link_group *lgr = smc->conn.lgr;
	struct smc_link *link;
	int rest;
	int rc;

	link = &lgr->lnk[SMC_SINGLE_LINK];

	rc = smc_wr_reg_send(link,
			     smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]);
	if (rc)
		return SMC_CLC_DECL_INTERR;

	/* send CONFIRM LINK request to client over the RoCE fabric */
	rc = smc_llc_send_confirm_link(link,
				       link->smcibdev->mac[link->ibport - 1],
				       &link->smcibdev->gid[link->ibport - 1],
				       SMC_LLC_REQ);
	if (rc < 0)
		return SMC_CLC_DECL_TCL;

	/* receive CONFIRM LINK response from client over the RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(
		&link->llc_confirm_resp,
		SMC_LLC_WAIT_FIRST_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE);
	}

	return rc;
}
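
/* smc_serv_conf_first_link() above mirrors smc_clnt_conf_first_link(): the
 * server sends the LLC CONFIRM LINK request and waits for the response,
 * while the client waits for the request first and then answers. On a
 * timeout, each side expects a CLC DECLINE from its peer instead.
 */
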
/* setup for RDMA connection of server */
static void smc_listen_work(struct work_struct *work)
{
	struct smc_sock *new_smc = container_of(work, struct smc_sock,
						smc_listen_work);
	struct socket *newclcsock = new_smc->clcsock;
	struct smc_sock *lsmc = new_smc->listen_smc;
	struct smc_clc_msg_accept_confirm cclc;
	int local_contact = SMC_REUSE_CONTACT;
	struct sock *newsmcsk = &new_smc->sk;
	struct smc_clc_msg_proposal pclc;
	struct smc_ib_device *smcibdev;
	struct sockaddr_in peeraddr;
	struct smc_link *link;
	int reason_code = 0;
	int rc = 0, len;
	__be32 subnet;
	u8 prefix_len;
	u8 ibport;

	/* do inband token exchange -
	 * wait for and receive SMC Proposal CLC message
	 */
	reason_code = smc_clc_wait_msg(new_smc, &pclc, sizeof(pclc),
				       SMC_CLC_PROPOSAL);
	if (reason_code < 0)
		goto out_err;
	if (reason_code > 0)
		goto decline_rdma;

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(new_smc)) {
		reason_code = SMC_CLC_DECL_IPSEC;
		goto decline_rdma;
	}

	/* PNET table look up: search active ib_device and port
	 * within same PNETID that also contains the ethernet device
	 * used for the internal TCP socket
	 */
	smc_pnet_find_roce_resource(newclcsock->sk, &smcibdev, &ibport);
	if (!smcibdev) {
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
		goto decline_rdma;
	}

	/* determine subnet and mask from internal TCP socket */
	rc = smc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
	if (rc) {
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
		goto decline_rdma;
	}
	if ((pclc.outgoing_subnet != subnet) ||
	    (pclc.prefix_len != prefix_len)) {
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
		goto decline_rdma;
	}

	/* get address of the peer connected to the internal TCP socket */
	kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr, &len);

	/* allocate connection / link group */
	mutex_lock(&smc_create_lgr_pending);
	local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
					smcibdev, ibport, &pclc.lcl, 0);
	if (local_contact < 0) {
		rc = local_contact;
		if (rc == -ENOMEM)
			reason_code = SMC_CLC_DECL_MEM; /* insufficient memory */
		goto decline_rdma;
	}
	link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];

	/* create send buffer and rmb */
	rc = smc_buf_create(new_smc);
	if (rc) {
		reason_code = SMC_CLC_DECL_MEM;
		goto decline_rdma;
	}

	smc_close_init(new_smc);
	smc_rx_init(new_smc);

	if (local_contact != SMC_FIRST_CONTACT) {
		struct smc_buf_desc *buf_desc = new_smc->conn.rmb_desc;

		if (!buf_desc->reused) {
			/* register memory region for new rmb */
			rc = smc_wr_reg_send(link,
					     buf_desc->mr_rx[SMC_SINGLE_LINK]);
			if (rc) {
				reason_code = SMC_CLC_DECL_INTERR;
				goto decline_rdma;
			}
		}
	}
	smc_rmb_sync_sg_for_device(&new_smc->conn);

	rc = smc_clc_send_accept(new_smc, local_contact);
	if (rc)
		goto out_err;

	/* receive SMC Confirm CLC message */
	reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
				       SMC_CLC_CONFIRM);
	if (reason_code < 0)
		goto out_err;
	if (reason_code > 0)
		goto decline_rdma;
	smc_conn_save_peer_info(new_smc, &cclc);
	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, &cclc);

	rc = smc_rmb_rtoken_handling(&new_smc->conn, &cclc);
	if (rc) {
		reason_code = SMC_CLC_DECL_INTERR;
		goto decline_rdma;
	}

	if (local_contact == SMC_FIRST_CONTACT) {
		rc = smc_ib_ready_link(link);
		if (rc) {
			reason_code = SMC_CLC_DECL_INTERR;
			goto decline_rdma;
		}
		/* QP confirmation over RoCE fabric */
		reason_code = smc_serv_conf_first_link(new_smc);
		if (reason_code < 0) {
			/* peer is not aware of a problem */
			rc = reason_code;
			goto out_err;
		}
		if (reason_code > 0)
			goto decline_rdma;
	}

	smc_tx_init(new_smc);

out_connected:
	sk_refcnt_debug_inc(newsmcsk);
	if (newsmcsk->sk_state == SMC_INIT)
		newsmcsk->sk_state = SMC_ACTIVE;
enqueue:
	mutex_unlock(&smc_create_lgr_pending);
	lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
	if (lsmc->sk.sk_state == SMC_LISTEN) {
		smc_accept_enqueue(&lsmc->sk, newsmcsk);
	} else { /* no longer listening */
		smc_close_non_accepted(newsmcsk);
	}
	release_sock(&lsmc->sk);

	/* Wake up accept */
	lsmc->sk.sk_data_ready(&lsmc->sk);
	sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
	return;

decline_rdma:
	/* RDMA setup failed, switch back to TCP */
	smc_conn_free(&new_smc->conn);
	new_smc->use_fallback = true;
	if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
		rc = smc_clc_send_decline(new_smc, reason_code);
		if (rc < sizeof(struct smc_clc_msg_decline))
			goto out_err;
	}
	goto out_connected;

out_err:
	newsmcsk->sk_state = SMC_CLOSED;
	smc_conn_free(&new_smc->conn);
	goto enqueue; /* queue new sock with sk_err set */
}
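
/* The listen path is split over two work items: smc_tcp_listen_work()
 * below loops in kernel_accept() on the listening clcsock and schedules
 * one smc_listen_work() instance (above) per new connection, so a slow
 * CLC/LLC handshake cannot stall acceptance of further connections.
 */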
static void smc_tcp_listen_work(struct work_struct *work)
{
	struct smc_sock *lsmc = container_of(work, struct smc_sock,
					     tcp_listen_work);
	struct smc_sock *new_smc;
	int rc = 0;

	lock_sock(&lsmc->sk);
	while (lsmc->sk.sk_state == SMC_LISTEN) {
		rc = smc_clcsock_accept(lsmc, &new_smc);
		if (rc)
			goto out;
		if (!new_smc)
			continue;

		new_smc->listen_smc = lsmc;
		new_smc->use_fallback = false; /* assume rdma capability first */
		sock_hold(&lsmc->sk); /* sock_put in smc_listen_work */
		INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
		smc_copy_sock_settings_to_smc(new_smc);
		schedule_work(&new_smc->smc_listen_work);
	}

out:
	release_sock(&lsmc->sk);
	lsmc->sk.sk_data_ready(&lsmc->sk); /* no more listening, wake accept */
}

static int smc_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);
	lock_sock(sk);

	rc = -EINVAL;
	if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN))
		goto out;

	rc = 0;
	if (sk->sk_state == SMC_LISTEN) {
		sk->sk_max_ack_backlog = backlog;
		goto out;
	}
	/* some socket options are handled in core, so we could not apply
	 * them to the clc socket -- copy smc socket options to clc socket
	 */
	smc_copy_sock_settings_to_clc(smc);

	rc = kernel_listen(smc->clcsock, backlog);
	if (rc)
		goto out;
	sk->sk_max_ack_backlog = backlog;
	sk->sk_ack_backlog = 0;
	sk->sk_state = SMC_LISTEN;
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	schedule_work(&smc->tcp_listen_work);

out:
	release_sock(sk);
	return rc;
}

static int smc_accept(struct socket *sock, struct socket *new_sock,
		      int flags, bool kern)
{
	struct sock *sk = sock->sk, *nsk;
	DECLARE_WAITQUEUE(wait, current);
	struct smc_sock *lsmc;
	long timeo;
	int rc = 0;

	lsmc = smc_sk(sk);
	lock_sock(sk);

	if (lsmc->sk.sk_state != SMC_LISTEN) {
		rc = -EINVAL;
		goto out;
	}

	/* Wait for an incoming connection */
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	add_wait_queue_exclusive(sk_sleep(sk), &wait);
	while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (!timeo) {
			rc = -EAGAIN;
			break;
		}
		release_sock(sk);
		timeo = schedule_timeout(timeo);
		/* wakeup by sk_data_ready in smc_listen_work() */
		sched_annotate_sleep();
		lock_sock(sk);
		if (signal_pending(current)) {
			rc = sock_intr_errno(timeo);
			break;
		}
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(sk_sleep(sk), &wait);

	if (!rc)
		rc = sock_error(nsk);

out:
	release_sock(sk);
	return rc;
}
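
/* smc_accept() above sleeps on the listener's wait queue and is woken via
 * sk_data_ready() from smc_listen_work(); with O_NONBLOCK, sock_rcvtimeo()
 * returns 0 and an empty accept queue yields -EAGAIN immediately.
 */
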
static int smc_getname(struct socket *sock, struct sockaddr *addr,
		       int *len, int peer)
{
	struct smc_sock *smc;

	if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
	    (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
		return -ENOTCONN;

	smc = smc_sk(sock->sk);

	return smc->clcsock->ops->getname(smc->clcsock, addr, len, peer);
}

static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_INIT))
		goto out;
	if (smc->use_fallback)
		rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
	else
		rc = smc_tx_sendmsg(smc, msg, len);
out:
	release_sock(sk);
	return rc;
}

static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
		       int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state == SMC_INIT) ||
	    (sk->sk_state == SMC_LISTEN) ||
	    (sk->sk_state == SMC_CLOSED))
		goto out;

	if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
		rc = 0;
		goto out;
	}

	if (smc->use_fallback)
		rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
	else
		rc = smc_rx_recvmsg(smc, msg, len, flags);

out:
	release_sock(sk);
	return rc;
}
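
/* Data path dispatch in smc_sendmsg()/smc_recvmsg() above: once
 * use_fallback is set, payload travels over the internal TCP (CLC) socket;
 * otherwise it takes the SMC paths smc_tx_sendmsg()/smc_rx_recvmsg(),
 * which move data through the RMBs (remote memory buffers) via RDMA.
 */
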
static unsigned int smc_accept_poll(struct sock *parent)
{
	struct smc_sock *isk;
	struct sock *sk;

	lock_sock(parent);
	list_for_each_entry(isk, &smc_sk(parent)->accept_q, accept_q) {
		sk = (struct sock *)isk;

		if (sk->sk_state == SMC_ACTIVE) {
			release_sock(parent);
			return POLLIN | POLLRDNORM;
		}
	}
	release_sock(parent);

	return 0;
}

static unsigned int smc_poll(struct file *file, struct socket *sock,
			     poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask = 0;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sock->sk);
	if ((sk->sk_state == SMC_INIT) || smc->use_fallback) {
		/* delegate to CLC child sock */
		mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
		/* if non-blocking connect finished ... */
		lock_sock(sk);
		if ((sk->sk_state == SMC_INIT) && (mask & POLLOUT)) {
			sk->sk_err = smc->clcsock->sk->sk_err;
			if (sk->sk_err) {
				mask |= POLLERR;
			} else {
				rc = smc_connect_rdma(smc);
				if (rc < 0)
					mask |= POLLERR;
				else
					/* success cases including fallback */
					mask |= POLLOUT | POLLWRNORM;
			}
		}
		release_sock(sk);
	} else {
		sock_poll_wait(file, sk_sleep(sk), wait);
		if (sk->sk_state == SMC_LISTEN)
			/* woken up by sk_data_ready in smc_listen_work() */
			mask |= smc_accept_poll(sk);
		if (sk->sk_err)
			mask |= POLLERR;
		if (atomic_read(&smc->conn.sndbuf_space) ||
		    (sk->sk_shutdown & SEND_SHUTDOWN)) {
			mask |= POLLOUT | POLLWRNORM;
		} else {
			sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		}
		if (atomic_read(&smc->conn.bytes_to_rcv))
			mask |= POLLIN | POLLRDNORM;
		if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
		    (sk->sk_state == SMC_CLOSED))
			mask |= POLLHUP;
		if (sk->sk_shutdown & RCV_SHUTDOWN)
			mask |= POLLIN | POLLRDNORM | POLLRDHUP;
		if (sk->sk_state == SMC_APPCLOSEWAIT1)
			mask |= POLLIN;
	}

	return mask;
}
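
/* Illustrative sketch, not part of this file: the SMC_INIT branch of
 * smc_poll() above completes a deferred non-blocking connect, so from
 * user space:
 *
 *	fcntl(fd, F_SETFL, O_NONBLOCK);
 *	connect(fd, (struct sockaddr *)&sa, sizeof(sa)); // -EINPROGRESS
 *	struct pollfd pfd = { .fd = fd, .events = POLLOUT };
 *	poll(&pfd, 1, -1);	// smc_poll() runs smc_connect_rdma() here
 *	// POLLOUT: connected (SMC or TCP fallback); POLLERR: failed
 */
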
static int smc_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;
	int rc1 = 0;

	smc = smc_sk(sk);

	if ((how < SHUT_RD) || (how > SHUT_RDWR))
		return rc;

	lock_sock(sk);

	rc = -ENOTCONN;
	if ((sk->sk_state != SMC_LISTEN) &&
	    (sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPFINCLOSEWAIT))
		goto out;
	if (smc->use_fallback) {
		rc = kernel_sock_shutdown(smc->clcsock, how);
		sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
		if (sk->sk_shutdown == SHUTDOWN_MASK)
			sk->sk_state = SMC_CLOSED;
		goto out;
	}
	switch (how) {
	case SHUT_RDWR:		/* shutdown in both directions */
		rc = smc_close_active(smc);
		break;
	case SHUT_WR:
		rc = smc_close_shutdown_write(smc);
		break;
	case SHUT_RD:
		if (sk->sk_state == SMC_LISTEN)
			rc = smc_close_active(smc);
		else
			rc = 0;
		/* nothing more to do because peer is not involved */
		break;
	}
	rc1 = kernel_sock_shutdown(smc->clcsock, how);
	/* map sock_shutdown_cmd constants to sk_shutdown value range */
	sk->sk_shutdown |= how + 1;

out:
	release_sock(sk);
	return rc ? rc : rc1;
}

static int smc_setsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;

	smc = smc_sk(sk);

	/* generic setsockopts reaching us here always apply to the
	 * CLC socket
	 */
	return smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
					     optval, optlen);
}

static int smc_getsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	struct smc_sock *smc;

	smc = smc_sk(sock->sk);
	/* socket options apply to the CLC socket */
	return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
					     optval, optlen);
}

static int smc_ioctl(struct socket *sock, unsigned int cmd,
		     unsigned long arg)
{
	struct smc_sock *smc;

	smc = smc_sk(sock->sk);
	if (smc->use_fallback)
		return smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
	else
		return sock_no_ioctl(sock, cmd, arg);
}

static ssize_t smc_sendpage(struct socket *sock, struct page *page,
			    int offset, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state != SMC_ACTIVE)
		goto out;
	if (smc->use_fallback)
		rc = kernel_sendpage(smc->clcsock, page, offset,
				     size, flags);
	else
		rc = sock_no_sendpage(sock, page, offset, size, flags);

out:
	release_sock(sk);
	return rc;
}

static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
			       struct pipe_inode_info *pipe, size_t len,
			       unsigned int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_CLOSED))
		goto out;
	if (smc->use_fallback) {
		rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
						    pipe, len, flags);
	} else {
		rc = -EOPNOTSUPP;
	}
out:
	release_sock(sk);
	return rc;
}

/* must look like tcp */
static const struct proto_ops smc_sock_ops = {
	.family		= PF_SMC,
	.owner		= THIS_MODULE,
	.release	= smc_release,
	.bind		= smc_bind,
	.connect	= smc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= smc_accept,
	.getname	= smc_getname,
	.poll		= smc_poll,
	.ioctl		= smc_ioctl,
	.listen		= smc_listen,
	.shutdown	= smc_shutdown,
	.setsockopt	= smc_setsockopt,
	.getsockopt	= smc_getsockopt,
	.sendmsg	= smc_sendmsg,
	.recvmsg	= smc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= smc_sendpage,
	.splice_read	= smc_splice_read,
};
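
/* Every SMC socket created below is paired with an internal kernel TCP
 * socket (clcsock) that carries the CLC handshake and serves as the data
 * path when falling back to TCP; initial buffer sizes start from the TCP
 * defaults, bounded below by SMC_BUF_MIN_SIZE.
 */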
static int smc_create(struct net *net, struct socket *sock, int protocol,
		      int kern)
{
	struct smc_sock *smc;
	struct sock *sk;
	int rc;

	rc = -ESOCKTNOSUPPORT;
	if (sock->type != SOCK_STREAM)
		goto out;

	rc = -EPROTONOSUPPORT;
	if ((protocol != IPPROTO_IP) && (protocol != IPPROTO_TCP))
		goto out;

	rc = -ENOBUFS;
	sock->ops = &smc_sock_ops;
	sk = smc_sock_alloc(net, sock);
	if (!sk)
		goto out;

	/* create internal TCP socket for CLC handshake and fallback */
	smc = smc_sk(sk);
	smc->use_fallback = false; /* assume rdma capability first */
	rc = sock_create_kern(net, PF_INET, SOCK_STREAM,
			      IPPROTO_TCP, &smc->clcsock);
	if (rc) {
		/* smc->clcsock was not created; bail out instead of
		 * dereferencing it below
		 */
		sk_common_release(sk);
		goto out;
	}
	smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
	smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);

out:
	return rc;
}

static const struct net_proto_family smc_sock_family_ops = {
	.family	= PF_SMC,
	.owner	= THIS_MODULE,
	.create	= smc_create,
};

static int __init smc_init(void)
{
	int rc;

	rc = smc_pnet_init();
	if (rc)
		return rc;

	rc = smc_llc_init();
	if (rc) {
		pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = smc_cdc_init();
	if (rc) {
		pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = proto_register(&smc_proto, 1);
	if (rc) {
		pr_err("%s: proto_register fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = sock_register(&smc_sock_family_ops);
	if (rc) {
		pr_err("%s: sock_register fails with %d\n", __func__, rc);
		goto out_proto;
	}
	INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);

	rc = smc_ib_register_client();
	if (rc) {
		pr_err("%s: ib_register fails with %d\n", __func__, rc);
		goto out_sock;
	}

	return 0;

out_sock:
	sock_unregister(PF_SMC);
out_proto:
	proto_unregister(&smc_proto);
out_pnet:
	smc_pnet_exit();
	return rc;
}

static void __exit smc_exit(void)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_freeing_list);

	spin_lock_bh(&smc_lgr_list.lock);
	if (!list_empty(&smc_lgr_list.list))
		list_splice_init(&smc_lgr_list.list, &lgr_freeing_list);
	spin_unlock_bh(&smc_lgr_list.lock);
	list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
		list_del_init(&lgr->list);
		smc_lgr_free(lgr); /* free link group */
	}
	smc_ib_unregister_client();
	sock_unregister(PF_SMC);
	proto_unregister(&smc_proto);
	smc_pnet_exit();
}

module_init(smc_init);
module_exit(smc_exit);

MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("smc socket address family");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_SMC);
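
/* MODULE_ALIAS_NETPROTO(PF_SMC) registers a "net-pf-<n>" module alias, so
 * the first socket(AF_SMC, SOCK_STREAM, 0) call from user space loads this
 * module on demand.
 */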