1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE 4 * 5 * AF_SMC protocol family socket handler keeping the AF_INET sock address type 6 * applies to SOCK_STREAM sockets only 7 * offers an alternative communication option for TCP-protocol sockets 8 * applicable with RoCE-cards only 9 * 10 * Initial restrictions: 11 * - support for alternate links postponed 12 * 13 * Copyright IBM Corp. 2016, 2018 14 * 15 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> 16 * based on prototype from Frank Blaschka 17 */ 18 19 #define KMSG_COMPONENT "smc" 20 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 21 22 #include <linux/module.h> 23 #include <linux/socket.h> 24 #include <linux/workqueue.h> 25 #include <linux/in.h> 26 #include <linux/sched/signal.h> 27 #include <linux/if_vlan.h> 28 #include <linux/rcupdate_wait.h> 29 #include <linux/ctype.h> 30 31 #include <net/sock.h> 32 #include <net/tcp.h> 33 #include <net/smc.h> 34 #include <asm/ioctls.h> 35 36 #include <net/net_namespace.h> 37 #include <net/netns/generic.h> 38 #include "smc_netns.h" 39 40 #include "smc.h" 41 #include "smc_clc.h" 42 #include "smc_llc.h" 43 #include "smc_cdc.h" 44 #include "smc_core.h" 45 #include "smc_ib.h" 46 #include "smc_ism.h" 47 #include "smc_pnet.h" 48 #include "smc_tx.h" 49 #include "smc_rx.h" 50 #include "smc_close.h" 51 52 static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group 53 * creation on server 54 */ 55 static DEFINE_MUTEX(smc_client_lgr_pending); /* serialize link group 56 * creation on client 57 */ 58 59 struct workqueue_struct *smc_hs_wq; /* wq for handshake work */ 60 struct workqueue_struct *smc_close_wq; /* wq for close work */ 61 62 static void smc_tcp_listen_work(struct work_struct *); 63 static void smc_connect_work(struct work_struct *); 64 65 static void smc_set_keepalive(struct sock *sk, int val) 66 { 67 struct smc_sock *smc = smc_sk(sk); 68 69 smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val); 70 } 

/* per-family hash tables holding all SMC socks (IPv4 flavour) */
static struct smc_hashinfo smc_v4_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};

/* per-family hash tables holding all SMC socks (IPv6 flavour) */
static struct smc_hashinfo smc_v6_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
};

/* proto hash hook: add the sock to the hash list of its proto and bump the
 * per-net in-use counter, both under the hashinfo write lock.
 * Always returns 0.
 */
int smc_hash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
	struct hlist_head *head;

	head = &h->ht;

	write_lock_bh(&h->lock);
	sk_add_node(sk, head);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	write_unlock_bh(&h->lock);

	return 0;
}
EXPORT_SYMBOL_GPL(smc_hash_sk);

/* proto unhash hook: remove the sock from the hash list of its proto; the
 * in-use counter is only decremented if the sock was actually hashed
 */
void smc_unhash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;

	write_lock_bh(&h->lock);
	if (sk_del_node_init(sk))
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	write_unlock_bh(&h->lock);
}
EXPORT_SYMBOL_GPL(smc_unhash_sk);

/* proto used for SMC socks that are not SMCPROTO_SMC6 (see smc_sock_alloc) */
struct proto smc_proto = {
	.name		= "SMC",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v4_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto);

/* proto used for SMCPROTO_SMC6 socks (see smc_sock_alloc) */
struct proto smc_proto6 = {
	.name		= "SMC6",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v6_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto6);

/* undo the file pointer changes made by smc_switch_to_fallback(): point the
 * file's private_data back to the smc socket and detach the file from the
 * clcsock
 */
static void smc_restore_fallback_changes(struct smc_sock *smc)
{
	if (smc->clcsock->file) { /* non-accepted sockets have no file yet */
		smc->clcsock->file->private_data = smc->sk.sk_socket;
		smc->clcsock->file = NULL;
	}
}

/* common release processing for smc_release() and smc_close_non_accepted();
 * both callers hold the sock lock of @smc. The lock is dropped temporarily
 * around smc_clcsock_release().
 * Returns the result of smc_close_active() (SMC mode) or of the clcsock
 * shutdown of a listening fallback socket, otherwise 0.
 */
static int __smc_release(struct smc_sock *smc)
{
	struct sock *sk = &smc->sk;
	int rc = 0;

	if (!smc->use_fallback) {
		rc = smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	} else {
		if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
			sock_put(sk); /* passive closing */
		if (sk->sk_state == SMC_LISTEN) {
			/* wake up clcsock accept */
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
		}
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk);
		smc_restore_fallback_changes(smc);
	}

	sk->sk_prot->unhash(sk);

	if (sk->sk_state == SMC_CLOSED) {
		if (smc->clcsock) {
			/* clcsock release is done without the sock lock */
			release_sock(sk);
			smc_clcsock_release(smc);
			lock_sock(sk);
		}
		if (!smc->use_fallback)
			smc_conn_free(&smc->conn);
	}

	return rc;
}

/* release handler for an SMC socket: aborts a dangling non-blocking connect,
 * waits for pending connect work, runs __smc_release() under the sock lock
 * and detaches the sock from @sock.
 * Returns the result of __smc_release(), or 0 if @sock has no sock.
 */
static int smc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = 0;

	if (!sk)
		goto out;

	sock_hold(sk); /* sock_put below */
	smc = smc_sk(sk);

	/* cleanup for a dangling non-blocking connect */
	if (smc->connect_nonblock && sk->sk_state == SMC_INIT)
		tcp_abort(smc->clcsock->sk, ECONNABORTED);
	flush_work(&smc->connect_work);

	if (sk->sk_state == SMC_LISTEN)
		/* smc_close_non_accepted() is called and acquires
		 * sock lock for child sockets again
		 */
		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	else
		lock_sock(sk);

	rc = __smc_release(smc);

	/* detach socket */
	sock_orphan(sk);
	sock->sk = NULL;
	release_sock(sk);

	sock_put(sk); /* sock_hold above */
	sock_put(sk); /* final sock_put */
out:
	return rc;
}

/* sk_destruct callback (installed in smc_sock_alloc): the refcount debug
 * counter is only decremented for socks that are SMC_CLOSED and SOCK_DEAD
 */
static void smc_destruct(struct sock *sk)
{
	if (sk->sk_state != SMC_CLOSED)
		return;
	if (!sock_flag(sk, SOCK_DEAD))
		return;

	sk_refcnt_debug_dec(sk);
}

/* allocate and initialize a new SMC sock in state SMC_INIT and add it to the
 * hash list of its proto; @protocol selects smc_proto6 or smc_proto.
 * Returns the new sock, or NULL if sk_alloc() fails.
 */
static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
				   int protocol)
{
	struct smc_sock *smc;
	struct proto *prot;
	struct sock *sk;

	prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
	if (!sk)
		return NULL;

	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
	sk->sk_state = SMC_INIT;
	sk->sk_destruct = smc_destruct;
	sk->sk_protocol = protocol;
	smc = smc_sk(sk);
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	INIT_WORK(&smc->connect_work, smc_connect_work);
	INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
	INIT_LIST_HEAD(&smc->accept_q);
	spin_lock_init(&smc->accept_q_lock);
	spin_lock_init(&smc->conn.send_lock);
	sk->sk_prot->hash(sk);
	sk_refcnt_debug_inc(sk);
	mutex_init(&smc->clcsock_release_lock);

	return sk;
}

/* bind handler: validates the address like inet_bind() does and binds the
 * internal CLC socket to it.
 * Returns -EINVAL for a too short address or a socket that is not in state
 * SMC_INIT (or has a non-blocking connect pending), -EAFNOSUPPORT for an
 * unsupported address family, otherwise the result of kernel_bind().
 */
static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
		    int addr_len)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);

	/* replicate tests from inet_bind(), to be safe wrt. future changes */
	rc = -EINVAL;
	if (addr_len < sizeof(struct sockaddr_in))
		goto out;

	rc = -EAFNOSUPPORT;
	if (addr->sin_family != AF_INET &&
	    addr->sin_family != AF_INET6 &&
	    addr->sin_family != AF_UNSPEC)
		goto out;
	/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
	if (addr->sin_family == AF_UNSPEC &&
	    addr->sin_addr.s_addr != htonl(INADDR_ANY))
		goto out;

	lock_sock(sk);

	/* Check if socket is already active */
	rc = -EINVAL;
	if (sk->sk_state != SMC_INIT || smc->connect_nonblock)
		goto out_rel;

	smc->clcsock->sk->sk_reuse = sk->sk_reuse;
	rc = kernel_bind(smc->clcsock, uaddr, addr_len);

out_rel:
	release_sock(sk);
out:
	return rc;
}

/* copy a fixed set of sock fields from @osk to @nsk and replace those bits
 * of nsk->sk_flags that are selected by @mask with the bits of @osk
 */
static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
				   unsigned long mask)
{
	/* options we don't get control via setsockopt for */
	nsk->sk_type = osk->sk_type;
	nsk->sk_sndbuf = osk->sk_sndbuf;
	nsk->sk_rcvbuf = osk->sk_rcvbuf;
	nsk->sk_sndtimeo = osk->sk_sndtimeo;
	nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
	nsk->sk_mark = osk->sk_mark;
	nsk->sk_priority = osk->sk_priority;
	nsk->sk_rcvlowat = osk->sk_rcvlowat;
	nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
	nsk->sk_err = osk->sk_err;

	nsk->sk_flags &= ~mask;
	nsk->sk_flags |= osk->sk_flags & mask;
}

/* sk_flags bits that are copied from the smc sock to the clc sock */
#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_BROADCAST) | \
			     (1UL << SOCK_TIMESTAMP) | \
			     (1UL << SOCK_DBG) | \
			     (1UL << SOCK_RCVTSTAMP) | \
			     (1UL << SOCK_RCVTSTAMPNS) | \
			     (1UL << SOCK_LOCALROUTE) | \
			     (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
			     (1UL << SOCK_RXQ_OVFL) | \
			     (1UL << SOCK_WIFI_STATUS) | \
			     (1UL << SOCK_NOFCS) | \
			     (1UL << SOCK_FILTER_LOCKED) | \
			     (1UL << SOCK_TSTAMP_NEW))
/* copy only relevant settings and flags of SOL_SOCKET level from smc to
* clc socket (since smc is not called for these options from net/core) 330 */ 331 static void smc_copy_sock_settings_to_clc(struct smc_sock *smc) 332 { 333 smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC); 334 } 335 336 #define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \ 337 (1UL << SOCK_KEEPOPEN) | \ 338 (1UL << SOCK_LINGER) | \ 339 (1UL << SOCK_DBG)) 340 /* copy only settings and flags relevant for smc from clc to smc socket */ 341 static void smc_copy_sock_settings_to_smc(struct smc_sock *smc) 342 { 343 smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC); 344 } 345 346 /* register the new rmb on all links */ 347 static int smcr_lgr_reg_rmbs(struct smc_link *link, 348 struct smc_buf_desc *rmb_desc) 349 { 350 struct smc_link_group *lgr = link->lgr; 351 int i, rc = 0; 352 353 rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY); 354 if (rc) 355 return rc; 356 /* protect against parallel smc_llc_cli_rkey_exchange() and 357 * parallel smcr_link_reg_rmb() 358 */ 359 mutex_lock(&lgr->llc_conf_mutex); 360 for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { 361 if (!smc_link_active(&lgr->lnk[i])) 362 continue; 363 rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc); 364 if (rc) 365 goto out; 366 } 367 368 /* exchange confirm_rkey msg with peer */ 369 rc = smc_llc_do_confirm_rkey(link, rmb_desc); 370 if (rc) { 371 rc = -EFAULT; 372 goto out; 373 } 374 rmb_desc->is_conf_rkey = true; 375 out: 376 mutex_unlock(&lgr->llc_conf_mutex); 377 smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl); 378 return rc; 379 } 380 381 static int smcr_clnt_conf_first_link(struct smc_sock *smc) 382 { 383 struct smc_link *link = smc->conn.lnk; 384 struct smc_llc_qentry *qentry; 385 int rc; 386 387 /* receive CONFIRM LINK request from server over RoCE fabric */ 388 qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME, 389 SMC_LLC_CONFIRM_LINK); 390 if (!qentry) { 391 struct smc_clc_msg_decline dclc; 392 393 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), 394 
SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT); 395 return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc; 396 } 397 smc_llc_save_peer_uid(qentry); 398 rc = smc_llc_eval_conf_link(qentry, SMC_LLC_REQ); 399 smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl); 400 if (rc) 401 return SMC_CLC_DECL_RMBE_EC; 402 403 rc = smc_ib_modify_qp_rts(link); 404 if (rc) 405 return SMC_CLC_DECL_ERR_RDYLNK; 406 407 smc_wr_remember_qp_attr(link); 408 409 if (smcr_link_reg_rmb(link, smc->conn.rmb_desc)) 410 return SMC_CLC_DECL_ERR_REGRMB; 411 412 /* confirm_rkey is implicit on 1st contact */ 413 smc->conn.rmb_desc->is_conf_rkey = true; 414 415 /* send CONFIRM LINK response over RoCE fabric */ 416 rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP); 417 if (rc < 0) 418 return SMC_CLC_DECL_TIMEOUT_CL; 419 420 smc_llc_link_active(link); 421 smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE); 422 423 /* optional 2nd link, receive ADD LINK request from server */ 424 qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME, 425 SMC_LLC_ADD_LINK); 426 if (!qentry) { 427 struct smc_clc_msg_decline dclc; 428 429 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), 430 SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT); 431 if (rc == -EAGAIN) 432 rc = 0; /* no DECLINE received, go with one link */ 433 return rc; 434 } 435 smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl); 436 smc_llc_cli_add_link(link, qentry); 437 return 0; 438 } 439 440 static void smcr_conn_save_peer_info(struct smc_sock *smc, 441 struct smc_clc_msg_accept_confirm *clc) 442 { 443 int bufsize = smc_uncompress_bufsize(clc->r0.rmbe_size); 444 445 smc->conn.peer_rmbe_idx = clc->r0.rmbe_idx; 446 smc->conn.local_tx_ctrl.token = ntohl(clc->r0.rmbe_alert_token); 447 smc->conn.peer_rmbe_size = bufsize; 448 atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size); 449 smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1); 450 } 451 452 static bool smc_isascii(char *hostname) 453 { 454 int i; 455 456 for (i = 0; i < SMC_MAX_HOSTNAME_LEN; i++) 457 if 
(!isascii(hostname[i])) 458 return false; 459 return true; 460 } 461 462 static void smcd_conn_save_peer_info(struct smc_sock *smc, 463 struct smc_clc_msg_accept_confirm *clc) 464 { 465 int bufsize = smc_uncompress_bufsize(clc->d0.dmbe_size); 466 467 smc->conn.peer_rmbe_idx = clc->d0.dmbe_idx; 468 smc->conn.peer_token = clc->d0.token; 469 /* msg header takes up space in the buffer */ 470 smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg); 471 atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size); 472 smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx; 473 if (clc->hdr.version > SMC_V1 && 474 (clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK)) { 475 struct smc_clc_msg_accept_confirm_v2 *clc_v2 = 476 (struct smc_clc_msg_accept_confirm_v2 *)clc; 477 struct smc_clc_first_contact_ext *fce = 478 (struct smc_clc_first_contact_ext *) 479 (((u8 *)clc_v2) + sizeof(*clc_v2)); 480 481 memcpy(smc->conn.lgr->negotiated_eid, clc_v2->eid, 482 SMC_MAX_EID_LEN); 483 smc->conn.lgr->peer_os = fce->os_type; 484 smc->conn.lgr->peer_smc_release = fce->release; 485 if (smc_isascii(fce->hostname)) 486 memcpy(smc->conn.lgr->peer_hostname, fce->hostname, 487 SMC_MAX_HOSTNAME_LEN); 488 } 489 } 490 491 static void smc_conn_save_peer_info(struct smc_sock *smc, 492 struct smc_clc_msg_accept_confirm *clc) 493 { 494 if (smc->conn.lgr->is_smcd) 495 smcd_conn_save_peer_info(smc, clc); 496 else 497 smcr_conn_save_peer_info(smc, clc); 498 } 499 500 static void smc_link_save_peer_info(struct smc_link *link, 501 struct smc_clc_msg_accept_confirm *clc) 502 { 503 link->peer_qpn = ntoh24(clc->r0.qpn); 504 memcpy(link->peer_gid, clc->r0.lcl.gid, SMC_GID_SIZE); 505 memcpy(link->peer_mac, clc->r0.lcl.mac, sizeof(link->peer_mac)); 506 link->peer_psn = ntoh24(clc->r0.psn); 507 link->peer_mtu = clc->r0.qp_mtu; 508 } 509 510 static void smc_switch_to_fallback(struct smc_sock *smc) 511 { 512 smc->use_fallback = true; 513 if (smc->sk.sk_socket && smc->sk.sk_socket->file) { 514 smc->clcsock->file 
= smc->sk.sk_socket->file; 515 smc->clcsock->file->private_data = smc->clcsock; 516 smc->clcsock->wq.fasync_list = 517 smc->sk.sk_socket->wq.fasync_list; 518 } 519 } 520 521 /* fall back during connect */ 522 static int smc_connect_fallback(struct smc_sock *smc, int reason_code) 523 { 524 smc_switch_to_fallback(smc); 525 smc->fallback_rsn = reason_code; 526 smc_copy_sock_settings_to_clc(smc); 527 smc->connect_nonblock = 0; 528 if (smc->sk.sk_state == SMC_INIT) 529 smc->sk.sk_state = SMC_ACTIVE; 530 return 0; 531 } 532 533 /* decline and fall back during connect */ 534 static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code, 535 u8 version) 536 { 537 int rc; 538 539 if (reason_code < 0) { /* error, fallback is not possible */ 540 if (smc->sk.sk_state == SMC_INIT) 541 sock_put(&smc->sk); /* passive closing */ 542 return reason_code; 543 } 544 if (reason_code != SMC_CLC_DECL_PEERDECL) { 545 rc = smc_clc_send_decline(smc, reason_code, version); 546 if (rc < 0) { 547 if (smc->sk.sk_state == SMC_INIT) 548 sock_put(&smc->sk); /* passive closing */ 549 return rc; 550 } 551 } 552 return smc_connect_fallback(smc, reason_code); 553 } 554 555 /* abort connecting */ 556 static int smc_connect_abort(struct smc_sock *smc, int reason_code, 557 int local_first) 558 { 559 bool is_smcd = smc->conn.lgr->is_smcd; 560 561 if (local_first) 562 smc_lgr_cleanup_early(&smc->conn); 563 else 564 smc_conn_free(&smc->conn); 565 if (is_smcd) 566 /* there is only one lgr role for SMC-D; use server lock */ 567 mutex_unlock(&smc_server_lgr_pending); 568 else 569 mutex_unlock(&smc_client_lgr_pending); 570 571 smc->connect_nonblock = 0; 572 return reason_code; 573 } 574 575 /* check if there is a rdma device available for this connection. 
*/ 576 /* called for connect and listen */ 577 static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini) 578 { 579 /* PNET table look up: search active ib_device and port 580 * within same PNETID that also contains the ethernet device 581 * used for the internal TCP socket 582 */ 583 smc_pnet_find_roce_resource(smc->clcsock->sk, ini); 584 if (!ini->ib_dev) 585 return SMC_CLC_DECL_NOSMCRDEV; 586 return 0; 587 } 588 589 /* check if there is an ISM device available for this connection. */ 590 /* called for connect and listen */ 591 static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini) 592 { 593 /* Find ISM device with same PNETID as connecting interface */ 594 smc_pnet_find_ism_resource(smc->clcsock->sk, ini); 595 if (!ini->ism_dev[0]) 596 return SMC_CLC_DECL_NOSMCDDEV; 597 else 598 ini->ism_chid[0] = smc_ism_get_chid(ini->ism_dev[0]); 599 return 0; 600 } 601 602 /* determine possible V2 ISM devices (either without PNETID or with PNETID plus 603 * PNETID matching net_device) 604 */ 605 static int smc_find_ism_v2_device_clnt(struct smc_sock *smc, 606 struct smc_init_info *ini) 607 { 608 int rc = SMC_CLC_DECL_NOSMCDDEV; 609 struct smcd_dev *smcd; 610 int i = 1; 611 612 if (smcd_indicated(ini->smc_type_v1)) 613 rc = 0; /* already initialized for V1 */ 614 mutex_lock(&smcd_dev_list.mutex); 615 list_for_each_entry(smcd, &smcd_dev_list.list, list) { 616 if (smcd->going_away || smcd == ini->ism_dev[0]) 617 continue; 618 if (!smc_pnet_is_pnetid_set(smcd->pnetid) || 619 smc_pnet_is_ndev_pnetid(sock_net(&smc->sk), smcd->pnetid)) { 620 ini->ism_dev[i] = smcd; 621 ini->ism_chid[i] = smc_ism_get_chid(ini->ism_dev[i]); 622 ini->is_smcd = true; 623 rc = 0; 624 i++; 625 if (i > SMC_MAX_ISM_DEVS) 626 break; 627 } 628 } 629 mutex_unlock(&smcd_dev_list.mutex); 630 ini->ism_offered_cnt = i - 1; 631 if (!ini->ism_dev[0] && !ini->ism_dev[1]) 632 ini->smcd_version = 0; 633 634 return rc; 635 } 636 637 /* Check for VLAN ID and register it on 
ISM device just for CLC handshake */ 638 static int smc_connect_ism_vlan_setup(struct smc_sock *smc, 639 struct smc_init_info *ini) 640 { 641 if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev[0], ini->vlan_id)) 642 return SMC_CLC_DECL_ISMVLANERR; 643 return 0; 644 } 645 646 static int smc_find_proposal_devices(struct smc_sock *smc, 647 struct smc_init_info *ini) 648 { 649 int rc = 0; 650 651 /* check if there is an ism device available */ 652 if (ini->smcd_version & SMC_V1) { 653 if (smc_find_ism_device(smc, ini) || 654 smc_connect_ism_vlan_setup(smc, ini)) { 655 if (ini->smc_type_v1 == SMC_TYPE_B) 656 ini->smc_type_v1 = SMC_TYPE_R; 657 else 658 ini->smc_type_v1 = SMC_TYPE_N; 659 } /* else ISM V1 is supported for this connection */ 660 if (smc_find_rdma_device(smc, ini)) { 661 if (ini->smc_type_v1 == SMC_TYPE_B) 662 ini->smc_type_v1 = SMC_TYPE_D; 663 else 664 ini->smc_type_v1 = SMC_TYPE_N; 665 } /* else RDMA is supported for this connection */ 666 } 667 if (smc_ism_v2_capable && smc_find_ism_v2_device_clnt(smc, ini)) 668 ini->smc_type_v2 = SMC_TYPE_N; 669 670 /* if neither ISM nor RDMA are supported, fallback */ 671 if (!smcr_indicated(ini->smc_type_v1) && 672 ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N) 673 rc = SMC_CLC_DECL_NOSMCDEV; 674 675 return rc; 676 } 677 678 /* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is 679 * used, the VLAN ID will be registered again during the connection setup. 
 */
static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc,
					struct smc_init_info *ini)
{
	/* nothing was registered if SMC-D V1 is not indicated */
	if (!smcd_indicated(ini->smc_type_v1))
		return 0;
	if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev[0], ini->vlan_id))
		return SMC_CLC_DECL_CNFERR;
	return 0;
}

/* size of the receive buffer for a CLC accept message: v2 accept/confirm
 * message plus first contact extension plus message trailer
 */
#define SMC_CLC_MAX_ACCEPT_LEN \
	(sizeof(struct smc_clc_msg_accept_confirm_v2) + \
	 sizeof(struct smc_clc_first_contact_ext) + \
	 sizeof(struct smc_clc_msg_trail))

/* CLC handshake during connect: send the proposal and wait for the accept
 * message, which is received into @aclc2 (up to SMC_CLC_MAX_ACCEPT_LEN
 * bytes). Returns the error of sending the proposal or the result of
 * smc_clc_wait_msg().
 */
static int smc_connect_clc(struct smc_sock *smc,
			   struct smc_clc_msg_accept_confirm_v2 *aclc2,
			   struct smc_init_info *ini)
{
	int rc = 0;

	/* do inband token exchange */
	rc = smc_clc_send_proposal(smc, ini);
	if (rc)
		return rc;
	/* receive SMC Accept CLC message */
	return smc_clc_wait_msg(smc, aclc2, SMC_CLC_MAX_ACCEPT_LEN,
				SMC_CLC_ACCEPT, CLC_WAIT_TIME);
}

/* setup for RDMA connection of client
 *
 * smc_client_lgr_pending is held from before smc_conn_create() until the
 * setup is complete; every error path after a successful smc_conn_create()
 * leaves via smc_connect_abort(), which releases the mutex.
 * Returns 0 on success, otherwise the result of smc_conn_create(), of
 * smc_clc_send_confirm() or of smcr_clnt_conf_first_link(), or an
 * SMC_CLC_DECL_* reason code.
 */
static int smc_connect_rdma(struct smc_sock *smc,
			    struct smc_clc_msg_accept_confirm *aclc,
			    struct smc_init_info *ini)
{
	int i, reason_code = 0;
	struct smc_link *link;

	ini->is_smcd = false;
	ini->ib_lcl = &aclc->r0.lcl;
	ini->ib_clcqpn = ntoh24(aclc->r0.qpn);
	ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;

	mutex_lock(&smc_client_lgr_pending);
	reason_code = smc_conn_create(smc, ini);
	if (reason_code) {
		mutex_unlock(&smc_client_lgr_pending);
		return reason_code;
	}

	smc_conn_save_peer_info(smc, aclc);

	if (ini->first_contact_local) {
		link = smc->conn.lnk;
	} else {
		/* set link that was assigned by server */
		link = NULL;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			struct smc_link *l = &smc->conn.lgr->lnk[i];

			/* a link matches if peer QP number, GID and MAC all
			 * equal the values of the accept message
			 */
			if (l->peer_qpn == ntoh24(aclc->r0.qpn) &&
			    !memcmp(l->peer_gid, &aclc->r0.lcl.gid,
				    SMC_GID_SIZE) &&
			    !memcmp(l->peer_mac, &aclc->r0.lcl.mac,
				    sizeof(l->peer_mac))) {
				link = l;
				break;
			}
		}
		if (!link)
			return smc_connect_abort(smc, SMC_CLC_DECL_NOSRVLINK,
						 ini->first_contact_local);
		smc->conn.lnk = link;
	}

	/* create send buffer and rmb */
	if (smc_buf_create(smc, false))
		return smc_connect_abort(smc, SMC_CLC_DECL_MEM,
					 ini->first_contact_local);

	if (ini->first_contact_local)
		smc_link_save_peer_info(link, aclc);

	if (smc_rmb_rtoken_handling(&smc->conn, link, aclc))
		return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
					 ini->first_contact_local);

	smc_close_init(smc);
	smc_rx_init(smc);

	if (ini->first_contact_local) {
		if (smc_ib_ready_link(link))
			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
						 ini->first_contact_local);
	} else {
		/* existing link group: register the new rmb on all links */
		if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc))
			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
						 ini->first_contact_local);
	}
	smc_rmb_sync_sg_for_device(&smc->conn);

	reason_code = smc_clc_send_confirm(smc, ini->first_contact_local,
					   SMC_V1);
	if (reason_code)
		return smc_connect_abort(smc, reason_code,
					 ini->first_contact_local);

	smc_tx_init(smc);

	if (ini->first_contact_local) {
		/* QP confirmation over RoCE fabric */
		smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
		reason_code = smcr_clnt_conf_first_link(smc);
		smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
		if (reason_code)
			return smc_connect_abort(smc, reason_code,
						 ini->first_contact_local);
	}
	mutex_unlock(&smc_client_lgr_pending);

	smc_copy_sock_settings_to_clc(smc);
	smc->connect_nonblock = 0;
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return 0;
}

/* The server has chosen one of the proposed ISM devices for the communication.
 * Determine from the CHID of the received CLC ACCEPT the ISM device chosen.
812 */ 813 static int 814 smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm_v2 *aclc, 815 struct smc_init_info *ini) 816 { 817 int i; 818 819 for (i = 0; i < ini->ism_offered_cnt + 1; i++) { 820 if (ini->ism_chid[i] == ntohs(aclc->chid)) { 821 ini->ism_selected = i; 822 return 0; 823 } 824 } 825 826 return -EPROTO; 827 } 828 829 /* setup for ISM connection of client */ 830 static int smc_connect_ism(struct smc_sock *smc, 831 struct smc_clc_msg_accept_confirm *aclc, 832 struct smc_init_info *ini) 833 { 834 int rc = 0; 835 836 ini->is_smcd = true; 837 ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK; 838 839 if (aclc->hdr.version == SMC_V2) { 840 struct smc_clc_msg_accept_confirm_v2 *aclc_v2 = 841 (struct smc_clc_msg_accept_confirm_v2 *)aclc; 842 843 rc = smc_v2_determine_accepted_chid(aclc_v2, ini); 844 if (rc) 845 return rc; 846 } 847 ini->ism_peer_gid[ini->ism_selected] = aclc->d0.gid; 848 849 /* there is only one lgr role for SMC-D; use server lock */ 850 mutex_lock(&smc_server_lgr_pending); 851 rc = smc_conn_create(smc, ini); 852 if (rc) { 853 mutex_unlock(&smc_server_lgr_pending); 854 return rc; 855 } 856 857 /* Create send and receive buffers */ 858 rc = smc_buf_create(smc, true); 859 if (rc) 860 return smc_connect_abort(smc, (rc == -ENOSPC) ? 
861 SMC_CLC_DECL_MAX_DMB : 862 SMC_CLC_DECL_MEM, 863 ini->first_contact_local); 864 865 smc_conn_save_peer_info(smc, aclc); 866 smc_close_init(smc); 867 smc_rx_init(smc); 868 smc_tx_init(smc); 869 870 rc = smc_clc_send_confirm(smc, ini->first_contact_local, 871 aclc->hdr.version); 872 if (rc) 873 return smc_connect_abort(smc, rc, ini->first_contact_local); 874 mutex_unlock(&smc_server_lgr_pending); 875 876 smc_copy_sock_settings_to_clc(smc); 877 smc->connect_nonblock = 0; 878 if (smc->sk.sk_state == SMC_INIT) 879 smc->sk.sk_state = SMC_ACTIVE; 880 881 return 0; 882 } 883 884 /* check if received accept type and version matches a proposed one */ 885 static int smc_connect_check_aclc(struct smc_init_info *ini, 886 struct smc_clc_msg_accept_confirm *aclc) 887 { 888 if ((aclc->hdr.typev1 == SMC_TYPE_R && 889 !smcr_indicated(ini->smc_type_v1)) || 890 (aclc->hdr.typev1 == SMC_TYPE_D && 891 ((!smcd_indicated(ini->smc_type_v1) && 892 !smcd_indicated(ini->smc_type_v2)) || 893 (aclc->hdr.version == SMC_V1 && 894 !smcd_indicated(ini->smc_type_v1)) || 895 (aclc->hdr.version == SMC_V2 && 896 !smcd_indicated(ini->smc_type_v2))))) 897 return SMC_CLC_DECL_MODEUNSUPP; 898 899 return 0; 900 } 901 902 /* perform steps before actually connecting */ 903 static int __smc_connect(struct smc_sock *smc) 904 { 905 u8 version = smc_ism_v2_capable ? 
SMC_V2 : SMC_V1; 906 struct smc_clc_msg_accept_confirm_v2 *aclc2; 907 struct smc_clc_msg_accept_confirm *aclc; 908 struct smc_init_info *ini = NULL; 909 u8 *buf = NULL; 910 int rc = 0; 911 912 if (smc->use_fallback) 913 return smc_connect_fallback(smc, smc->fallback_rsn); 914 915 /* if peer has not signalled SMC-capability, fall back */ 916 if (!tcp_sk(smc->clcsock->sk)->syn_smc) 917 return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC); 918 919 /* IPSec connections opt out of SMC optimizations */ 920 if (using_ipsec(smc)) 921 return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC, 922 version); 923 924 ini = kzalloc(sizeof(*ini), GFP_KERNEL); 925 if (!ini) 926 return smc_connect_decline_fallback(smc, SMC_CLC_DECL_MEM, 927 version); 928 929 ini->smcd_version = SMC_V1; 930 ini->smcd_version |= smc_ism_v2_capable ? SMC_V2 : 0; 931 ini->smc_type_v1 = SMC_TYPE_B; 932 ini->smc_type_v2 = smc_ism_v2_capable ? SMC_TYPE_D : SMC_TYPE_N; 933 934 /* get vlan id from IP device */ 935 if (smc_vlan_by_tcpsk(smc->clcsock, ini)) { 936 ini->smcd_version &= ~SMC_V1; 937 ini->smc_type_v1 = SMC_TYPE_N; 938 if (!ini->smcd_version) { 939 rc = SMC_CLC_DECL_GETVLANERR; 940 goto fallback; 941 } 942 } 943 944 rc = smc_find_proposal_devices(smc, ini); 945 if (rc) 946 goto fallback; 947 948 buf = kzalloc(SMC_CLC_MAX_ACCEPT_LEN, GFP_KERNEL); 949 if (!buf) { 950 rc = SMC_CLC_DECL_MEM; 951 goto fallback; 952 } 953 aclc2 = (struct smc_clc_msg_accept_confirm_v2 *)buf; 954 aclc = (struct smc_clc_msg_accept_confirm *)aclc2; 955 956 /* perform CLC handshake */ 957 rc = smc_connect_clc(smc, aclc2, ini); 958 if (rc) 959 goto vlan_cleanup; 960 961 /* check if smc modes and versions of CLC proposal and accept match */ 962 rc = smc_connect_check_aclc(ini, aclc); 963 version = aclc->hdr.version == SMC_V1 ? 
SMC_V1 : version; 964 if (rc) 965 goto vlan_cleanup; 966 967 /* depending on previous steps, connect using rdma or ism */ 968 if (aclc->hdr.typev1 == SMC_TYPE_R) 969 rc = smc_connect_rdma(smc, aclc, ini); 970 else if (aclc->hdr.typev1 == SMC_TYPE_D) 971 rc = smc_connect_ism(smc, aclc, ini); 972 if (rc) 973 goto vlan_cleanup; 974 975 smc_connect_ism_vlan_cleanup(smc, ini); 976 kfree(buf); 977 kfree(ini); 978 return 0; 979 980 vlan_cleanup: 981 smc_connect_ism_vlan_cleanup(smc, ini); 982 kfree(buf); 983 fallback: 984 kfree(ini); 985 return smc_connect_decline_fallback(smc, rc, version); 986 } 987 988 static void smc_connect_work(struct work_struct *work) 989 { 990 struct smc_sock *smc = container_of(work, struct smc_sock, 991 connect_work); 992 long timeo = smc->sk.sk_sndtimeo; 993 int rc = 0; 994 995 if (!timeo) 996 timeo = MAX_SCHEDULE_TIMEOUT; 997 lock_sock(smc->clcsock->sk); 998 if (smc->clcsock->sk->sk_err) { 999 smc->sk.sk_err = smc->clcsock->sk->sk_err; 1000 } else if ((1 << smc->clcsock->sk->sk_state) & 1001 (TCPF_SYN_SENT | TCP_SYN_RECV)) { 1002 rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo); 1003 if ((rc == -EPIPE) && 1004 ((1 << smc->clcsock->sk->sk_state) & 1005 (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))) 1006 rc = 0; 1007 } 1008 release_sock(smc->clcsock->sk); 1009 lock_sock(&smc->sk); 1010 if (rc != 0 || smc->sk.sk_err) { 1011 smc->sk.sk_state = SMC_CLOSED; 1012 if (rc == -EPIPE || rc == -EAGAIN) 1013 smc->sk.sk_err = EPIPE; 1014 else if (signal_pending(current)) 1015 smc->sk.sk_err = -sock_intr_errno(timeo); 1016 sock_put(&smc->sk); /* passive closing */ 1017 goto out; 1018 } 1019 1020 rc = __smc_connect(smc); 1021 if (rc < 0) 1022 smc->sk.sk_err = -rc; 1023 1024 out: 1025 if (!sock_flag(&smc->sk, SOCK_DEAD)) { 1026 if (smc->sk.sk_err) { 1027 smc->sk.sk_state_change(&smc->sk); 1028 } else { /* allow polling before and after fallback decision */ 1029 smc->clcsock->sk->sk_write_space(smc->clcsock->sk); 1030 smc->sk.sk_write_space(&smc->sk); 1031 } 1032 
} 1033 release_sock(&smc->sk); 1034 } 1035 1036 static int smc_connect(struct socket *sock, struct sockaddr *addr, 1037 int alen, int flags) 1038 { 1039 struct sock *sk = sock->sk; 1040 struct smc_sock *smc; 1041 int rc = -EINVAL; 1042 1043 smc = smc_sk(sk); 1044 1045 /* separate smc parameter checking to be safe */ 1046 if (alen < sizeof(addr->sa_family)) 1047 goto out_err; 1048 if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6) 1049 goto out_err; 1050 1051 lock_sock(sk); 1052 switch (sk->sk_state) { 1053 default: 1054 goto out; 1055 case SMC_ACTIVE: 1056 rc = -EISCONN; 1057 goto out; 1058 case SMC_INIT: 1059 rc = 0; 1060 break; 1061 } 1062 1063 smc_copy_sock_settings_to_clc(smc); 1064 tcp_sk(smc->clcsock->sk)->syn_smc = 1; 1065 if (smc->connect_nonblock) { 1066 rc = -EALREADY; 1067 goto out; 1068 } 1069 rc = kernel_connect(smc->clcsock, addr, alen, flags); 1070 if (rc && rc != -EINPROGRESS) 1071 goto out; 1072 1073 sock_hold(&smc->sk); /* sock put in passive closing */ 1074 if (smc->use_fallback) 1075 goto out; 1076 if (flags & O_NONBLOCK) { 1077 if (queue_work(smc_hs_wq, &smc->connect_work)) 1078 smc->connect_nonblock = 1; 1079 rc = -EINPROGRESS; 1080 } else { 1081 rc = __smc_connect(smc); 1082 if (rc < 0) 1083 goto out; 1084 else 1085 rc = 0; /* success cases including fallback */ 1086 } 1087 1088 out: 1089 release_sock(sk); 1090 out_err: 1091 return rc; 1092 } 1093 1094 static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) 1095 { 1096 struct socket *new_clcsock = NULL; 1097 struct sock *lsk = &lsmc->sk; 1098 struct sock *new_sk; 1099 int rc = -EINVAL; 1100 1101 release_sock(lsk); 1102 new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol); 1103 if (!new_sk) { 1104 rc = -ENOMEM; 1105 lsk->sk_err = ENOMEM; 1106 *new_smc = NULL; 1107 lock_sock(lsk); 1108 goto out; 1109 } 1110 *new_smc = smc_sk(new_sk); 1111 1112 mutex_lock(&lsmc->clcsock_release_lock); 1113 if (lsmc->clcsock) 1114 rc = kernel_accept(lsmc->clcsock, 
&new_clcsock, SOCK_NONBLOCK); 1115 mutex_unlock(&lsmc->clcsock_release_lock); 1116 lock_sock(lsk); 1117 if (rc < 0 && rc != -EAGAIN) 1118 lsk->sk_err = -rc; 1119 if (rc < 0 || lsk->sk_state == SMC_CLOSED) { 1120 new_sk->sk_prot->unhash(new_sk); 1121 if (new_clcsock) 1122 sock_release(new_clcsock); 1123 new_sk->sk_state = SMC_CLOSED; 1124 sock_set_flag(new_sk, SOCK_DEAD); 1125 sock_put(new_sk); /* final */ 1126 *new_smc = NULL; 1127 goto out; 1128 } 1129 1130 /* new clcsock has inherited the smc listen-specific sk_data_ready 1131 * function; switch it back to the original sk_data_ready function 1132 */ 1133 new_clcsock->sk->sk_data_ready = lsmc->clcsk_data_ready; 1134 (*new_smc)->clcsock = new_clcsock; 1135 out: 1136 return rc; 1137 } 1138 1139 /* add a just created sock to the accept queue of the listen sock as 1140 * candidate for a following socket accept call from user space 1141 */ 1142 static void smc_accept_enqueue(struct sock *parent, struct sock *sk) 1143 { 1144 struct smc_sock *par = smc_sk(parent); 1145 1146 sock_hold(sk); /* sock_put in smc_accept_unlink () */ 1147 spin_lock(&par->accept_q_lock); 1148 list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q); 1149 spin_unlock(&par->accept_q_lock); 1150 sk_acceptq_added(parent); 1151 } 1152 1153 /* remove a socket from the accept queue of its parental listening socket */ 1154 static void smc_accept_unlink(struct sock *sk) 1155 { 1156 struct smc_sock *par = smc_sk(sk)->listen_smc; 1157 1158 spin_lock(&par->accept_q_lock); 1159 list_del_init(&smc_sk(sk)->accept_q); 1160 spin_unlock(&par->accept_q_lock); 1161 sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk); 1162 sock_put(sk); /* sock_hold in smc_accept_enqueue */ 1163 } 1164 1165 /* remove a sock from the accept queue to bind it to a new socket created 1166 * for a socket accept call from user space 1167 */ 1168 struct sock *smc_accept_dequeue(struct sock *parent, 1169 struct socket *new_sock) 1170 { 1171 struct smc_sock *isk, *n; 1172 struct sock *new_sk; 1173 
1174 list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) { 1175 new_sk = (struct sock *)isk; 1176 1177 smc_accept_unlink(new_sk); 1178 if (new_sk->sk_state == SMC_CLOSED) { 1179 new_sk->sk_prot->unhash(new_sk); 1180 if (isk->clcsock) { 1181 sock_release(isk->clcsock); 1182 isk->clcsock = NULL; 1183 } 1184 sock_put(new_sk); /* final */ 1185 continue; 1186 } 1187 if (new_sock) { 1188 sock_graft(new_sk, new_sock); 1189 if (isk->use_fallback) { 1190 smc_sk(new_sk)->clcsock->file = new_sock->file; 1191 isk->clcsock->file->private_data = isk->clcsock; 1192 } 1193 } 1194 return new_sk; 1195 } 1196 return NULL; 1197 } 1198 1199 /* clean up for a created but never accepted sock */ 1200 void smc_close_non_accepted(struct sock *sk) 1201 { 1202 struct smc_sock *smc = smc_sk(sk); 1203 1204 sock_hold(sk); /* sock_put below */ 1205 lock_sock(sk); 1206 if (!sk->sk_lingertime) 1207 /* wait for peer closing */ 1208 sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT; 1209 __smc_release(smc); 1210 release_sock(sk); 1211 sock_put(sk); /* sock_hold above */ 1212 sock_put(sk); /* final sock_put */ 1213 } 1214 1215 static int smcr_serv_conf_first_link(struct smc_sock *smc) 1216 { 1217 struct smc_link *link = smc->conn.lnk; 1218 struct smc_llc_qentry *qentry; 1219 int rc; 1220 1221 if (smcr_link_reg_rmb(link, smc->conn.rmb_desc)) 1222 return SMC_CLC_DECL_ERR_REGRMB; 1223 1224 /* send CONFIRM LINK request to client over the RoCE fabric */ 1225 rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ); 1226 if (rc < 0) 1227 return SMC_CLC_DECL_TIMEOUT_CL; 1228 1229 /* receive CONFIRM LINK response from client over the RoCE fabric */ 1230 qentry = smc_llc_wait(link->lgr, link, SMC_LLC_WAIT_TIME, 1231 SMC_LLC_CONFIRM_LINK); 1232 if (!qentry) { 1233 struct smc_clc_msg_decline dclc; 1234 1235 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), 1236 SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT); 1237 return rc == -EAGAIN ? 
SMC_CLC_DECL_TIMEOUT_CL : rc; 1238 } 1239 smc_llc_save_peer_uid(qentry); 1240 rc = smc_llc_eval_conf_link(qentry, SMC_LLC_RESP); 1241 smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl); 1242 if (rc) 1243 return SMC_CLC_DECL_RMBE_EC; 1244 1245 /* confirm_rkey is implicit on 1st contact */ 1246 smc->conn.rmb_desc->is_conf_rkey = true; 1247 1248 smc_llc_link_active(link); 1249 smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE); 1250 1251 /* initial contact - try to establish second link */ 1252 smc_llc_srv_add_link(link); 1253 return 0; 1254 } 1255 1256 /* listen worker: finish */ 1257 static void smc_listen_out(struct smc_sock *new_smc) 1258 { 1259 struct smc_sock *lsmc = new_smc->listen_smc; 1260 struct sock *newsmcsk = &new_smc->sk; 1261 1262 if (lsmc->sk.sk_state == SMC_LISTEN) { 1263 lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING); 1264 smc_accept_enqueue(&lsmc->sk, newsmcsk); 1265 release_sock(&lsmc->sk); 1266 } else { /* no longer listening */ 1267 smc_close_non_accepted(newsmcsk); 1268 } 1269 1270 /* Wake up accept */ 1271 lsmc->sk.sk_data_ready(&lsmc->sk); 1272 sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */ 1273 } 1274 1275 /* listen worker: finish in state connected */ 1276 static void smc_listen_out_connected(struct smc_sock *new_smc) 1277 { 1278 struct sock *newsmcsk = &new_smc->sk; 1279 1280 sk_refcnt_debug_inc(newsmcsk); 1281 if (newsmcsk->sk_state == SMC_INIT) 1282 newsmcsk->sk_state = SMC_ACTIVE; 1283 1284 smc_listen_out(new_smc); 1285 } 1286 1287 /* listen worker: finish in error state */ 1288 static void smc_listen_out_err(struct smc_sock *new_smc) 1289 { 1290 struct sock *newsmcsk = &new_smc->sk; 1291 1292 if (newsmcsk->sk_state == SMC_INIT) 1293 sock_put(&new_smc->sk); /* passive closing */ 1294 newsmcsk->sk_state = SMC_CLOSED; 1295 1296 smc_listen_out(new_smc); 1297 } 1298 1299 /* listen worker: decline and fall back if possible */ 1300 static void smc_listen_decline(struct smc_sock *new_smc, int reason_code, 1301 struct smc_init_info 
*ini, u8 version) 1302 { 1303 /* RDMA setup failed, switch back to TCP */ 1304 if (ini->first_contact_local) 1305 smc_lgr_cleanup_early(&new_smc->conn); 1306 else 1307 smc_conn_free(&new_smc->conn); 1308 if (reason_code < 0) { /* error, no fallback possible */ 1309 smc_listen_out_err(new_smc); 1310 return; 1311 } 1312 smc_switch_to_fallback(new_smc); 1313 new_smc->fallback_rsn = reason_code; 1314 if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) { 1315 if (smc_clc_send_decline(new_smc, reason_code, version) < 0) { 1316 smc_listen_out_err(new_smc); 1317 return; 1318 } 1319 } 1320 smc_listen_out_connected(new_smc); 1321 } 1322 1323 /* listen worker: version checking */ 1324 static int smc_listen_v2_check(struct smc_sock *new_smc, 1325 struct smc_clc_msg_proposal *pclc, 1326 struct smc_init_info *ini) 1327 { 1328 struct smc_clc_smcd_v2_extension *pclc_smcd_v2_ext; 1329 struct smc_clc_v2_extension *pclc_v2_ext; 1330 1331 ini->smc_type_v1 = pclc->hdr.typev1; 1332 ini->smc_type_v2 = pclc->hdr.typev2; 1333 ini->smcd_version = ini->smc_type_v1 != SMC_TYPE_N ? SMC_V1 : 0; 1334 if (pclc->hdr.version > SMC_V1) 1335 ini->smcd_version |= 1336 ini->smc_type_v2 != SMC_TYPE_N ? 
SMC_V2 : 0; 1337 if (!smc_ism_v2_capable) { 1338 ini->smcd_version &= ~SMC_V2; 1339 goto out; 1340 } 1341 pclc_v2_ext = smc_get_clc_v2_ext(pclc); 1342 if (!pclc_v2_ext) { 1343 ini->smcd_version &= ~SMC_V2; 1344 goto out; 1345 } 1346 pclc_smcd_v2_ext = smc_get_clc_smcd_v2_ext(pclc_v2_ext); 1347 if (!pclc_smcd_v2_ext) 1348 ini->smcd_version &= ~SMC_V2; 1349 1350 out: 1351 if (!ini->smcd_version) { 1352 if (pclc->hdr.typev1 == SMC_TYPE_B || 1353 pclc->hdr.typev2 == SMC_TYPE_B) 1354 return SMC_CLC_DECL_NOSMCDEV; 1355 if (pclc->hdr.typev1 == SMC_TYPE_D || 1356 pclc->hdr.typev2 == SMC_TYPE_D) 1357 return SMC_CLC_DECL_NOSMCDDEV; 1358 return SMC_CLC_DECL_NOSMCRDEV; 1359 } 1360 1361 return 0; 1362 } 1363 1364 /* listen worker: check prefixes */ 1365 static int smc_listen_prfx_check(struct smc_sock *new_smc, 1366 struct smc_clc_msg_proposal *pclc) 1367 { 1368 struct smc_clc_msg_proposal_prefix *pclc_prfx; 1369 struct socket *newclcsock = new_smc->clcsock; 1370 1371 if (pclc->hdr.typev1 == SMC_TYPE_N) 1372 return 0; 1373 pclc_prfx = smc_clc_proposal_get_prefix(pclc); 1374 if (smc_clc_prfx_match(newclcsock, pclc_prfx)) 1375 return SMC_CLC_DECL_DIFFPREFIX; 1376 1377 return 0; 1378 } 1379 1380 /* listen worker: initialize connection and buffers */ 1381 static int smc_listen_rdma_init(struct smc_sock *new_smc, 1382 struct smc_init_info *ini) 1383 { 1384 int rc; 1385 1386 /* allocate connection / link group */ 1387 rc = smc_conn_create(new_smc, ini); 1388 if (rc) 1389 return rc; 1390 1391 /* create send buffer and rmb */ 1392 if (smc_buf_create(new_smc, false)) 1393 return SMC_CLC_DECL_MEM; 1394 1395 return 0; 1396 } 1397 1398 /* listen worker: initialize connection and buffers for SMC-D */ 1399 static int smc_listen_ism_init(struct smc_sock *new_smc, 1400 struct smc_init_info *ini) 1401 { 1402 int rc; 1403 1404 rc = smc_conn_create(new_smc, ini); 1405 if (rc) 1406 return rc; 1407 1408 /* Create send and receive buffers */ 1409 rc = smc_buf_create(new_smc, true); 1410 if (rc) { 
1411 if (ini->first_contact_local) 1412 smc_lgr_cleanup_early(&new_smc->conn); 1413 else 1414 smc_conn_free(&new_smc->conn); 1415 return (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB : 1416 SMC_CLC_DECL_MEM; 1417 } 1418 1419 return 0; 1420 } 1421 1422 static bool smc_is_already_selected(struct smcd_dev *smcd, 1423 struct smc_init_info *ini, 1424 int matches) 1425 { 1426 int i; 1427 1428 for (i = 0; i < matches; i++) 1429 if (smcd == ini->ism_dev[i]) 1430 return true; 1431 1432 return false; 1433 } 1434 1435 /* check for ISM devices matching proposed ISM devices */ 1436 static void smc_check_ism_v2_match(struct smc_init_info *ini, 1437 u16 proposed_chid, u64 proposed_gid, 1438 unsigned int *matches) 1439 { 1440 struct smcd_dev *smcd; 1441 1442 list_for_each_entry(smcd, &smcd_dev_list.list, list) { 1443 if (smcd->going_away) 1444 continue; 1445 if (smc_is_already_selected(smcd, ini, *matches)) 1446 continue; 1447 if (smc_ism_get_chid(smcd) == proposed_chid && 1448 !smc_ism_cantalk(proposed_gid, ISM_RESERVED_VLANID, smcd)) { 1449 ini->ism_peer_gid[*matches] = proposed_gid; 1450 ini->ism_dev[*matches] = smcd; 1451 (*matches)++; 1452 break; 1453 } 1454 } 1455 } 1456 1457 static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc, 1458 struct smc_clc_msg_proposal *pclc, 1459 struct smc_init_info *ini) 1460 { 1461 struct smc_clc_smcd_v2_extension *smcd_v2_ext; 1462 struct smc_clc_v2_extension *smc_v2_ext; 1463 struct smc_clc_msg_smcd *pclc_smcd; 1464 unsigned int matches = 0; 1465 u8 *eid = NULL; 1466 int i; 1467 1468 if (!(ini->smcd_version & SMC_V2) || !smcd_indicated(ini->smc_type_v2)) 1469 return; 1470 1471 pclc_smcd = smc_get_clc_msg_smcd(pclc); 1472 smc_v2_ext = smc_get_clc_v2_ext(pclc); 1473 smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext); 1474 if (!smcd_v2_ext || 1475 !smc_v2_ext->hdr.flag.seid) /* no system EID support for SMCD */ 1476 goto not_found; 1477 1478 mutex_lock(&smcd_dev_list.mutex); 1479 if (pclc_smcd->ism.chid) 1480 /* check for ISM device matching 
proposed native ISM device */ 1481 smc_check_ism_v2_match(ini, ntohs(pclc_smcd->ism.chid), 1482 ntohll(pclc_smcd->ism.gid), &matches); 1483 for (i = 1; i <= smc_v2_ext->hdr.ism_gid_cnt; i++) { 1484 /* check for ISM devices matching proposed non-native ISM 1485 * devices 1486 */ 1487 smc_check_ism_v2_match(ini, 1488 ntohs(smcd_v2_ext->gidchid[i - 1].chid), 1489 ntohll(smcd_v2_ext->gidchid[i - 1].gid), 1490 &matches); 1491 } 1492 mutex_unlock(&smcd_dev_list.mutex); 1493 1494 if (ini->ism_dev[0]) { 1495 smc_ism_get_system_eid(ini->ism_dev[0], &eid); 1496 if (memcmp(eid, smcd_v2_ext->system_eid, SMC_MAX_EID_LEN)) 1497 goto not_found; 1498 } else { 1499 goto not_found; 1500 } 1501 1502 /* separate - outside the smcd_dev_list.lock */ 1503 for (i = 0; i < matches; i++) { 1504 ini->smcd_version = SMC_V2; 1505 ini->is_smcd = true; 1506 ini->ism_selected = i; 1507 if (smc_listen_ism_init(new_smc, ini)) 1508 /* try next active ISM device */ 1509 continue; 1510 return; /* matching and usable V2 ISM device found */ 1511 } 1512 1513 not_found: 1514 ini->smcd_version &= ~SMC_V2; 1515 ini->ism_dev[0] = NULL; 1516 ini->is_smcd = false; 1517 } 1518 1519 static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc, 1520 struct smc_clc_msg_proposal *pclc, 1521 struct smc_init_info *ini) 1522 { 1523 struct smc_clc_msg_smcd *pclc_smcd = smc_get_clc_msg_smcd(pclc); 1524 1525 /* check if ISM V1 is available */ 1526 if (!(ini->smcd_version & SMC_V1) || !smcd_indicated(ini->smc_type_v1)) 1527 goto not_found; 1528 ini->is_smcd = true; /* prepare ISM check */ 1529 ini->ism_peer_gid[0] = ntohll(pclc_smcd->ism.gid); 1530 if (smc_find_ism_device(new_smc, ini)) 1531 goto not_found; 1532 ini->ism_selected = 0; 1533 if (!smc_listen_ism_init(new_smc, ini)) 1534 return; /* V1 ISM device found */ 1535 1536 not_found: 1537 ini->ism_dev[0] = NULL; 1538 ini->is_smcd = false; 1539 } 1540 1541 /* listen worker: register buffers */ 1542 static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool 
local_first) 1543 { 1544 struct smc_connection *conn = &new_smc->conn; 1545 1546 if (!local_first) { 1547 if (smcr_lgr_reg_rmbs(conn->lnk, conn->rmb_desc)) 1548 return SMC_CLC_DECL_ERR_REGRMB; 1549 } 1550 smc_rmb_sync_sg_for_device(&new_smc->conn); 1551 1552 return 0; 1553 } 1554 1555 static int smc_find_rdma_v1_device_serv(struct smc_sock *new_smc, 1556 struct smc_clc_msg_proposal *pclc, 1557 struct smc_init_info *ini) 1558 { 1559 int rc; 1560 1561 if (!smcr_indicated(ini->smc_type_v1)) 1562 return SMC_CLC_DECL_NOSMCDEV; 1563 1564 /* prepare RDMA check */ 1565 ini->ib_lcl = &pclc->lcl; 1566 rc = smc_find_rdma_device(new_smc, ini); 1567 if (rc) { 1568 /* no RDMA device found */ 1569 if (ini->smc_type_v1 == SMC_TYPE_B) 1570 /* neither ISM nor RDMA device found */ 1571 rc = SMC_CLC_DECL_NOSMCDEV; 1572 return rc; 1573 } 1574 rc = smc_listen_rdma_init(new_smc, ini); 1575 if (rc) 1576 return rc; 1577 return smc_listen_rdma_reg(new_smc, ini->first_contact_local); 1578 } 1579 1580 /* determine the local device matching to proposal */ 1581 static int smc_listen_find_device(struct smc_sock *new_smc, 1582 struct smc_clc_msg_proposal *pclc, 1583 struct smc_init_info *ini) 1584 { 1585 int rc; 1586 1587 /* check for ISM device matching V2 proposed device */ 1588 smc_find_ism_v2_device_serv(new_smc, pclc, ini); 1589 if (ini->ism_dev[0]) 1590 return 0; 1591 1592 if (!(ini->smcd_version & SMC_V1)) 1593 return SMC_CLC_DECL_NOSMCDEV; 1594 1595 /* check for matching IP prefix and subnet length */ 1596 rc = smc_listen_prfx_check(new_smc, pclc); 1597 if (rc) 1598 return rc; 1599 1600 /* get vlan id from IP device */ 1601 if (smc_vlan_by_tcpsk(new_smc->clcsock, ini)) 1602 return SMC_CLC_DECL_GETVLANERR; 1603 1604 /* check for ISM device matching V1 proposed device */ 1605 smc_find_ism_v1_device_serv(new_smc, pclc, ini); 1606 if (ini->ism_dev[0]) 1607 return 0; 1608 1609 if (pclc->hdr.typev1 == SMC_TYPE_D) 1610 return SMC_CLC_DECL_NOSMCDDEV; /* skip RDMA and decline */ 1611 1612 /* check 
if RDMA is available */ 1613 return smc_find_rdma_v1_device_serv(new_smc, pclc, ini); 1614 } 1615 1616 /* listen worker: finish RDMA setup */ 1617 static int smc_listen_rdma_finish(struct smc_sock *new_smc, 1618 struct smc_clc_msg_accept_confirm *cclc, 1619 bool local_first) 1620 { 1621 struct smc_link *link = new_smc->conn.lnk; 1622 int reason_code = 0; 1623 1624 if (local_first) 1625 smc_link_save_peer_info(link, cclc); 1626 1627 if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc)) 1628 return SMC_CLC_DECL_ERR_RTOK; 1629 1630 if (local_first) { 1631 if (smc_ib_ready_link(link)) 1632 return SMC_CLC_DECL_ERR_RDYLNK; 1633 /* QP confirmation over RoCE fabric */ 1634 smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK); 1635 reason_code = smcr_serv_conf_first_link(new_smc); 1636 smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl); 1637 } 1638 return reason_code; 1639 } 1640 1641 /* setup for connection of server */ 1642 static void smc_listen_work(struct work_struct *work) 1643 { 1644 struct smc_sock *new_smc = container_of(work, struct smc_sock, 1645 smc_listen_work); 1646 u8 version = smc_ism_v2_capable ? 
SMC_V2 : SMC_V1; 1647 struct socket *newclcsock = new_smc->clcsock; 1648 struct smc_clc_msg_accept_confirm_v2 *cclc2; 1649 struct smc_clc_msg_accept_confirm *cclc; 1650 struct smc_clc_msg_proposal_area *buf; 1651 struct smc_clc_msg_proposal *pclc; 1652 struct smc_init_info *ini = NULL; 1653 int rc = 0; 1654 1655 if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN) 1656 return smc_listen_out_err(new_smc); 1657 1658 if (new_smc->use_fallback) { 1659 smc_listen_out_connected(new_smc); 1660 return; 1661 } 1662 1663 /* check if peer is smc capable */ 1664 if (!tcp_sk(newclcsock->sk)->syn_smc) { 1665 smc_switch_to_fallback(new_smc); 1666 new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC; 1667 smc_listen_out_connected(new_smc); 1668 return; 1669 } 1670 1671 /* do inband token exchange - 1672 * wait for and receive SMC Proposal CLC message 1673 */ 1674 buf = kzalloc(sizeof(*buf), GFP_KERNEL); 1675 if (!buf) { 1676 rc = SMC_CLC_DECL_MEM; 1677 goto out_decl; 1678 } 1679 pclc = (struct smc_clc_msg_proposal *)buf; 1680 rc = smc_clc_wait_msg(new_smc, pclc, sizeof(*buf), 1681 SMC_CLC_PROPOSAL, CLC_WAIT_TIME); 1682 if (rc) 1683 goto out_decl; 1684 version = pclc->hdr.version == SMC_V1 ? 
SMC_V1 : version; 1685 1686 /* IPSec connections opt out of SMC optimizations */ 1687 if (using_ipsec(new_smc)) { 1688 rc = SMC_CLC_DECL_IPSEC; 1689 goto out_decl; 1690 } 1691 1692 ini = kzalloc(sizeof(*ini), GFP_KERNEL); 1693 if (!ini) { 1694 rc = SMC_CLC_DECL_MEM; 1695 goto out_decl; 1696 } 1697 1698 /* initial version checking */ 1699 rc = smc_listen_v2_check(new_smc, pclc, ini); 1700 if (rc) 1701 goto out_decl; 1702 1703 mutex_lock(&smc_server_lgr_pending); 1704 smc_close_init(new_smc); 1705 smc_rx_init(new_smc); 1706 smc_tx_init(new_smc); 1707 1708 /* determine ISM or RoCE device used for connection */ 1709 rc = smc_listen_find_device(new_smc, pclc, ini); 1710 if (rc) 1711 goto out_unlock; 1712 1713 /* send SMC Accept CLC message */ 1714 rc = smc_clc_send_accept(new_smc, ini->first_contact_local, 1715 ini->smcd_version == SMC_V2 ? SMC_V2 : SMC_V1); 1716 if (rc) 1717 goto out_unlock; 1718 1719 /* SMC-D does not need this lock any more */ 1720 if (ini->is_smcd) 1721 mutex_unlock(&smc_server_lgr_pending); 1722 1723 /* receive SMC Confirm CLC message */ 1724 cclc2 = (struct smc_clc_msg_accept_confirm_v2 *)buf; 1725 cclc = (struct smc_clc_msg_accept_confirm *)cclc2; 1726 memset(buf, 0, sizeof(struct smc_clc_msg_proposal_area)); 1727 rc = smc_clc_wait_msg(new_smc, cclc2, 1728 sizeof(struct smc_clc_msg_proposal_area), 1729 SMC_CLC_CONFIRM, CLC_WAIT_TIME); 1730 if (rc) { 1731 if (!ini->is_smcd) 1732 goto out_unlock; 1733 goto out_decl; 1734 } 1735 1736 /* finish worker */ 1737 if (!ini->is_smcd) { 1738 rc = smc_listen_rdma_finish(new_smc, cclc, 1739 ini->first_contact_local); 1740 if (rc) 1741 goto out_unlock; 1742 mutex_unlock(&smc_server_lgr_pending); 1743 } 1744 smc_conn_save_peer_info(new_smc, cclc); 1745 smc_listen_out_connected(new_smc); 1746 goto out_free; 1747 1748 out_unlock: 1749 mutex_unlock(&smc_server_lgr_pending); 1750 out_decl: 1751 smc_listen_decline(new_smc, rc, ini, version); 1752 out_free: 1753 kfree(ini); 1754 kfree(buf); 1755 } 1756 1757 static 
void smc_tcp_listen_work(struct work_struct *work) 1758 { 1759 struct smc_sock *lsmc = container_of(work, struct smc_sock, 1760 tcp_listen_work); 1761 struct sock *lsk = &lsmc->sk; 1762 struct smc_sock *new_smc; 1763 int rc = 0; 1764 1765 lock_sock(lsk); 1766 while (lsk->sk_state == SMC_LISTEN) { 1767 rc = smc_clcsock_accept(lsmc, &new_smc); 1768 if (rc) /* clcsock accept queue empty or error */ 1769 goto out; 1770 if (!new_smc) 1771 continue; 1772 1773 new_smc->listen_smc = lsmc; 1774 new_smc->use_fallback = lsmc->use_fallback; 1775 new_smc->fallback_rsn = lsmc->fallback_rsn; 1776 sock_hold(lsk); /* sock_put in smc_listen_work */ 1777 INIT_WORK(&new_smc->smc_listen_work, smc_listen_work); 1778 smc_copy_sock_settings_to_smc(new_smc); 1779 new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf; 1780 new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf; 1781 sock_hold(&new_smc->sk); /* sock_put in passive closing */ 1782 if (!queue_work(smc_hs_wq, &new_smc->smc_listen_work)) 1783 sock_put(&new_smc->sk); 1784 } 1785 1786 out: 1787 release_sock(lsk); 1788 sock_put(&lsmc->sk); /* sock_hold in smc_clcsock_data_ready() */ 1789 } 1790 1791 static void smc_clcsock_data_ready(struct sock *listen_clcsock) 1792 { 1793 struct smc_sock *lsmc; 1794 1795 lsmc = (struct smc_sock *) 1796 ((uintptr_t)listen_clcsock->sk_user_data & ~SK_USER_DATA_NOCOPY); 1797 if (!lsmc) 1798 return; 1799 lsmc->clcsk_data_ready(listen_clcsock); 1800 if (lsmc->sk.sk_state == SMC_LISTEN) { 1801 sock_hold(&lsmc->sk); /* sock_put in smc_tcp_listen_work() */ 1802 if (!queue_work(smc_hs_wq, &lsmc->tcp_listen_work)) 1803 sock_put(&lsmc->sk); 1804 } 1805 } 1806 1807 static int smc_listen(struct socket *sock, int backlog) 1808 { 1809 struct sock *sk = sock->sk; 1810 struct smc_sock *smc; 1811 int rc; 1812 1813 smc = smc_sk(sk); 1814 lock_sock(sk); 1815 1816 rc = -EINVAL; 1817 if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) || 1818 smc->connect_nonblock) 1819 goto out; 1820 1821 rc = 0; 1822 if (sk->sk_state == 
SMC_LISTEN) { 1823 sk->sk_max_ack_backlog = backlog; 1824 goto out; 1825 } 1826 /* some socket options are handled in core, so we could not apply 1827 * them to the clc socket -- copy smc socket options to clc socket 1828 */ 1829 smc_copy_sock_settings_to_clc(smc); 1830 if (!smc->use_fallback) 1831 tcp_sk(smc->clcsock->sk)->syn_smc = 1; 1832 1833 /* save original sk_data_ready function and establish 1834 * smc-specific sk_data_ready function 1835 */ 1836 smc->clcsk_data_ready = smc->clcsock->sk->sk_data_ready; 1837 smc->clcsock->sk->sk_data_ready = smc_clcsock_data_ready; 1838 smc->clcsock->sk->sk_user_data = 1839 (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); 1840 rc = kernel_listen(smc->clcsock, backlog); 1841 if (rc) 1842 goto out; 1843 sk->sk_max_ack_backlog = backlog; 1844 sk->sk_ack_backlog = 0; 1845 sk->sk_state = SMC_LISTEN; 1846 1847 out: 1848 release_sock(sk); 1849 return rc; 1850 } 1851 1852 static int smc_accept(struct socket *sock, struct socket *new_sock, 1853 int flags, bool kern) 1854 { 1855 struct sock *sk = sock->sk, *nsk; 1856 DECLARE_WAITQUEUE(wait, current); 1857 struct smc_sock *lsmc; 1858 long timeo; 1859 int rc = 0; 1860 1861 lsmc = smc_sk(sk); 1862 sock_hold(sk); /* sock_put below */ 1863 lock_sock(sk); 1864 1865 if (lsmc->sk.sk_state != SMC_LISTEN) { 1866 rc = -EINVAL; 1867 release_sock(sk); 1868 goto out; 1869 } 1870 1871 /* Wait for an incoming connection */ 1872 timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); 1873 add_wait_queue_exclusive(sk_sleep(sk), &wait); 1874 while (!(nsk = smc_accept_dequeue(sk, new_sock))) { 1875 set_current_state(TASK_INTERRUPTIBLE); 1876 if (!timeo) { 1877 rc = -EAGAIN; 1878 break; 1879 } 1880 release_sock(sk); 1881 timeo = schedule_timeout(timeo); 1882 /* wakeup by sk_data_ready in smc_listen_work() */ 1883 sched_annotate_sleep(); 1884 lock_sock(sk); 1885 if (signal_pending(current)) { 1886 rc = sock_intr_errno(timeo); 1887 break; 1888 } 1889 } 1890 set_current_state(TASK_RUNNING); 1891 
remove_wait_queue(sk_sleep(sk), &wait); 1892 1893 if (!rc) 1894 rc = sock_error(nsk); 1895 release_sock(sk); 1896 if (rc) 1897 goto out; 1898 1899 if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) { 1900 /* wait till data arrives on the socket */ 1901 timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept * 1902 MSEC_PER_SEC); 1903 if (smc_sk(nsk)->use_fallback) { 1904 struct sock *clcsk = smc_sk(nsk)->clcsock->sk; 1905 1906 lock_sock(clcsk); 1907 if (skb_queue_empty(&clcsk->sk_receive_queue)) 1908 sk_wait_data(clcsk, &timeo, NULL); 1909 release_sock(clcsk); 1910 } else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) { 1911 lock_sock(nsk); 1912 smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available); 1913 release_sock(nsk); 1914 } 1915 } 1916 1917 out: 1918 sock_put(sk); /* sock_hold above */ 1919 return rc; 1920 } 1921 1922 static int smc_getname(struct socket *sock, struct sockaddr *addr, 1923 int peer) 1924 { 1925 struct smc_sock *smc; 1926 1927 if (peer && (sock->sk->sk_state != SMC_ACTIVE) && 1928 (sock->sk->sk_state != SMC_APPCLOSEWAIT1)) 1929 return -ENOTCONN; 1930 1931 smc = smc_sk(sock->sk); 1932 1933 return smc->clcsock->ops->getname(smc->clcsock, addr, peer); 1934 } 1935 1936 static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) 1937 { 1938 struct sock *sk = sock->sk; 1939 struct smc_sock *smc; 1940 int rc = -EPIPE; 1941 1942 smc = smc_sk(sk); 1943 lock_sock(sk); 1944 if ((sk->sk_state != SMC_ACTIVE) && 1945 (sk->sk_state != SMC_APPCLOSEWAIT1) && 1946 (sk->sk_state != SMC_INIT)) 1947 goto out; 1948 1949 if (msg->msg_flags & MSG_FASTOPEN) { 1950 if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) { 1951 smc_switch_to_fallback(smc); 1952 smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP; 1953 } else { 1954 rc = -EINVAL; 1955 goto out; 1956 } 1957 } 1958 1959 if (smc->use_fallback) 1960 rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len); 1961 else 1962 rc = smc_tx_sendmsg(smc, msg, len); 1963 out: 1964 release_sock(sk); 1965 
return rc; 1966 } 1967 1968 static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, 1969 int flags) 1970 { 1971 struct sock *sk = sock->sk; 1972 struct smc_sock *smc; 1973 int rc = -ENOTCONN; 1974 1975 smc = smc_sk(sk); 1976 lock_sock(sk); 1977 if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { 1978 /* socket was connected before, no more data to read */ 1979 rc = 0; 1980 goto out; 1981 } 1982 if ((sk->sk_state == SMC_INIT) || 1983 (sk->sk_state == SMC_LISTEN) || 1984 (sk->sk_state == SMC_CLOSED)) 1985 goto out; 1986 1987 if (sk->sk_state == SMC_PEERFINCLOSEWAIT) { 1988 rc = 0; 1989 goto out; 1990 } 1991 1992 if (smc->use_fallback) { 1993 rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags); 1994 } else { 1995 msg->msg_namelen = 0; 1996 rc = smc_rx_recvmsg(smc, msg, NULL, len, flags); 1997 } 1998 1999 out: 2000 release_sock(sk); 2001 return rc; 2002 } 2003 2004 static __poll_t smc_accept_poll(struct sock *parent) 2005 { 2006 struct smc_sock *isk = smc_sk(parent); 2007 __poll_t mask = 0; 2008 2009 spin_lock(&isk->accept_q_lock); 2010 if (!list_empty(&isk->accept_q)) 2011 mask = EPOLLIN | EPOLLRDNORM; 2012 spin_unlock(&isk->accept_q_lock); 2013 2014 return mask; 2015 } 2016 2017 static __poll_t smc_poll(struct file *file, struct socket *sock, 2018 poll_table *wait) 2019 { 2020 struct sock *sk = sock->sk; 2021 struct smc_sock *smc; 2022 __poll_t mask = 0; 2023 2024 if (!sk) 2025 return EPOLLNVAL; 2026 2027 smc = smc_sk(sock->sk); 2028 if (smc->use_fallback) { 2029 /* delegate to CLC child sock */ 2030 mask = smc->clcsock->ops->poll(file, smc->clcsock, wait); 2031 sk->sk_err = smc->clcsock->sk->sk_err; 2032 } else { 2033 if (sk->sk_state != SMC_CLOSED) 2034 sock_poll_wait(file, sock, wait); 2035 if (sk->sk_err) 2036 mask |= EPOLLERR; 2037 if ((sk->sk_shutdown == SHUTDOWN_MASK) || 2038 (sk->sk_state == SMC_CLOSED)) 2039 mask |= EPOLLHUP; 2040 if (sk->sk_state == SMC_LISTEN) { 2041 /* woken up by sk_data_ready in 
smc_listen_work() */ 2042 mask |= smc_accept_poll(sk); 2043 } else if (smc->use_fallback) { /* as result of connect_work()*/ 2044 mask |= smc->clcsock->ops->poll(file, smc->clcsock, 2045 wait); 2046 sk->sk_err = smc->clcsock->sk->sk_err; 2047 } else { 2048 if ((sk->sk_state != SMC_INIT && 2049 atomic_read(&smc->conn.sndbuf_space)) || 2050 sk->sk_shutdown & SEND_SHUTDOWN) { 2051 mask |= EPOLLOUT | EPOLLWRNORM; 2052 } else { 2053 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 2054 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 2055 } 2056 if (atomic_read(&smc->conn.bytes_to_rcv)) 2057 mask |= EPOLLIN | EPOLLRDNORM; 2058 if (sk->sk_shutdown & RCV_SHUTDOWN) 2059 mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; 2060 if (sk->sk_state == SMC_APPCLOSEWAIT1) 2061 mask |= EPOLLIN; 2062 if (smc->conn.urg_state == SMC_URG_VALID) 2063 mask |= EPOLLPRI; 2064 } 2065 } 2066 2067 return mask; 2068 } 2069 2070 static int smc_shutdown(struct socket *sock, int how) 2071 { 2072 struct sock *sk = sock->sk; 2073 struct smc_sock *smc; 2074 int rc = -EINVAL; 2075 int rc1 = 0; 2076 2077 smc = smc_sk(sk); 2078 2079 if ((how < SHUT_RD) || (how > SHUT_RDWR)) 2080 return rc; 2081 2082 lock_sock(sk); 2083 2084 rc = -ENOTCONN; 2085 if ((sk->sk_state != SMC_ACTIVE) && 2086 (sk->sk_state != SMC_PEERCLOSEWAIT1) && 2087 (sk->sk_state != SMC_PEERCLOSEWAIT2) && 2088 (sk->sk_state != SMC_APPCLOSEWAIT1) && 2089 (sk->sk_state != SMC_APPCLOSEWAIT2) && 2090 (sk->sk_state != SMC_APPFINCLOSEWAIT)) 2091 goto out; 2092 if (smc->use_fallback) { 2093 rc = kernel_sock_shutdown(smc->clcsock, how); 2094 sk->sk_shutdown = smc->clcsock->sk->sk_shutdown; 2095 if (sk->sk_shutdown == SHUTDOWN_MASK) 2096 sk->sk_state = SMC_CLOSED; 2097 goto out; 2098 } 2099 switch (how) { 2100 case SHUT_RDWR: /* shutdown in both directions */ 2101 rc = smc_close_active(smc); 2102 break; 2103 case SHUT_WR: 2104 rc = smc_close_shutdown_write(smc); 2105 break; 2106 case SHUT_RD: 2107 rc = 0; 2108 /* nothing more to do because peer is not involved */ 2109 
break; 2110 } 2111 if (smc->clcsock) 2112 rc1 = kernel_sock_shutdown(smc->clcsock, how); 2113 /* map sock_shutdown_cmd constants to sk_shutdown value range */ 2114 sk->sk_shutdown |= how + 1; 2115 2116 out: 2117 release_sock(sk); 2118 return rc ? rc : rc1; 2119 } 2120 2121 static int smc_setsockopt(struct socket *sock, int level, int optname, 2122 sockptr_t optval, unsigned int optlen) 2123 { 2124 struct sock *sk = sock->sk; 2125 struct smc_sock *smc; 2126 int val, rc; 2127 2128 smc = smc_sk(sk); 2129 2130 /* generic setsockopts reaching us here always apply to the 2131 * CLC socket 2132 */ 2133 if (unlikely(!smc->clcsock->ops->setsockopt)) 2134 rc = -EOPNOTSUPP; 2135 else 2136 rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname, 2137 optval, optlen); 2138 if (smc->clcsock->sk->sk_err) { 2139 sk->sk_err = smc->clcsock->sk->sk_err; 2140 sk->sk_error_report(sk); 2141 } 2142 2143 if (optlen < sizeof(int)) 2144 return -EINVAL; 2145 if (copy_from_sockptr(&val, optval, sizeof(int))) 2146 return -EFAULT; 2147 2148 lock_sock(sk); 2149 if (rc || smc->use_fallback) 2150 goto out; 2151 switch (optname) { 2152 case TCP_ULP: 2153 case TCP_FASTOPEN: 2154 case TCP_FASTOPEN_CONNECT: 2155 case TCP_FASTOPEN_KEY: 2156 case TCP_FASTOPEN_NO_COOKIE: 2157 /* option not supported by SMC */ 2158 if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) { 2159 smc_switch_to_fallback(smc); 2160 smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP; 2161 } else { 2162 rc = -EINVAL; 2163 } 2164 break; 2165 case TCP_NODELAY: 2166 if (sk->sk_state != SMC_INIT && 2167 sk->sk_state != SMC_LISTEN && 2168 sk->sk_state != SMC_CLOSED) { 2169 if (val) 2170 mod_delayed_work(smc->conn.lgr->tx_wq, 2171 &smc->conn.tx_work, 0); 2172 } 2173 break; 2174 case TCP_CORK: 2175 if (sk->sk_state != SMC_INIT && 2176 sk->sk_state != SMC_LISTEN && 2177 sk->sk_state != SMC_CLOSED) { 2178 if (!val) 2179 mod_delayed_work(smc->conn.lgr->tx_wq, 2180 &smc->conn.tx_work, 0); 2181 } 2182 break; 2183 case TCP_DEFER_ACCEPT: 2184 
smc->sockopt_defer_accept = val; 2185 break; 2186 default: 2187 break; 2188 } 2189 out: 2190 release_sock(sk); 2191 2192 return rc; 2193 } 2194 2195 static int smc_getsockopt(struct socket *sock, int level, int optname, 2196 char __user *optval, int __user *optlen) 2197 { 2198 struct smc_sock *smc; 2199 2200 smc = smc_sk(sock->sk); 2201 /* socket options apply to the CLC socket */ 2202 if (unlikely(!smc->clcsock->ops->getsockopt)) 2203 return -EOPNOTSUPP; 2204 return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname, 2205 optval, optlen); 2206 } 2207 2208 static int smc_ioctl(struct socket *sock, unsigned int cmd, 2209 unsigned long arg) 2210 { 2211 union smc_host_cursor cons, urg; 2212 struct smc_connection *conn; 2213 struct smc_sock *smc; 2214 int answ; 2215 2216 smc = smc_sk(sock->sk); 2217 conn = &smc->conn; 2218 lock_sock(&smc->sk); 2219 if (smc->use_fallback) { 2220 if (!smc->clcsock) { 2221 release_sock(&smc->sk); 2222 return -EBADF; 2223 } 2224 answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg); 2225 release_sock(&smc->sk); 2226 return answ; 2227 } 2228 switch (cmd) { 2229 case SIOCINQ: /* same as FIONREAD */ 2230 if (smc->sk.sk_state == SMC_LISTEN) { 2231 release_sock(&smc->sk); 2232 return -EINVAL; 2233 } 2234 if (smc->sk.sk_state == SMC_INIT || 2235 smc->sk.sk_state == SMC_CLOSED) 2236 answ = 0; 2237 else 2238 answ = atomic_read(&smc->conn.bytes_to_rcv); 2239 break; 2240 case SIOCOUTQ: 2241 /* output queue size (not send + not acked) */ 2242 if (smc->sk.sk_state == SMC_LISTEN) { 2243 release_sock(&smc->sk); 2244 return -EINVAL; 2245 } 2246 if (smc->sk.sk_state == SMC_INIT || 2247 smc->sk.sk_state == SMC_CLOSED) 2248 answ = 0; 2249 else 2250 answ = smc->conn.sndbuf_desc->len - 2251 atomic_read(&smc->conn.sndbuf_space); 2252 break; 2253 case SIOCOUTQNSD: 2254 /* output queue size (not send only) */ 2255 if (smc->sk.sk_state == SMC_LISTEN) { 2256 release_sock(&smc->sk); 2257 return -EINVAL; 2258 } 2259 if (smc->sk.sk_state == SMC_INIT || 2260 
smc->sk.sk_state == SMC_CLOSED) 2261 answ = 0; 2262 else 2263 answ = smc_tx_prepared_sends(&smc->conn); 2264 break; 2265 case SIOCATMARK: 2266 if (smc->sk.sk_state == SMC_LISTEN) { 2267 release_sock(&smc->sk); 2268 return -EINVAL; 2269 } 2270 if (smc->sk.sk_state == SMC_INIT || 2271 smc->sk.sk_state == SMC_CLOSED) { 2272 answ = 0; 2273 } else { 2274 smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn); 2275 smc_curs_copy(&urg, &conn->urg_curs, conn); 2276 answ = smc_curs_diff(conn->rmb_desc->len, 2277 &cons, &urg) == 1; 2278 } 2279 break; 2280 default: 2281 release_sock(&smc->sk); 2282 return -ENOIOCTLCMD; 2283 } 2284 release_sock(&smc->sk); 2285 2286 return put_user(answ, (int __user *)arg); 2287 } 2288 2289 static ssize_t smc_sendpage(struct socket *sock, struct page *page, 2290 int offset, size_t size, int flags) 2291 { 2292 struct sock *sk = sock->sk; 2293 struct smc_sock *smc; 2294 int rc = -EPIPE; 2295 2296 smc = smc_sk(sk); 2297 lock_sock(sk); 2298 if (sk->sk_state != SMC_ACTIVE) { 2299 release_sock(sk); 2300 goto out; 2301 } 2302 release_sock(sk); 2303 if (smc->use_fallback) 2304 rc = kernel_sendpage(smc->clcsock, page, offset, 2305 size, flags); 2306 else 2307 rc = sock_no_sendpage(sock, page, offset, size, flags); 2308 2309 out: 2310 return rc; 2311 } 2312 2313 /* Map the affected portions of the rmbe into an spd, note the number of bytes 2314 * to splice in conn->splice_pending, and press 'go'. Delays consumer cursor 2315 * updates till whenever a respective page has been fully processed. 2316 * Note that subsequent recv() calls have to wait till all splice() processing 2317 * completed. 
2318 */ 2319 static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos, 2320 struct pipe_inode_info *pipe, size_t len, 2321 unsigned int flags) 2322 { 2323 struct sock *sk = sock->sk; 2324 struct smc_sock *smc; 2325 int rc = -ENOTCONN; 2326 2327 smc = smc_sk(sk); 2328 lock_sock(sk); 2329 if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { 2330 /* socket was connected before, no more data to read */ 2331 rc = 0; 2332 goto out; 2333 } 2334 if (sk->sk_state == SMC_INIT || 2335 sk->sk_state == SMC_LISTEN || 2336 sk->sk_state == SMC_CLOSED) 2337 goto out; 2338 2339 if (sk->sk_state == SMC_PEERFINCLOSEWAIT) { 2340 rc = 0; 2341 goto out; 2342 } 2343 2344 if (smc->use_fallback) { 2345 rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos, 2346 pipe, len, flags); 2347 } else { 2348 if (*ppos) { 2349 rc = -ESPIPE; 2350 goto out; 2351 } 2352 if (flags & SPLICE_F_NONBLOCK) 2353 flags = MSG_DONTWAIT; 2354 else 2355 flags = 0; 2356 rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags); 2357 } 2358 out: 2359 release_sock(sk); 2360 2361 return rc; 2362 } 2363 2364 /* must look like tcp */ 2365 static const struct proto_ops smc_sock_ops = { 2366 .family = PF_SMC, 2367 .owner = THIS_MODULE, 2368 .release = smc_release, 2369 .bind = smc_bind, 2370 .connect = smc_connect, 2371 .socketpair = sock_no_socketpair, 2372 .accept = smc_accept, 2373 .getname = smc_getname, 2374 .poll = smc_poll, 2375 .ioctl = smc_ioctl, 2376 .listen = smc_listen, 2377 .shutdown = smc_shutdown, 2378 .setsockopt = smc_setsockopt, 2379 .getsockopt = smc_getsockopt, 2380 .sendmsg = smc_sendmsg, 2381 .recvmsg = smc_recvmsg, 2382 .mmap = sock_no_mmap, 2383 .sendpage = smc_sendpage, 2384 .splice_read = smc_splice_read, 2385 }; 2386 2387 static int smc_create(struct net *net, struct socket *sock, int protocol, 2388 int kern) 2389 { 2390 int family = (protocol == SMCPROTO_SMC6) ? 
PF_INET6 : PF_INET; 2391 struct smc_sock *smc; 2392 struct sock *sk; 2393 int rc; 2394 2395 rc = -ESOCKTNOSUPPORT; 2396 if (sock->type != SOCK_STREAM) 2397 goto out; 2398 2399 rc = -EPROTONOSUPPORT; 2400 if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6) 2401 goto out; 2402 2403 rc = -ENOBUFS; 2404 sock->ops = &smc_sock_ops; 2405 sk = smc_sock_alloc(net, sock, protocol); 2406 if (!sk) 2407 goto out; 2408 2409 /* create internal TCP socket for CLC handshake and fallback */ 2410 smc = smc_sk(sk); 2411 smc->use_fallback = false; /* assume rdma capability first */ 2412 smc->fallback_rsn = 0; 2413 rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, 2414 &smc->clcsock); 2415 if (rc) { 2416 sk_common_release(sk); 2417 goto out; 2418 } 2419 smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE); 2420 smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE); 2421 2422 out: 2423 return rc; 2424 } 2425 2426 static const struct net_proto_family smc_sock_family_ops = { 2427 .family = PF_SMC, 2428 .owner = THIS_MODULE, 2429 .create = smc_create, 2430 }; 2431 2432 unsigned int smc_net_id; 2433 2434 static __net_init int smc_net_init(struct net *net) 2435 { 2436 return smc_pnet_net_init(net); 2437 } 2438 2439 static void __net_exit smc_net_exit(struct net *net) 2440 { 2441 smc_pnet_net_exit(net); 2442 } 2443 2444 static struct pernet_operations smc_net_ops = { 2445 .init = smc_net_init, 2446 .exit = smc_net_exit, 2447 .id = &smc_net_id, 2448 .size = sizeof(struct smc_net), 2449 }; 2450 2451 static int __init smc_init(void) 2452 { 2453 int rc; 2454 2455 rc = register_pernet_subsys(&smc_net_ops); 2456 if (rc) 2457 return rc; 2458 2459 smc_ism_init(); 2460 smc_clc_init(); 2461 2462 rc = smc_pnet_init(); 2463 if (rc) 2464 goto out_pernet_subsys; 2465 2466 rc = -ENOMEM; 2467 smc_hs_wq = alloc_workqueue("smc_hs_wq", 0, 0); 2468 if (!smc_hs_wq) 2469 goto out_pnet; 2470 2471 smc_close_wq = alloc_workqueue("smc_close_wq", 0, 0); 2472 if 
(!smc_close_wq) 2473 goto out_alloc_hs_wq; 2474 2475 rc = smc_core_init(); 2476 if (rc) { 2477 pr_err("%s: smc_core_init fails with %d\n", __func__, rc); 2478 goto out_alloc_wqs; 2479 } 2480 2481 rc = smc_llc_init(); 2482 if (rc) { 2483 pr_err("%s: smc_llc_init fails with %d\n", __func__, rc); 2484 goto out_core; 2485 } 2486 2487 rc = smc_cdc_init(); 2488 if (rc) { 2489 pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc); 2490 goto out_core; 2491 } 2492 2493 rc = proto_register(&smc_proto, 1); 2494 if (rc) { 2495 pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc); 2496 goto out_core; 2497 } 2498 2499 rc = proto_register(&smc_proto6, 1); 2500 if (rc) { 2501 pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc); 2502 goto out_proto; 2503 } 2504 2505 rc = sock_register(&smc_sock_family_ops); 2506 if (rc) { 2507 pr_err("%s: sock_register fails with %d\n", __func__, rc); 2508 goto out_proto6; 2509 } 2510 INIT_HLIST_HEAD(&smc_v4_hashinfo.ht); 2511 INIT_HLIST_HEAD(&smc_v6_hashinfo.ht); 2512 2513 rc = smc_ib_register_client(); 2514 if (rc) { 2515 pr_err("%s: ib_register fails with %d\n", __func__, rc); 2516 goto out_sock; 2517 } 2518 2519 static_branch_enable(&tcp_have_smc); 2520 return 0; 2521 2522 out_sock: 2523 sock_unregister(PF_SMC); 2524 out_proto6: 2525 proto_unregister(&smc_proto6); 2526 out_proto: 2527 proto_unregister(&smc_proto); 2528 out_core: 2529 smc_core_exit(); 2530 out_alloc_wqs: 2531 destroy_workqueue(smc_close_wq); 2532 out_alloc_hs_wq: 2533 destroy_workqueue(smc_hs_wq); 2534 out_pnet: 2535 smc_pnet_exit(); 2536 out_pernet_subsys: 2537 unregister_pernet_subsys(&smc_net_ops); 2538 2539 return rc; 2540 } 2541 2542 static void __exit smc_exit(void) 2543 { 2544 static_branch_disable(&tcp_have_smc); 2545 sock_unregister(PF_SMC); 2546 smc_core_exit(); 2547 smc_ib_unregister_client(); 2548 destroy_workqueue(smc_close_wq); 2549 destroy_workqueue(smc_hs_wq); 2550 proto_unregister(&smc_proto6); 2551 proto_unregister(&smc_proto); 2552 
smc_pnet_exit(); 2553 unregister_pernet_subsys(&smc_net_ops); 2554 rcu_barrier(); 2555 } 2556 2557 module_init(smc_init); 2558 module_exit(smc_exit); 2559 2560 MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>"); 2561 MODULE_DESCRIPTION("smc socket address family"); 2562 MODULE_LICENSE("GPL"); 2563 MODULE_ALIAS_NETPROTO(PF_SMC); 2564