// SPDX-License-Identifier: GPL-2.0-only
/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  AF_SMC protocol family socket handler keeping the AF_INET sock address type
 *  applies to SOCK_STREAM sockets only
 *  offers an alternative communication option for TCP-protocol sockets
 *  applicable with RoCE-cards only
 *
 *  Initial restrictions:
 *    - support for alternate links postponed
 *
 *  Copyright IBM Corp. 2016, 2018
 *
 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 *              based on prototype from Frank Blaschka
 */

#define KMSG_COMPONENT "smc"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/socket.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/sched/signal.h>
#include <linux/if_vlan.h>
#include <linux/rcupdate_wait.h>

#include <net/sock.h>
#include <net/tcp.h>
#include <net/smc.h>
#include <asm/ioctls.h>

#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include "smc_netns.h"

#include "smc.h"
#include "smc_clc.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_ism.h"
#include "smc_pnet.h"
#include "smc_tx.h"
#include "smc_rx.h"
#include "smc_close.h"

static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
						 * creation on server
						 */
static DEFINE_MUTEX(smc_client_lgr_pending);	/* serialize link group
						 * creation on client
						 */

static void smc_tcp_listen_work(struct work_struct *);
static void smc_connect_work(struct work_struct *);

static void smc_set_keepalive(struct sock *sk, int val)
{
	struct smc_sock *smc = smc_sk(sk);

	smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
}

static struct smc_hashinfo smc_v4_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};

static struct smc_hashinfo smc_v6_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
};

int smc_hash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
	struct hlist_head *head;

	head = &h->ht;

	write_lock_bh(&h->lock);
	sk_add_node(sk, head);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	write_unlock_bh(&h->lock);

	return 0;
}
EXPORT_SYMBOL_GPL(smc_hash_sk);

void smc_unhash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;

	write_lock_bh(&h->lock);
	if (sk_del_node_init(sk))
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	write_unlock_bh(&h->lock);
}
EXPORT_SYMBOL_GPL(smc_unhash_sk);

struct proto smc_proto = {
	.name		= "SMC",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v4_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto);

struct proto smc_proto6 = {
	.name		= "SMC6",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v6_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto6);
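/* Illustrative usage (not part of this file's flow): user space selects the
 * addressing variant via the protocol argument of socket(), e.g.
 *
 *	fd = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC);	 // AF_INET addresses
 *	fd = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC6); // AF_INET6 addresses
 *
 * smc_create() at the bottom of this file maps these onto smc_proto resp.
 * smc_proto6 and creates the internal TCP "clcsock" used for the CLC
 * handshake and for fallback.
 */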
static void smc_restore_fallback_changes(struct smc_sock *smc)
{
	if (smc->clcsock->file) { /* non-accepted sockets have no file yet */
		smc->clcsock->file->private_data = smc->sk.sk_socket;
		smc->clcsock->file = NULL;
	}
}

static int __smc_release(struct smc_sock *smc)
{
	struct sock *sk = &smc->sk;
	int rc = 0;

	if (!smc->use_fallback) {
		rc = smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	} else {
		if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
			sock_put(sk); /* passive closing */
		if (sk->sk_state == SMC_LISTEN) {
			/* wake up clcsock accept */
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
		}
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk);
		smc_restore_fallback_changes(smc);
	}

	sk->sk_prot->unhash(sk);

	if (sk->sk_state == SMC_CLOSED) {
		if (smc->clcsock) {
			release_sock(sk);
			smc_clcsock_release(smc);
			lock_sock(sk);
		}
		if (!smc->use_fallback)
			smc_conn_free(&smc->conn);
	}

	return rc;
}

static int smc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = 0;

	if (!sk)
		goto out;

	sock_hold(sk); /* sock_put below */
	smc = smc_sk(sk);

	/* cleanup for a dangling non-blocking connect */
	if (smc->connect_nonblock && sk->sk_state == SMC_INIT)
		tcp_abort(smc->clcsock->sk, ECONNABORTED);
	flush_work(&smc->connect_work);

	if (sk->sk_state == SMC_LISTEN)
		/* smc_close_non_accepted() is called and acquires
		 * sock lock for child sockets again
		 */
		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	else
		lock_sock(sk);

	rc = __smc_release(smc);

	/* detach socket */
	sock_orphan(sk);
	sock->sk = NULL;
	release_sock(sk);

	sock_put(sk); /* sock_hold above */
	sock_put(sk); /* final sock_put */
out:
	return rc;
}

static void smc_destruct(struct sock *sk)
{
	if (sk->sk_state != SMC_CLOSED)
		return;
	if (!sock_flag(sk, SOCK_DEAD))
		return;

	sk_refcnt_debug_dec(sk);
}

static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
				   int protocol)
{
	struct smc_sock *smc;
	struct proto *prot;
	struct sock *sk;

	prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
	if (!sk)
		return NULL;

	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
	sk->sk_state = SMC_INIT;
	sk->sk_destruct = smc_destruct;
	sk->sk_protocol = protocol;
	smc = smc_sk(sk);
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	INIT_WORK(&smc->connect_work, smc_connect_work);
	INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
	INIT_LIST_HEAD(&smc->accept_q);
	spin_lock_init(&smc->accept_q_lock);
	spin_lock_init(&smc->conn.send_lock);
	sk->sk_prot->hash(sk);
	sk_refcnt_debug_inc(sk);
	mutex_init(&smc->clcsock_release_lock);

	return sk;
}
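/* Note on socket reference counting: throughout this file every sock_hold()
 * carries a comment naming the matching sock_put() site and vice versa; the
 * "final" sock_put() releases the reference taken by sock_init_data() in
 * smc_sock_alloc() above. An SMC sock can be torn down from user space
 * close(), from the listen worker, or through passive closing, so keeping
 * these pairs explicit is what makes the lifetime auditable.
 */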
static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
		    int addr_len)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);

	/* replicate tests from inet_bind(), to be safe wrt. future changes */
	rc = -EINVAL;
	if (addr_len < sizeof(struct sockaddr_in))
		goto out;

	rc = -EAFNOSUPPORT;
	if (addr->sin_family != AF_INET &&
	    addr->sin_family != AF_INET6 &&
	    addr->sin_family != AF_UNSPEC)
		goto out;
	/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
	if (addr->sin_family == AF_UNSPEC &&
	    addr->sin_addr.s_addr != htonl(INADDR_ANY))
		goto out;

	lock_sock(sk);

	/* Check if socket is already active */
	rc = -EINVAL;
	if (sk->sk_state != SMC_INIT || smc->connect_nonblock)
		goto out_rel;

	smc->clcsock->sk->sk_reuse = sk->sk_reuse;
	rc = kernel_bind(smc->clcsock, uaddr, addr_len);

out_rel:
	release_sock(sk);
out:
	return rc;
}

static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
				   unsigned long mask)
{
	/* options we don't get control via setsockopt for */
	nsk->sk_type = osk->sk_type;
	nsk->sk_sndbuf = osk->sk_sndbuf;
	nsk->sk_rcvbuf = osk->sk_rcvbuf;
	nsk->sk_sndtimeo = osk->sk_sndtimeo;
	nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
	nsk->sk_mark = osk->sk_mark;
	nsk->sk_priority = osk->sk_priority;
	nsk->sk_rcvlowat = osk->sk_rcvlowat;
	nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
	nsk->sk_err = osk->sk_err;

	nsk->sk_flags &= ~mask;
	nsk->sk_flags |= osk->sk_flags & mask;
}

#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_BROADCAST) | \
			     (1UL << SOCK_TIMESTAMP) | \
			     (1UL << SOCK_DBG) | \
			     (1UL << SOCK_RCVTSTAMP) | \
			     (1UL << SOCK_RCVTSTAMPNS) | \
			     (1UL << SOCK_LOCALROUTE) | \
			     (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
			     (1UL << SOCK_RXQ_OVFL) | \
			     (1UL << SOCK_WIFI_STATUS) | \
			     (1UL << SOCK_NOFCS) | \
			     (1UL << SOCK_FILTER_LOCKED) | \
			     (1UL << SOCK_TSTAMP_NEW))
/* copy only relevant settings and flags of SOL_SOCKET level from smc to
 * clc socket (since smc is not called for these options from net/core)
 */
static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
	smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
}

#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_DBG))
/* copy only settings and flags relevant for smc from clc to smc socket */
static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
{
	smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}

/* register the new rmb on all links */
static int smcr_lgr_reg_rmbs(struct smc_link *link,
			     struct smc_buf_desc *rmb_desc)
{
	struct smc_link_group *lgr = link->lgr;
	int i, rc = 0;

	rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
	if (rc)
		return rc;
	/* protect against parallel smc_llc_cli_rkey_exchange() and
	 * parallel smcr_link_reg_rmb()
	 */
	mutex_lock(&lgr->llc_conf_mutex);
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_active(&lgr->lnk[i]))
			continue;
		rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc);
		if (rc)
			goto out;
	}

	/* exchange confirm_rkey msg with peer */
	rc = smc_llc_do_confirm_rkey(link, rmb_desc);
	if (rc) {
		rc = -EFAULT;
		goto out;
	}
	rmb_desc->is_conf_rkey = true;
out:
	mutex_unlock(&lgr->llc_conf_mutex);
	smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
	return rc;
}
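/* Client side of the first-contact link initialization, implemented by
 * smcr_clnt_conf_first_link() below (sketch of the LLC message flow):
 *
 *	server --- CONFIRM LINK request ----> client
 *	server <-- CONFIRM LINK response ---- client
 *	server --- ADD LINK request (opt) --> client
 *
 * A missing CONFIRM LINK request is treated as a decline/timeout; a missing
 * ADD LINK request simply leaves the link group with a single link.
 */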
static int smcr_clnt_conf_first_link(struct smc_sock *smc)
{
	struct smc_link *link = smc->conn.lnk;
	struct smc_llc_qentry *qentry;
	int rc;

	/* receive CONFIRM LINK request from server over RoCE fabric */
	qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
			      SMC_LLC_CONFIRM_LINK);
	if (!qentry) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
	}
	smc_llc_save_peer_uid(qentry);
	rc = smc_llc_eval_conf_link(qentry, SMC_LLC_REQ);
	smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
	if (rc)
		return SMC_CLC_DECL_RMBE_EC;

	rc = smc_ib_modify_qp_rts(link);
	if (rc)
		return SMC_CLC_DECL_ERR_RDYLNK;

	smc_wr_remember_qp_attr(link);

	if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
		return SMC_CLC_DECL_ERR_REGRMB;

	/* confirm_rkey is implicit on 1st contact */
	smc->conn.rmb_desc->is_conf_rkey = true;

	/* send CONFIRM LINK response over RoCE fabric */
	rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_CL;

	smc_llc_link_active(link);
	smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);

	/* optional 2nd link, receive ADD LINK request from server */
	qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
			      SMC_LLC_ADD_LINK);
	if (!qentry) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		if (rc == -EAGAIN)
			rc = 0; /* no DECLINE received, go with one link */
		return rc;
	}
	smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
	smc_llc_cli_add_link(link, qentry);
	return 0;
}

static void smcr_conn_save_peer_info(struct smc_sock *smc,
				     struct smc_clc_msg_accept_confirm *clc)
{
	int bufsize = smc_uncompress_bufsize(clc->rmbe_size);

	smc->conn.peer_rmbe_idx = clc->rmbe_idx;
	smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
	smc->conn.peer_rmbe_size = bufsize;
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
	smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
}

static void smcd_conn_save_peer_info(struct smc_sock *smc,
				     struct smc_clc_msg_accept_confirm *clc)
{
	int bufsize = smc_uncompress_bufsize(clc->dmbe_size);

	smc->conn.peer_rmbe_idx = clc->dmbe_idx;
	smc->conn.peer_token = clc->token;
	/* msg header takes up space in the buffer */
	smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
	smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
}

static void smc_conn_save_peer_info(struct smc_sock *smc,
				    struct smc_clc_msg_accept_confirm *clc)
{
	if (smc->conn.lgr->is_smcd)
		smcd_conn_save_peer_info(smc, clc);
	else
		smcr_conn_save_peer_info(smc, clc);
}

static void smc_link_save_peer_info(struct smc_link *link,
				    struct smc_clc_msg_accept_confirm *clc)
{
	link->peer_qpn = ntoh24(clc->qpn);
	memcpy(link->peer_gid, clc->lcl.gid, SMC_GID_SIZE);
	memcpy(link->peer_mac, clc->lcl.mac, sizeof(link->peer_mac));
	link->peer_psn = ntoh24(clc->psn);
	link->peer_mtu = clc->qp_mtu;
}
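/* Falling back to TCP makes the internal clcsock visible to user space:
 * smc_switch_to_fallback() below points the struct file at the clcsock, and
 * smc_restore_fallback_changes() (earlier in this file) undoes this at
 * release time. From then on all socket calls are delegated to the
 * clcsock's TCP ops.
 */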
static void smc_switch_to_fallback(struct smc_sock *smc)
{
	smc->use_fallback = true;
	if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
		smc->clcsock->file = smc->sk.sk_socket->file;
		smc->clcsock->file->private_data = smc->clcsock;
		smc->clcsock->wq.fasync_list =
			smc->sk.sk_socket->wq.fasync_list;
	}
}

/* fall back during connect */
static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
{
	smc_switch_to_fallback(smc);
	smc->fallback_rsn = reason_code;
	smc_copy_sock_settings_to_clc(smc);
	smc->connect_nonblock = 0;
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;
	return 0;
}

/* decline and fall back during connect */
static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
{
	int rc;

	if (reason_code < 0) { /* error, fallback is not possible */
		if (smc->sk.sk_state == SMC_INIT)
			sock_put(&smc->sk); /* passive closing */
		return reason_code;
	}
	if (reason_code != SMC_CLC_DECL_PEERDECL) {
		rc = smc_clc_send_decline(smc, reason_code);
		if (rc < 0) {
			if (smc->sk.sk_state == SMC_INIT)
				sock_put(&smc->sk); /* passive closing */
			return rc;
		}
	}
	return smc_connect_fallback(smc, reason_code);
}

/* abort connecting */
static int smc_connect_abort(struct smc_sock *smc, int reason_code,
			     int local_contact)
{
	bool is_smcd = smc->conn.lgr->is_smcd;

	if (local_contact == SMC_FIRST_CONTACT)
		smc_lgr_cleanup_early(&smc->conn);
	else
		smc_conn_free(&smc->conn);
	if (is_smcd)
		/* there is only one lgr role for SMC-D; use server lock */
		mutex_unlock(&smc_server_lgr_pending);
	else
		mutex_unlock(&smc_client_lgr_pending);

	smc->connect_nonblock = 0;
	return reason_code;
}

/* check if there is a rdma device available for this connection. */
/* called for connect and listen */
static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini)
{
	/* PNET table look up: search active ib_device and port
	 * within same PNETID that also contains the ethernet device
	 * used for the internal TCP socket
	 */
	smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
	if (!ini->ib_dev)
		return SMC_CLC_DECL_NOSMCRDEV;
	return 0;
}

/* check if there is an ISM device available for this connection. */
/* called for connect and listen */
static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini)
{
	/* Find ISM device with same PNETID as connecting interface */
	smc_pnet_find_ism_resource(smc->clcsock->sk, ini);
	if (!ini->ism_dev)
		return SMC_CLC_DECL_NOSMCDDEV;
	return 0;
}

/* Check for VLAN ID and register it on ISM device just for CLC handshake */
static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
				      struct smc_init_info *ini)
{
	if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev, ini->vlan_id))
		return SMC_CLC_DECL_ISMVLANERR;
	return 0;
}
/* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
 * used, the VLAN ID will be registered again during the connection setup.
 */
static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
					struct smc_init_info *ini)
{
	if (!is_smcd)
		return 0;
	if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev, ini->vlan_id))
		return SMC_CLC_DECL_CNFERR;
	return 0;
}

/* CLC handshake during connect */
static int smc_connect_clc(struct smc_sock *smc, int smc_type,
			   struct smc_clc_msg_accept_confirm *aclc,
			   struct smc_init_info *ini)
{
	int rc = 0;

	/* do inband token exchange */
	rc = smc_clc_send_proposal(smc, smc_type, ini);
	if (rc)
		return rc;
	/* receive SMC Accept CLC message */
	return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT,
				CLC_WAIT_TIME);
}

/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc,
			    struct smc_clc_msg_accept_confirm *aclc,
			    struct smc_init_info *ini)
{
	int i, reason_code = 0;
	struct smc_link *link;

	ini->is_smcd = false;
	ini->ib_lcl = &aclc->lcl;
	ini->ib_clcqpn = ntoh24(aclc->qpn);
	ini->srv_first_contact = aclc->hdr.flag;

	mutex_lock(&smc_client_lgr_pending);
	reason_code = smc_conn_create(smc, ini);
	if (reason_code) {
		mutex_unlock(&smc_client_lgr_pending);
		return reason_code;
	}

	smc_conn_save_peer_info(smc, aclc);

	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
		link = smc->conn.lnk;
	} else {
		/* set link that was assigned by server */
		link = NULL;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			struct smc_link *l = &smc->conn.lgr->lnk[i];

			if (l->peer_qpn == ntoh24(aclc->qpn) &&
			    !memcmp(l->peer_gid, &aclc->lcl.gid, SMC_GID_SIZE) &&
			    !memcmp(l->peer_mac, &aclc->lcl.mac, sizeof(l->peer_mac))) {
				link = l;
				break;
			}
		}
		if (!link)
			return smc_connect_abort(smc, SMC_CLC_DECL_NOSRVLINK,
						 ini->cln_first_contact);
		smc->conn.lnk = link;
	}

	/* create send buffer and rmb */
	if (smc_buf_create(smc, false))
		return smc_connect_abort(smc, SMC_CLC_DECL_MEM,
					 ini->cln_first_contact);

	if (ini->cln_first_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, aclc);

	if (smc_rmb_rtoken_handling(&smc->conn, link, aclc))
		return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
					 ini->cln_first_contact);

	smc_close_init(smc);
	smc_rx_init(smc);

	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
		if (smc_ib_ready_link(link))
			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
						 ini->cln_first_contact);
	} else {
		if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc))
			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
						 ini->cln_first_contact);
	}
	smc_rmb_sync_sg_for_device(&smc->conn);

	reason_code = smc_clc_send_confirm(smc);
	if (reason_code)
		return smc_connect_abort(smc, reason_code,
					 ini->cln_first_contact);

	smc_tx_init(smc);

	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
		/* QP confirmation over RoCE fabric */
		smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
		reason_code = smcr_clnt_conf_first_link(smc);
		smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
		if (reason_code)
			return smc_connect_abort(smc, reason_code,
						 ini->cln_first_contact);
	}
	mutex_unlock(&smc_client_lgr_pending);

	smc_copy_sock_settings_to_clc(smc);
	smc->connect_nonblock = 0;
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return 0;
}
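/* For SMC-R, cln_first_contact distinguishes two cases in smc_connect_rdma()
 * above: on SMC_FIRST_CONTACT a new link group was created and the link must
 * still be brought to RTS and confirmed via LLC; otherwise an existing link
 * group is reused and the client merely picks the server-assigned link by
 * matching QP number, GID and MAC from the CLC accept message.
 */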
/* setup for ISM connection of client */
static int smc_connect_ism(struct smc_sock *smc,
			   struct smc_clc_msg_accept_confirm *aclc,
			   struct smc_init_info *ini)
{
	int rc = 0;

	ini->is_smcd = true;
	ini->ism_gid = aclc->gid;
	ini->srv_first_contact = aclc->hdr.flag;

	/* there is only one lgr role for SMC-D; use server lock */
	mutex_lock(&smc_server_lgr_pending);
	rc = smc_conn_create(smc, ini);
	if (rc) {
		mutex_unlock(&smc_server_lgr_pending);
		return rc;
	}

	/* Create send and receive buffers */
	rc = smc_buf_create(smc, true);
	if (rc)
		return smc_connect_abort(smc, (rc == -ENOSPC) ?
					      SMC_CLC_DECL_MAX_DMB :
					      SMC_CLC_DECL_MEM,
					 ini->cln_first_contact);

	smc_conn_save_peer_info(smc, aclc);
	smc_close_init(smc);
	smc_rx_init(smc);
	smc_tx_init(smc);

	rc = smc_clc_send_confirm(smc);
	if (rc)
		return smc_connect_abort(smc, rc, ini->cln_first_contact);
	mutex_unlock(&smc_server_lgr_pending);

	smc_copy_sock_settings_to_clc(smc);
	smc->connect_nonblock = 0;
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return 0;
}

/* perform steps before actually connecting */
static int __smc_connect(struct smc_sock *smc)
{
	bool ism_supported = false, rdma_supported = false;
	struct smc_clc_msg_accept_confirm aclc;
	struct smc_init_info ini = {0};
	int smc_type;
	int rc = 0;

	if (smc->use_fallback)
		return smc_connect_fallback(smc, smc->fallback_rsn);

	/* if peer has not signalled SMC-capability, fall back */
	if (!tcp_sk(smc->clcsock->sk)->syn_smc)
		return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(smc))
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);

	/* get vlan id from IP device */
	if (smc_vlan_by_tcpsk(smc->clcsock, &ini))
		return smc_connect_decline_fallback(smc,
						    SMC_CLC_DECL_GETVLANERR);

	/* check if there is an ism device available */
	if (!smc_find_ism_device(smc, &ini) &&
	    !smc_connect_ism_vlan_setup(smc, &ini)) {
		/* ISM is supported for this connection */
		ism_supported = true;
		smc_type = SMC_TYPE_D;
	}

	/* check if there is a rdma device available */
	if (!smc_find_rdma_device(smc, &ini)) {
		/* RDMA is supported for this connection */
		rdma_supported = true;
		if (ism_supported)
			smc_type = SMC_TYPE_B; /* both */
		else
			smc_type = SMC_TYPE_R; /* only RDMA */
	}

	/* if neither ISM nor RDMA are supported, fallback */
	if (!rdma_supported && !ism_supported)
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV);

	/* perform CLC handshake */
	rc = smc_connect_clc(smc, smc_type, &aclc, &ini);
	if (rc) {
		smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
		return smc_connect_decline_fallback(smc, rc);
	}

	/* depending on previous steps, connect using rdma or ism */
	if (rdma_supported && aclc.hdr.path == SMC_TYPE_R)
		rc = smc_connect_rdma(smc, &aclc, &ini);
	else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
		rc = smc_connect_ism(smc, &aclc, &ini);
	else
		rc = SMC_CLC_DECL_MODEUNSUPP;
	if (rc) {
		smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
		return smc_connect_decline_fallback(smc, rc);
	}

	smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
	return 0;
}
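/* The client handshake implemented by __smc_connect() above, in message
 * order (sketch):
 *
 *	client                              server
 *	   |------- CLC PROPOSAL ------------>|
 *	   |<------ CLC ACCEPT ---------------|
 *	   |------- CLC CONFIRM ------------->|
 *	   |<=====> LLC CONFIRM/ADD LINK (SMC-R first contact only)
 *
 * Any decline or error on the way ends in smc_connect_decline_fallback(),
 * which continues the connection as plain TCP where possible.
 */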
static void smc_connect_work(struct work_struct *work)
{
	struct smc_sock *smc = container_of(work, struct smc_sock,
					    connect_work);
	long timeo = smc->sk.sk_sndtimeo;
	int rc = 0;

	if (!timeo)
		timeo = MAX_SCHEDULE_TIMEOUT;
	lock_sock(smc->clcsock->sk);
	if (smc->clcsock->sk->sk_err) {
		smc->sk.sk_err = smc->clcsock->sk->sk_err;
	} else if ((1 << smc->clcsock->sk->sk_state) &
					(TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo);
		if ((rc == -EPIPE) &&
		    ((1 << smc->clcsock->sk->sk_state) &
		     (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)))
			rc = 0;
	}
	release_sock(smc->clcsock->sk);
	lock_sock(&smc->sk);
	if (rc != 0 || smc->sk.sk_err) {
		smc->sk.sk_state = SMC_CLOSED;
		if (rc == -EPIPE || rc == -EAGAIN)
			smc->sk.sk_err = EPIPE;
		else if (signal_pending(current))
			smc->sk.sk_err = -sock_intr_errno(timeo);
		sock_put(&smc->sk); /* passive closing */
		goto out;
	}

	rc = __smc_connect(smc);
	if (rc < 0)
		smc->sk.sk_err = -rc;

out:
	if (!sock_flag(&smc->sk, SOCK_DEAD)) {
		if (smc->sk.sk_err) {
			smc->sk.sk_state_change(&smc->sk);
		} else { /* allow polling before and after fallback decision */
			smc->clcsock->sk->sk_write_space(smc->clcsock->sk);
			smc->sk.sk_write_space(&smc->sk);
		}
	}
	release_sock(&smc->sk);
}

static int smc_connect(struct socket *sock, struct sockaddr *addr,
		       int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;

	smc = smc_sk(sk);

	/* separate smc parameter checking to be safe */
	if (alen < sizeof(addr->sa_family))
		goto out_err;
	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
		goto out_err;

	lock_sock(sk);
	switch (sk->sk_state) {
	default:
		goto out;
	case SMC_ACTIVE:
		rc = -EISCONN;
		goto out;
	case SMC_INIT:
		rc = 0;
		break;
	}

	smc_copy_sock_settings_to_clc(smc);
	tcp_sk(smc->clcsock->sk)->syn_smc = 1;
	if (smc->connect_nonblock) {
		rc = -EALREADY;
		goto out;
	}
	rc = kernel_connect(smc->clcsock, addr, alen, flags);
	if (rc && rc != -EINPROGRESS)
		goto out;

	sock_hold(&smc->sk); /* sock put in passive closing */
	if (smc->use_fallback)
		goto out;
	if (flags & O_NONBLOCK) {
		if (schedule_work(&smc->connect_work))
			smc->connect_nonblock = 1;
		rc = -EINPROGRESS;
	} else {
		rc = __smc_connect(smc);
		if (rc < 0)
			goto out;
		else
			rc = 0; /* success cases including fallback */
	}

out:
	release_sock(sk);
out_err:
	return rc;
}
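/* Illustrative user-space view of a nonblocking connect (the usual TCP-style
 * sequence; not part of this file):
 *
 *	fcntl(fd, F_SETFL, O_NONBLOCK);
 *	connect(fd, (struct sockaddr *)&sin, sizeof(sin)); // -1/EINPROGRESS
 *	poll(&pfd, 1, -1); // woken via sk_write_space() from connect_work
 *
 * Meanwhile smc_connect_work() above waits for the TCP handshake and then
 * runs __smc_connect() (or falls back) in the background;
 * smc->connect_nonblock guards against concurrent connect/bind/listen calls.
 */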
static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
{
	struct socket *new_clcsock = NULL;
	struct sock *lsk = &lsmc->sk;
	struct sock *new_sk;
	int rc = -EINVAL;

	release_sock(lsk);
	new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
	if (!new_sk) {
		rc = -ENOMEM;
		lsk->sk_err = ENOMEM;
		*new_smc = NULL;
		lock_sock(lsk);
		goto out;
	}
	*new_smc = smc_sk(new_sk);

	mutex_lock(&lsmc->clcsock_release_lock);
	if (lsmc->clcsock)
		rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
	mutex_unlock(&lsmc->clcsock_release_lock);
	lock_sock(lsk);
	if (rc < 0)
		lsk->sk_err = -rc;
	if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
		new_sk->sk_prot->unhash(new_sk);
		if (new_clcsock)
			sock_release(new_clcsock);
		new_sk->sk_state = SMC_CLOSED;
		sock_set_flag(new_sk, SOCK_DEAD);
		sock_put(new_sk); /* final */
		*new_smc = NULL;
		goto out;
	}

	(*new_smc)->clcsock = new_clcsock;
out:
	return rc;
}

/* add a just created sock to the accept queue of the listen sock as
 * candidate for a following socket accept call from user space
 */
static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
{
	struct smc_sock *par = smc_sk(parent);

	sock_hold(sk); /* sock_put in smc_accept_unlink() */
	spin_lock(&par->accept_q_lock);
	list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_added(parent);
}

/* remove a socket from the accept queue of its parental listening socket */
static void smc_accept_unlink(struct sock *sk)
{
	struct smc_sock *par = smc_sk(sk)->listen_smc;

	spin_lock(&par->accept_q_lock);
	list_del_init(&smc_sk(sk)->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
	sock_put(sk); /* sock_hold in smc_accept_enqueue */
}

/* remove a sock from the accept queue to bind it to a new socket created
 * for a socket accept call from user space
 */
struct sock *smc_accept_dequeue(struct sock *parent,
				struct socket *new_sock)
{
	struct smc_sock *isk, *n;
	struct sock *new_sk;

	list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
		new_sk = (struct sock *)isk;

		smc_accept_unlink(new_sk);
		if (new_sk->sk_state == SMC_CLOSED) {
			new_sk->sk_prot->unhash(new_sk);
			if (isk->clcsock) {
				sock_release(isk->clcsock);
				isk->clcsock = NULL;
			}
			sock_put(new_sk); /* final */
			continue;
		}
		if (new_sock) {
			sock_graft(new_sk, new_sock);
			if (isk->use_fallback) {
				smc_sk(new_sk)->clcsock->file = new_sock->file;
				isk->clcsock->file->private_data = isk->clcsock;
			}
		}
		return new_sk;
	}
	return NULL;
}

/* clean up for a created but never accepted sock */
void smc_close_non_accepted(struct sock *sk)
{
	struct smc_sock *smc = smc_sk(sk);

	sock_hold(sk); /* sock_put below */
	lock_sock(sk);
	if (!sk->sk_lingertime)
		/* wait for peer closing */
		sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
	__smc_release(smc);
	release_sock(sk);
	sock_put(sk); /* sock_hold above */
	sock_put(sk); /* final sock_put */
}
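/* Server side of the first-contact link initialization, implemented by
 * smcr_serv_conf_first_link() below: register the connection's RMB on the
 * link, send a CONFIRM LINK request, wait for the client's response, and
 * finally try to establish an optional second link via ADD LINK.
 */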
static int smcr_serv_conf_first_link(struct smc_sock *smc)
{
	struct smc_link *link = smc->conn.lnk;
	struct smc_llc_qentry *qentry;
	int rc;

	if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
		return SMC_CLC_DECL_ERR_REGRMB;

	/* send CONFIRM LINK request to client over the RoCE fabric */
	rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_CL;

	/* receive CONFIRM LINK response from client over the RoCE fabric */
	qentry = smc_llc_wait(link->lgr, link, SMC_LLC_WAIT_TIME,
			      SMC_LLC_CONFIRM_LINK);
	if (!qentry) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
	}
	smc_llc_save_peer_uid(qentry);
	rc = smc_llc_eval_conf_link(qentry, SMC_LLC_RESP);
	smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
	if (rc)
		return SMC_CLC_DECL_RMBE_EC;

	/* confirm_rkey is implicit on 1st contact */
	smc->conn.rmb_desc->is_conf_rkey = true;

	smc_llc_link_active(link);
	smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);

	/* initial contact - try to establish second link */
	smc_llc_srv_add_link(link);
	return 0;
}

/* listen worker: finish */
static void smc_listen_out(struct smc_sock *new_smc)
{
	struct smc_sock *lsmc = new_smc->listen_smc;
	struct sock *newsmcsk = &new_smc->sk;

	if (lsmc->sk.sk_state == SMC_LISTEN) {
		lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
		smc_accept_enqueue(&lsmc->sk, newsmcsk);
		release_sock(&lsmc->sk);
	} else { /* no longer listening */
		smc_close_non_accepted(newsmcsk);
	}

	/* Wake up accept */
	lsmc->sk.sk_data_ready(&lsmc->sk);
	sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
}

/* listen worker: finish in state connected */
static void smc_listen_out_connected(struct smc_sock *new_smc)
{
	struct sock *newsmcsk = &new_smc->sk;

	sk_refcnt_debug_inc(newsmcsk);
	if (newsmcsk->sk_state == SMC_INIT)
		newsmcsk->sk_state = SMC_ACTIVE;

	smc_listen_out(new_smc);
}

/* listen worker: finish in error state */
static void smc_listen_out_err(struct smc_sock *new_smc)
{
	struct sock *newsmcsk = &new_smc->sk;

	if (newsmcsk->sk_state == SMC_INIT)
		sock_put(&new_smc->sk); /* passive closing */
	newsmcsk->sk_state = SMC_CLOSED;

	smc_listen_out(new_smc);
}

/* listen worker: decline and fall back if possible */
static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
			       int local_contact)
{
	/* RDMA setup failed, switch back to TCP */
	if (local_contact == SMC_FIRST_CONTACT)
		smc_lgr_cleanup_early(&new_smc->conn);
	else
		smc_conn_free(&new_smc->conn);
	if (reason_code < 0) { /* error, no fallback possible */
		smc_listen_out_err(new_smc);
		return;
	}
	smc_switch_to_fallback(new_smc);
	new_smc->fallback_rsn = reason_code;
	if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
		if (smc_clc_send_decline(new_smc, reason_code) < 0) {
			smc_listen_out_err(new_smc);
			return;
		}
	}
	smc_listen_out_connected(new_smc);
}

/* listen worker: check prefixes */
static int smc_listen_prfx_check(struct smc_sock *new_smc,
				 struct smc_clc_msg_proposal *pclc)
{
	struct smc_clc_msg_proposal_prefix *pclc_prfx;
	struct socket *newclcsock = new_smc->clcsock;

	pclc_prfx = smc_clc_proposal_get_prefix(pclc);
	if (smc_clc_prfx_match(newclcsock, pclc_prfx))
		return SMC_CLC_DECL_DIFFPREFIX;

	return 0;
}

/* listen worker: initialize connection and buffers */
static int smc_listen_rdma_init(struct smc_sock *new_smc,
				struct smc_init_info *ini)
{
	int rc;

	/* allocate connection / link group */
	rc = smc_conn_create(new_smc, ini);
	if (rc)
		return rc;

	/* create send buffer and rmb */
	if (smc_buf_create(new_smc, false))
		return SMC_CLC_DECL_MEM;

	return 0;
}
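/* Buffer setup differs per transport: smc_buf_create(new_smc, false) in
 * smc_listen_rdma_init() above allocates a send buffer plus RMB for SMC-R,
 * while smc_buf_create(new_smc, true) in smc_listen_ism_init() below
 * allocates DMBs for SMC-D; running out of DMBs is reported separately as
 * SMC_CLC_DECL_MAX_DMB.
 */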
/* listen worker: initialize connection and buffers for SMC-D */
static int smc_listen_ism_init(struct smc_sock *new_smc,
			       struct smc_clc_msg_proposal *pclc,
			       struct smc_init_info *ini)
{
	struct smc_clc_msg_smcd *pclc_smcd;
	int rc;

	pclc_smcd = smc_get_clc_msg_smcd(pclc);
	ini->ism_gid = pclc_smcd->gid;
	rc = smc_conn_create(new_smc, ini);
	if (rc)
		return rc;

	/* Check if peer can be reached via ISM device */
	if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid,
			    new_smc->conn.lgr->vlan_id,
			    new_smc->conn.lgr->smcd)) {
		if (ini->cln_first_contact == SMC_FIRST_CONTACT)
			smc_lgr_cleanup_early(&new_smc->conn);
		else
			smc_conn_free(&new_smc->conn);
		return SMC_CLC_DECL_SMCDNOTALK;
	}

	/* Create send and receive buffers */
	rc = smc_buf_create(new_smc, true);
	if (rc) {
		if (ini->cln_first_contact == SMC_FIRST_CONTACT)
			smc_lgr_cleanup_early(&new_smc->conn);
		else
			smc_conn_free(&new_smc->conn);
		return (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB :
					 SMC_CLC_DECL_MEM;
	}

	return 0;
}

/* listen worker: register buffers */
static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
{
	struct smc_connection *conn = &new_smc->conn;

	if (local_contact != SMC_FIRST_CONTACT) {
		if (smcr_lgr_reg_rmbs(conn->lnk, conn->rmb_desc))
			return SMC_CLC_DECL_ERR_REGRMB;
	}
	smc_rmb_sync_sg_for_device(&new_smc->conn);

	return 0;
}

/* listen worker: finish RDMA setup */
static int smc_listen_rdma_finish(struct smc_sock *new_smc,
				  struct smc_clc_msg_accept_confirm *cclc,
				  int local_contact)
{
	struct smc_link *link = new_smc->conn.lnk;
	int reason_code = 0;

	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, cclc);

	if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc)) {
		reason_code = SMC_CLC_DECL_ERR_RTOK;
		goto decline;
	}

	if (local_contact == SMC_FIRST_CONTACT) {
		if (smc_ib_ready_link(link)) {
			reason_code = SMC_CLC_DECL_ERR_RDYLNK;
			goto decline;
		}
		/* QP confirmation over RoCE fabric */
		smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
		reason_code = smcr_serv_conf_first_link(new_smc);
		smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
		if (reason_code)
			goto decline;
	}
	return 0;

decline:
	smc_listen_decline(new_smc, reason_code, local_contact);
	return reason_code;
}
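/* smc_listen_work() below is the per-connection server state machine. Its
 * decision ladder: check SMC capability and IPsec, receive the CLC proposal,
 * match IP prefixes and VLAN, then probe ISM first and RDMA second (subject
 * to the proposed path type), send CLC accept, await CLC confirm, and only
 * then finish link/buffer setup. Every failure funnels into
 * smc_listen_decline(), which falls back to TCP when the error permits.
 */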
/* setup for RDMA connection of server */
static void smc_listen_work(struct work_struct *work)
{
	struct smc_sock *new_smc = container_of(work, struct smc_sock,
						smc_listen_work);
	struct socket *newclcsock = new_smc->clcsock;
	struct smc_clc_msg_accept_confirm cclc;
	struct smc_clc_msg_proposal *pclc;
	struct smc_init_info ini = {0};
	bool ism_supported = false;
	u8 buf[SMC_CLC_MAX_LEN];
	int rc = 0;

	if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
		return smc_listen_out_err(new_smc);

	if (new_smc->use_fallback) {
		smc_listen_out_connected(new_smc);
		return;
	}

	/* check if peer is smc capable */
	if (!tcp_sk(newclcsock->sk)->syn_smc) {
		smc_switch_to_fallback(new_smc);
		new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC;
		smc_listen_out_connected(new_smc);
		return;
	}

	/* do inband token exchange -
	 * wait for and receive SMC Proposal CLC message
	 */
	pclc = (struct smc_clc_msg_proposal *)&buf;
	rc = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
			      SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
	if (rc)
		goto out_decl;

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(new_smc)) {
		rc = SMC_CLC_DECL_IPSEC;
		goto out_decl;
	}

	/* check for matching IP prefix and subnet length */
	rc = smc_listen_prfx_check(new_smc, pclc);
	if (rc)
		goto out_decl;

	/* get vlan id from IP device */
	if (smc_vlan_by_tcpsk(new_smc->clcsock, &ini)) {
		rc = SMC_CLC_DECL_GETVLANERR;
		goto out_decl;
	}

	mutex_lock(&smc_server_lgr_pending);
	smc_close_init(new_smc);
	smc_rx_init(new_smc);
	smc_tx_init(new_smc);

	/* check if ISM is available */
	if (pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) {
		ini.is_smcd = true; /* prepare ISM check */
		rc = smc_find_ism_device(new_smc, &ini);
		if (!rc)
			rc = smc_listen_ism_init(new_smc, pclc, &ini);
		if (!rc)
			ism_supported = true;
		else if (pclc->hdr.path == SMC_TYPE_D)
			goto out_unlock; /* skip RDMA and decline */
	}

	/* check if RDMA is available */
	if (!ism_supported) { /* SMC_TYPE_R or SMC_TYPE_B */
		/* prepare RDMA check */
		ini.is_smcd = false;
		ini.ism_dev = NULL;
		ini.ib_lcl = &pclc->lcl;
		rc = smc_find_rdma_device(new_smc, &ini);
		if (rc) {
			/* no RDMA device found */
			if (pclc->hdr.path == SMC_TYPE_B)
				/* neither ISM nor RDMA device found */
				rc = SMC_CLC_DECL_NOSMCDEV;
			goto out_unlock;
		}
		rc = smc_listen_rdma_init(new_smc, &ini);
		if (rc)
			goto out_unlock;
		rc = smc_listen_rdma_reg(new_smc, ini.cln_first_contact);
		if (rc)
			goto out_unlock;
	}

	/* send SMC Accept CLC message */
	rc = smc_clc_send_accept(new_smc, ini.cln_first_contact);
	if (rc)
		goto out_unlock;

	/* SMC-D does not need this lock any more */
	if (ism_supported)
		mutex_unlock(&smc_server_lgr_pending);

	/* receive SMC Confirm CLC message */
	rc = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
			      SMC_CLC_CONFIRM, CLC_WAIT_TIME);
	if (rc) {
		if (!ism_supported)
			goto out_unlock;
		goto out_decl;
	}

	/* finish worker */
	if (!ism_supported) {
		rc = smc_listen_rdma_finish(new_smc, &cclc,
					    ini.cln_first_contact);
		mutex_unlock(&smc_server_lgr_pending);
		if (rc)
			return;
	}
	smc_conn_save_peer_info(new_smc, &cclc);
	smc_listen_out_connected(new_smc);
	return;

out_unlock:
	mutex_unlock(&smc_server_lgr_pending);
out_decl:
	smc_listen_decline(new_smc, rc, ini.cln_first_contact);
}
static void smc_tcp_listen_work(struct work_struct *work)
{
	struct smc_sock *lsmc = container_of(work, struct smc_sock,
					     tcp_listen_work);
	struct sock *lsk = &lsmc->sk;
	struct smc_sock *new_smc;
	int rc = 0;

	lock_sock(lsk);
	while (lsk->sk_state == SMC_LISTEN) {
		rc = smc_clcsock_accept(lsmc, &new_smc);
		if (rc)
			goto out;
		if (!new_smc)
			continue;

		new_smc->listen_smc = lsmc;
		new_smc->use_fallback = lsmc->use_fallback;
		new_smc->fallback_rsn = lsmc->fallback_rsn;
		sock_hold(lsk); /* sock_put in smc_listen_work */
		INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
		smc_copy_sock_settings_to_smc(new_smc);
		new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
		new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
		sock_hold(&new_smc->sk); /* sock_put in passive closing */
		if (!schedule_work(&new_smc->smc_listen_work))
			sock_put(&new_smc->sk);
	}

out:
	release_sock(lsk);
	sock_put(&lsmc->sk); /* sock_hold in smc_listen */
}

static int smc_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);
	lock_sock(sk);

	rc = -EINVAL;
	if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) ||
	    smc->connect_nonblock)
		goto out;

	rc = 0;
	if (sk->sk_state == SMC_LISTEN) {
		sk->sk_max_ack_backlog = backlog;
		goto out;
	}
	/* some socket options are handled in core, so we could not apply
	 * them to the clc socket -- copy smc socket options to clc socket
	 */
	smc_copy_sock_settings_to_clc(smc);
	if (!smc->use_fallback)
		tcp_sk(smc->clcsock->sk)->syn_smc = 1;

	rc = kernel_listen(smc->clcsock, backlog);
	if (rc)
		goto out;
	sk->sk_max_ack_backlog = backlog;
	sk->sk_ack_backlog = 0;
	sk->sk_state = SMC_LISTEN;
	sock_hold(sk); /* sock_hold in tcp_listen_worker */
	if (!schedule_work(&smc->tcp_listen_work))
		sock_put(sk);

out:
	release_sock(sk);
	return rc;
}

static int smc_accept(struct socket *sock, struct socket *new_sock,
		      int flags, bool kern)
{
	struct sock *sk = sock->sk, *nsk;
	DECLARE_WAITQUEUE(wait, current);
	struct smc_sock *lsmc;
	long timeo;
	int rc = 0;

	lsmc = smc_sk(sk);
	sock_hold(sk); /* sock_put below */
	lock_sock(sk);

	if (lsmc->sk.sk_state != SMC_LISTEN) {
		rc = -EINVAL;
		release_sock(sk);
		goto out;
	}

	/* Wait for an incoming connection */
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	add_wait_queue_exclusive(sk_sleep(sk), &wait);
	while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (!timeo) {
			rc = -EAGAIN;
			break;
		}
		release_sock(sk);
		timeo = schedule_timeout(timeo);
		/* wakeup by sk_data_ready in smc_listen_work() */
		sched_annotate_sleep();
		lock_sock(sk);
		if (signal_pending(current)) {
			rc = sock_intr_errno(timeo);
			break;
		}
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(sk_sleep(sk), &wait);

	if (!rc)
		rc = sock_error(nsk);
	release_sock(sk);
	if (rc)
		goto out;

	if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) {
		/* wait till data arrives on the socket */
		timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
					 MSEC_PER_SEC);
		if (smc_sk(nsk)->use_fallback) {
			struct sock *clcsk = smc_sk(nsk)->clcsock->sk;

			lock_sock(clcsk);
			if (skb_queue_empty(&clcsk->sk_receive_queue))
				sk_wait_data(clcsk, &timeo, NULL);
			release_sock(clcsk);
		} else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) {
			lock_sock(nsk);
			smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available);
			release_sock(nsk);
		}
	}

out:
	sock_put(sk); /* sock_hold above */
	return rc;
}
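/* TCP_DEFER_ACCEPT emulation: smc_accept() above honors
 * smc->sockopt_defer_accept (set in smc_setsockopt()) by waiting up to the
 * configured number of seconds for payload data on the freshly accepted
 * socket, using the clcsock wait path for fallback sockets and smc_rx_wait()
 * for native SMC ones.
 */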
static int smc_getname(struct socket *sock, struct sockaddr *addr,
		       int peer)
{
	struct smc_sock *smc;

	if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
	    (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
		return -ENOTCONN;

	smc = smc_sk(sock->sk);

	return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
}

static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_INIT))
		goto out;

	if (msg->msg_flags & MSG_FASTOPEN) {
		if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
			smc_switch_to_fallback(smc);
			smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
		} else {
			rc = -EINVAL;
			goto out;
		}
	}

	if (smc->use_fallback)
		rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
	else
		rc = smc_tx_sendmsg(smc, msg, len);
out:
	release_sock(sk);
	return rc;
}

static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
		       int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
		/* socket was connected before, no more data to read */
		rc = 0;
		goto out;
	}
	if ((sk->sk_state == SMC_INIT) ||
	    (sk->sk_state == SMC_LISTEN) ||
	    (sk->sk_state == SMC_CLOSED))
		goto out;

	if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
		rc = 0;
		goto out;
	}

	if (smc->use_fallback) {
		rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
	} else {
		msg->msg_namelen = 0;
		rc = smc_rx_recvmsg(smc, msg, NULL, len, flags);
	}

out:
	release_sock(sk);
	return rc;
}

static __poll_t smc_accept_poll(struct sock *parent)
{
	struct smc_sock *isk = smc_sk(parent);
	__poll_t mask = 0;

	spin_lock(&isk->accept_q_lock);
	if (!list_empty(&isk->accept_q))
		mask = EPOLLIN | EPOLLRDNORM;
	spin_unlock(&isk->accept_q_lock);

	return mask;
}
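/* Poll semantics: for fallback sockets smc_poll() below simply delegates to
 * the clcsock. For native SMC sockets, writability is derived from
 * conn.sndbuf_space and readability from conn.bytes_to_rcv, both maintained
 * by the CDC flow-control logic, so no TCP queues are consulted.
 */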
static __poll_t smc_poll(struct file *file, struct socket *sock,
			 poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	__poll_t mask = 0;

	if (!sk)
		return EPOLLNVAL;

	smc = smc_sk(sock->sk);
	if (smc->use_fallback) {
		/* delegate to CLC child sock */
		mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
		sk->sk_err = smc->clcsock->sk->sk_err;
	} else {
		if (sk->sk_state != SMC_CLOSED)
			sock_poll_wait(file, sock, wait);
		if (sk->sk_err)
			mask |= EPOLLERR;
		if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
		    (sk->sk_state == SMC_CLOSED))
			mask |= EPOLLHUP;
		if (sk->sk_state == SMC_LISTEN) {
			/* woken up by sk_data_ready in smc_listen_work() */
			mask |= smc_accept_poll(sk);
		} else if (smc->use_fallback) { /* as result of connect_work() */
			mask |= smc->clcsock->ops->poll(file, smc->clcsock,
							wait);
			sk->sk_err = smc->clcsock->sk->sk_err;
		} else {
			if ((sk->sk_state != SMC_INIT &&
			     atomic_read(&smc->conn.sndbuf_space)) ||
			    sk->sk_shutdown & SEND_SHUTDOWN) {
				mask |= EPOLLOUT | EPOLLWRNORM;
			} else {
				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
			}
			if (atomic_read(&smc->conn.bytes_to_rcv))
				mask |= EPOLLIN | EPOLLRDNORM;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
			if (sk->sk_state == SMC_APPCLOSEWAIT1)
				mask |= EPOLLIN;
			if (smc->conn.urg_state == SMC_URG_VALID)
				mask |= EPOLLPRI;
		}
	}

	return mask;
}

static int smc_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;
	int rc1 = 0;

	smc = smc_sk(sk);

	if ((how < SHUT_RD) || (how > SHUT_RDWR))
		return rc;

	lock_sock(sk);

	rc = -ENOTCONN;
	if ((sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPFINCLOSEWAIT))
		goto out;
	if (smc->use_fallback) {
		rc = kernel_sock_shutdown(smc->clcsock, how);
		sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
		if (sk->sk_shutdown == SHUTDOWN_MASK)
			sk->sk_state = SMC_CLOSED;
		goto out;
	}
	switch (how) {
	case SHUT_RDWR:		/* shutdown in both directions */
		rc = smc_close_active(smc);
		break;
	case SHUT_WR:
		rc = smc_close_shutdown_write(smc);
		break;
	case SHUT_RD:
		rc = 0;
		/* nothing more to do because peer is not involved */
		break;
	}
	if (smc->clcsock)
		rc1 = kernel_sock_shutdown(smc->clcsock, how);
	/* map sock_shutdown_cmd constants to sk_shutdown value range */
	sk->sk_shutdown |= how + 1;

out:
	release_sock(sk);
	return rc ? rc : rc1;
}

static int smc_setsockopt(struct socket *sock, int level, int optname,
			  sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int val, rc;

	smc = smc_sk(sk);

	/* generic setsockopts reaching us here always apply to the
	 * CLC socket
	 */
	if (unlikely(!smc->clcsock->ops->setsockopt))
		rc = -EOPNOTSUPP;
	else
		rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
						   optval, optlen);
	if (smc->clcsock->sk->sk_err) {
		sk->sk_err = smc->clcsock->sk->sk_err;
		sk->sk_error_report(sk);
	}

	if (optlen < sizeof(int))
		return -EINVAL;
	if (copy_from_sockptr(&val, optval, sizeof(int)))
		return -EFAULT;

	lock_sock(sk);
	if (rc || smc->use_fallback)
		goto out;
	switch (optname) {
	case TCP_ULP:
	case TCP_FASTOPEN:
	case TCP_FASTOPEN_CONNECT:
	case TCP_FASTOPEN_KEY:
	case TCP_FASTOPEN_NO_COOKIE:
		/* option not supported by SMC */
		if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
			smc_switch_to_fallback(smc);
			smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
		} else {
			rc = -EINVAL;
		}
		break;
	case TCP_NODELAY:
		if (sk->sk_state != SMC_INIT &&
		    sk->sk_state != SMC_LISTEN &&
		    sk->sk_state != SMC_CLOSED) {
			if (val)
				mod_delayed_work(system_wq, &smc->conn.tx_work,
						 0);
		}
		break;
	case TCP_CORK:
		if (sk->sk_state != SMC_INIT &&
		    sk->sk_state != SMC_LISTEN &&
		    sk->sk_state != SMC_CLOSED) {
			if (!val)
				mod_delayed_work(system_wq, &smc->conn.tx_work,
						 0);
		}
		break;
	case TCP_DEFER_ACCEPT:
		smc->sockopt_defer_accept = val;
		break;
	default:
		break;
	}
out:
	release_sock(sk);

	return rc;
}
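/* Illustrative: TCP options that SMC cannot honor trigger an early fallback
 * when set before the handshake, e.g.
 *
 *	int on = 1;
 *	setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN_CONNECT, &on, sizeof(on));
 *
 * succeeds, but smc_setsockopt() above then switches the socket to plain TCP
 * (fallback_rsn = SMC_CLC_DECL_OPTUNSUPP). After connection setup the same
 * options are rejected with -EINVAL instead.
 */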
static int smc_getsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	struct smc_sock *smc;

	smc = smc_sk(sock->sk);
	/* socket options apply to the CLC socket */
	if (unlikely(!smc->clcsock->ops->getsockopt))
		return -EOPNOTSUPP;
	return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
					     optval, optlen);
}

static int smc_ioctl(struct socket *sock, unsigned int cmd,
		     unsigned long arg)
{
	union smc_host_cursor cons, urg;
	struct smc_connection *conn;
	struct smc_sock *smc;
	int answ;

	smc = smc_sk(sock->sk);
	conn = &smc->conn;
	lock_sock(&smc->sk);
	if (smc->use_fallback) {
		if (!smc->clcsock) {
			release_sock(&smc->sk);
			return -EBADF;
		}
		answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
		release_sock(&smc->sk);
		return answ;
	}
	switch (cmd) {
	case SIOCINQ: /* same as FIONREAD */
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED)
			answ = 0;
		else
			answ = atomic_read(&smc->conn.bytes_to_rcv);
		break;
	case SIOCOUTQ:
		/* output queue size (not send + not acked) */
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED)
			answ = 0;
		else
			answ = smc->conn.sndbuf_desc->len -
					atomic_read(&smc->conn.sndbuf_space);
		break;
	case SIOCOUTQNSD:
		/* output queue size (not send only) */
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED)
			answ = 0;
		else
			answ = smc_tx_prepared_sends(&smc->conn);
		break;
	case SIOCATMARK:
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED) {
			answ = 0;
		} else {
			smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
			smc_curs_copy(&urg, &conn->urg_curs, conn);
			answ = smc_curs_diff(conn->rmb_desc->len,
					     &cons, &urg) == 1;
		}
		break;
	default:
		release_sock(&smc->sk);
		return -ENOIOCTLCMD;
	}
	release_sock(&smc->sk);

	return put_user(answ, (int __user *)arg);
}

static ssize_t smc_sendpage(struct socket *sock, struct page *page,
			    int offset, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state != SMC_ACTIVE) {
		release_sock(sk);
		goto out;
	}
	release_sock(sk);
	if (smc->use_fallback)
		rc = kernel_sendpage(smc->clcsock, page, offset,
				     size, flags);
	else
		rc = sock_no_sendpage(sock, page, offset, size, flags);

out:
	return rc;
}
/* Map the affected portions of the rmbe into an spd, note the number of bytes
 * to splice in conn->splice_pending, and press 'go'. Delays consumer cursor
 * updates till whenever a respective page has been fully processed.
 * Note that subsequent recv() calls have to wait till all splice() processing
 * completed.
 */
static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
			       struct pipe_inode_info *pipe, size_t len,
			       unsigned int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
		/* socket was connected before, no more data to read */
		rc = 0;
		goto out;
	}
	if (sk->sk_state == SMC_INIT ||
	    sk->sk_state == SMC_LISTEN ||
	    sk->sk_state == SMC_CLOSED)
		goto out;

	if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
		rc = 0;
		goto out;
	}

	if (smc->use_fallback) {
		rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
						    pipe, len, flags);
	} else {
		if (*ppos) {
			rc = -ESPIPE;
			goto out;
		}
		if (flags & SPLICE_F_NONBLOCK)
			flags = MSG_DONTWAIT;
		else
			flags = 0;
		rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags);
	}
out:
	release_sock(sk);

	return rc;
}

/* must look like tcp */
static const struct proto_ops smc_sock_ops = {
	.family		= PF_SMC,
	.owner		= THIS_MODULE,
	.release	= smc_release,
	.bind		= smc_bind,
	.connect	= smc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= smc_accept,
	.getname	= smc_getname,
	.poll		= smc_poll,
	.ioctl		= smc_ioctl,
	.listen		= smc_listen,
	.shutdown	= smc_shutdown,
	.setsockopt	= smc_setsockopt,
	.getsockopt	= smc_getsockopt,
	.sendmsg	= smc_sendmsg,
	.recvmsg	= smc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= smc_sendpage,
	.splice_read	= smc_splice_read,
};
static int smc_create(struct net *net, struct socket *sock, int protocol,
		      int kern)
{
	int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
	struct smc_sock *smc;
	struct sock *sk;
	int rc;

	rc = -ESOCKTNOSUPPORT;
	if (sock->type != SOCK_STREAM)
		goto out;

	rc = -EPROTONOSUPPORT;
	if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
		goto out;

	rc = -ENOBUFS;
	sock->ops = &smc_sock_ops;
	sk = smc_sock_alloc(net, sock, protocol);
	if (!sk)
		goto out;

	/* create internal TCP socket for CLC handshake and fallback */
	smc = smc_sk(sk);
	smc->use_fallback = false; /* assume rdma capability first */
	smc->fallback_rsn = 0;
	rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
			      &smc->clcsock);
	if (rc) {
		sk_common_release(sk);
		goto out;
	}
	smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
	smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);

out:
	return rc;
}

static const struct net_proto_family smc_sock_family_ops = {
	.family	= PF_SMC,
	.owner	= THIS_MODULE,
	.create	= smc_create,
};

unsigned int smc_net_id;

static __net_init int smc_net_init(struct net *net)
{
	return smc_pnet_net_init(net);
}

static void __net_exit smc_net_exit(struct net *net)
{
	smc_pnet_net_exit(net);
}

static struct pernet_operations smc_net_ops = {
	.init = smc_net_init,
	.exit = smc_net_exit,
	.id   = &smc_net_id,
	.size = sizeof(struct smc_net),
};

static int __init smc_init(void)
{
	int rc;

	rc = register_pernet_subsys(&smc_net_ops);
	if (rc)
		return rc;

	rc = smc_pnet_init();
	if (rc)
		goto out_pernet_subsys;

	rc = smc_core_init();
	if (rc) {
		pr_err("%s: smc_core_init fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = smc_llc_init();
	if (rc) {
		pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
		goto out_core;
	}

	rc = smc_cdc_init();
	if (rc) {
		pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
		goto out_core;
	}

	rc = proto_register(&smc_proto, 1);
	if (rc) {
		pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
		goto out_core;
	}

	rc = proto_register(&smc_proto6, 1);
	if (rc) {
		pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
		goto out_proto;
	}

	rc = sock_register(&smc_sock_family_ops);
	if (rc) {
		pr_err("%s: sock_register fails with %d\n", __func__, rc);
		goto out_proto6;
	}
	INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
	INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);

	rc = smc_ib_register_client();
	if (rc) {
		pr_err("%s: ib_register fails with %d\n", __func__, rc);
		goto out_sock;
	}

	static_branch_enable(&tcp_have_smc);
	return 0;

out_sock:
	sock_unregister(PF_SMC);
out_proto6:
	proto_unregister(&smc_proto6);
out_proto:
	proto_unregister(&smc_proto);
out_core:
	smc_core_exit();
out_pnet:
	smc_pnet_exit();
out_pernet_subsys:
	unregister_pernet_subsys(&smc_net_ops);

	return rc;
}
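/* The error unwind labels in smc_init() above release resources in reverse
 * registration order; smc_exit() below performs a similar (though not
 * identical) teardown for module unload, finishing with rcu_barrier() to
 * let outstanding RCU callbacks complete before the module text goes away.
 */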
static void __exit smc_exit(void)
{
	static_branch_disable(&tcp_have_smc);
	sock_unregister(PF_SMC);
	smc_core_exit();
	smc_ib_unregister_client();
	proto_unregister(&smc_proto6);
	proto_unregister(&smc_proto);
	smc_pnet_exit();
	unregister_pernet_subsys(&smc_net_ops);
	rcu_barrier();
}

module_init(smc_init);
module_exit(smc_exit);

MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("smc socket address family");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_SMC);