// SPDX-License-Identifier: GPL-2.0-only
/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  AF_SMC protocol family socket handler keeping the AF_INET sock address type
 *  applies to SOCK_STREAM sockets only
 *  offers an alternative communication option for TCP-protocol sockets
 *  applicable with RoCE-cards only
 *
 *  Initial restrictions:
 *    - support for alternate links postponed
 *
 *  Copyright IBM Corp. 2016, 2018
 *
 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 *              based on prototype from Frank Blaschka
 */

#define KMSG_COMPONENT "smc"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/socket.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/sched/signal.h>
#include <linux/if_vlan.h>
#include <linux/rcupdate_wait.h>

#include <net/sock.h>
#include <net/tcp.h>
#include <net/smc.h>
#include <asm/ioctls.h>

#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include "smc_netns.h"

#include "smc.h"
#include "smc_clc.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_ism.h"
#include "smc_pnet.h"
#include "smc_tx.h"
#include "smc_rx.h"
#include "smc_close.h"

static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
						 * creation on server
						 */
static DEFINE_MUTEX(smc_client_lgr_pending);	/* serialize link group
						 * creation on client
						 */

static void smc_tcp_listen_work(struct work_struct *);
static void smc_connect_work(struct work_struct *);

static void smc_set_keepalive(struct sock *sk, int val)
{
	struct smc_sock *smc = smc_sk(sk);

	smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
}

static struct smc_hashinfo smc_v4_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};

static struct smc_hashinfo smc_v6_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
};

int smc_hash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
	struct hlist_head *head;

	head = &h->ht;

	write_lock_bh(&h->lock);
	sk_add_node(sk, head);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	write_unlock_bh(&h->lock);

	return 0;
}
EXPORT_SYMBOL_GPL(smc_hash_sk);

void smc_unhash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;

	write_lock_bh(&h->lock);
	if (sk_del_node_init(sk))
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	write_unlock_bh(&h->lock);
}
EXPORT_SYMBOL_GPL(smc_unhash_sk);

struct proto smc_proto = {
	.name		= "SMC",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v4_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto);

struct proto smc_proto6 = {
	.name		= "SMC6",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v6_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto6);

static void smc_restore_fallback_changes(struct smc_sock *smc)
{
	smc->clcsock->file->private_data = smc->sk.sk_socket;
	smc->clcsock->file = NULL;
}
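
/* Release handling below is split in two: __smc_release() runs the SMC
 * close protocol (smc_close_active()) for native connections, while for
 * fallback sockets it only needs to shut down the internal TCP socket,
 * after smc_restore_fallback_changes() has pointed the struct file back
 * at the SMC socket.
 */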

static int __smc_release(struct smc_sock *smc)
{
	struct sock *sk = &smc->sk;
	int rc = 0;

	if (!smc->use_fallback) {
		rc = smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	} else {
		if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
			sock_put(sk); /* passive closing */
		if (sk->sk_state == SMC_LISTEN) {
			/* wake up clcsock accept */
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
		}
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk);
		smc_restore_fallback_changes(smc);
	}

	sk->sk_prot->unhash(sk);

	if (sk->sk_state == SMC_CLOSED) {
		if (smc->clcsock) {
			release_sock(sk);
			smc_clcsock_release(smc);
			lock_sock(sk);
		}
		if (!smc->use_fallback)
			smc_conn_free(&smc->conn);
	}

	return rc;
}

static int smc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = 0;

	if (!sk)
		goto out;

	sock_hold(sk); /* sock_put below */
	smc = smc_sk(sk);

	/* cleanup for a dangling non-blocking connect */
	if (smc->connect_nonblock && sk->sk_state == SMC_INIT)
		tcp_abort(smc->clcsock->sk, ECONNABORTED);
	flush_work(&smc->connect_work);

	if (sk->sk_state == SMC_LISTEN)
		/* smc_close_non_accepted() is called and acquires
		 * sock lock for child sockets again
		 */
		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	else
		lock_sock(sk);

	rc = __smc_release(smc);

	/* detach socket */
	sock_orphan(sk);
	sock->sk = NULL;
	release_sock(sk);

	sock_put(sk); /* sock_hold above */
	sock_put(sk); /* final sock_put */
out:
	return rc;
}

static void smc_destruct(struct sock *sk)
{
	if (sk->sk_state != SMC_CLOSED)
		return;
	if (!sock_flag(sk, SOCK_DEAD))
		return;

	sk_refcnt_debug_dec(sk);
}

static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
				   int protocol)
{
	struct smc_sock *smc;
	struct proto *prot;
	struct sock *sk;

	prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
	if (!sk)
		return NULL;

	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
	sk->sk_state = SMC_INIT;
	sk->sk_destruct = smc_destruct;
	sk->sk_protocol = protocol;
	smc = smc_sk(sk);
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	INIT_WORK(&smc->connect_work, smc_connect_work);
	INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
	INIT_LIST_HEAD(&smc->accept_q);
	spin_lock_init(&smc->accept_q_lock);
	spin_lock_init(&smc->conn.send_lock);
	sk->sk_prot->hash(sk);
	sk_refcnt_debug_inc(sk);
	mutex_init(&smc->clcsock_release_lock);

	return sk;
}

static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
		    int addr_len)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);

	/* replicate tests from inet_bind(), to be safe wrt. future changes */
	rc = -EINVAL;
	if (addr_len < sizeof(struct sockaddr_in))
		goto out;

	rc = -EAFNOSUPPORT;
	if (addr->sin_family != AF_INET &&
	    addr->sin_family != AF_INET6 &&
	    addr->sin_family != AF_UNSPEC)
		goto out;
	/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
	if (addr->sin_family == AF_UNSPEC &&
	    addr->sin_addr.s_addr != htonl(INADDR_ANY))
		goto out;

	lock_sock(sk);

	/* Check if socket is already active */
	rc = -EINVAL;
	if (sk->sk_state != SMC_INIT || smc->connect_nonblock)
		goto out_rel;

	smc->clcsock->sk->sk_reuse = sk->sk_reuse;
	rc = kernel_bind(smc->clcsock, uaddr, addr_len);

out_rel:
	release_sock(sk);
out:
	return rc;
}

static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
				   unsigned long mask)
{
	/* options we don't get control via setsockopt for */
	nsk->sk_type = osk->sk_type;
	nsk->sk_sndbuf = osk->sk_sndbuf;
	nsk->sk_rcvbuf = osk->sk_rcvbuf;
	nsk->sk_sndtimeo = osk->sk_sndtimeo;
	nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
	nsk->sk_mark = osk->sk_mark;
	nsk->sk_priority = osk->sk_priority;
	nsk->sk_rcvlowat = osk->sk_rcvlowat;
	nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
	nsk->sk_err = osk->sk_err;

	nsk->sk_flags &= ~mask;
	nsk->sk_flags |= osk->sk_flags & mask;
}

#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_BROADCAST) | \
			     (1UL << SOCK_TIMESTAMP) | \
			     (1UL << SOCK_DBG) | \
			     (1UL << SOCK_RCVTSTAMP) | \
			     (1UL << SOCK_RCVTSTAMPNS) | \
			     (1UL << SOCK_LOCALROUTE) | \
			     (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
			     (1UL << SOCK_RXQ_OVFL) | \
			     (1UL << SOCK_WIFI_STATUS) | \
			     (1UL << SOCK_NOFCS) | \
			     (1UL << SOCK_FILTER_LOCKED) | \
			     (1UL << SOCK_TSTAMP_NEW))
/* copy only relevant settings and flags of SOL_SOCKET level from smc to
 * clc socket (since smc is not called for these options from net/core)
 */
static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
	smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
}

#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_DBG))
/* copy only settings and flags relevant for smc from clc to smc socket */
static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
{
	smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}

/* register the new rmb on all links */
static int smcr_lgr_reg_rmbs(struct smc_link *link,
			     struct smc_buf_desc *rmb_desc)
{
	struct smc_link_group *lgr = link->lgr;
	int i, rc = 0;

	rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
	if (rc)
		return rc;
	/* protect against parallel smc_llc_cli_rkey_exchange() and
	 * parallel smcr_link_reg_rmb()
	 */
	mutex_lock(&lgr->llc_conf_mutex);
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
			continue;
		rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc);
		if (rc)
			goto out;
	}

	/* exchange confirm_rkey msg with peer */
	rc = smc_llc_do_confirm_rkey(link, rmb_desc);
	if (rc) {
		rc = -EFAULT;
		goto out;
	}
	rmb_desc->is_conf_rkey = true;
out:
	mutex_unlock(&lgr->llc_conf_mutex);
	smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
	return rc;
}
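
/* Note on the RKEY flow above: smc_llc_flow_initiate() admits only one
 * LLC flow per link group at a time, and llc_conf_mutex keeps the RMB
 * registration loop from racing with link add/removal. Only after
 * smc_llc_do_confirm_rkey() has exchanged the CONFIRM_RKEY message with
 * the peer is the RMB marked usable via is_conf_rkey.
 */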

static int smcr_clnt_conf_first_link(struct smc_sock *smc)
{
	struct smc_link *link = smc->conn.lnk;
	struct smc_llc_qentry *qentry;
	int rc;

	/* receive CONFIRM LINK request from server over RoCE fabric */
	qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
			      SMC_LLC_CONFIRM_LINK);
	if (!qentry) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
	}
	smc_llc_save_peer_uid(qentry);
	rc = smc_llc_eval_conf_link(qentry, SMC_LLC_REQ);
	smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
	if (rc)
		return SMC_CLC_DECL_RMBE_EC;

	rc = smc_ib_modify_qp_rts(link);
	if (rc)
		return SMC_CLC_DECL_ERR_RDYLNK;

	smc_wr_remember_qp_attr(link);

	if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
		return SMC_CLC_DECL_ERR_REGRMB;

	/* confirm_rkey is implicit on 1st contact */
	smc->conn.rmb_desc->is_conf_rkey = true;

	/* send CONFIRM LINK response over RoCE fabric */
	rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_CL;

	smc_llc_link_active(link);
	smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);

	/* optional 2nd link, receive ADD LINK request from server */
	qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
			      SMC_LLC_ADD_LINK);
	if (!qentry) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		if (rc == -EAGAIN)
			rc = 0; /* no DECLINE received, go with one link */
		return rc;
	}
	smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
	smc_llc_cli_add_link(link, qentry);
	return 0;
}

static void smcr_conn_save_peer_info(struct smc_sock *smc,
				     struct smc_clc_msg_accept_confirm *clc)
{
	int bufsize = smc_uncompress_bufsize(clc->rmbe_size);

	smc->conn.peer_rmbe_idx = clc->rmbe_idx;
	smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
	smc->conn.peer_rmbe_size = bufsize;
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
	smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
}

static void smcd_conn_save_peer_info(struct smc_sock *smc,
				     struct smc_clc_msg_accept_confirm *clc)
{
	int bufsize = smc_uncompress_bufsize(clc->dmbe_size);

	smc->conn.peer_rmbe_idx = clc->dmbe_idx;
	smc->conn.peer_token = clc->token;
	/* msg header takes up space in the buffer */
	smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
	smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
}

static void smc_conn_save_peer_info(struct smc_sock *smc,
				    struct smc_clc_msg_accept_confirm *clc)
{
	if (smc->conn.lgr->is_smcd)
		smcd_conn_save_peer_info(smc, clc);
	else
		smcr_conn_save_peer_info(smc, clc);
}

static void smc_link_save_peer_info(struct smc_link *link,
				    struct smc_clc_msg_accept_confirm *clc)
{
	link->peer_qpn = ntoh24(clc->qpn);
	memcpy(link->peer_gid, clc->lcl.gid, SMC_GID_SIZE);
	memcpy(link->peer_mac, clc->lcl.mac, sizeof(link->peer_mac));
	link->peer_psn = ntoh24(clc->psn);
	link->peer_mtu = clc->qp_mtu;
}
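
/* Falling back means the connection continues as plain TCP on the
 * internal clcsock. The switch below re-points the application's struct
 * file at the clcsock (and moves the fasync list along), so that all
 * subsequent VFS operations go to TCP directly.
 */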

static void smc_switch_to_fallback(struct smc_sock *smc)
{
	smc->use_fallback = true;
	if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
		smc->clcsock->file = smc->sk.sk_socket->file;
		smc->clcsock->file->private_data = smc->clcsock;
		smc->clcsock->wq.fasync_list =
			smc->sk.sk_socket->wq.fasync_list;
	}
}

/* fall back during connect */
static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
{
	smc_switch_to_fallback(smc);
	smc->fallback_rsn = reason_code;
	smc_copy_sock_settings_to_clc(smc);
	smc->connect_nonblock = 0;
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;
	return 0;
}

/* decline and fall back during connect */
static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
{
	int rc;

	if (reason_code < 0) { /* error, fallback is not possible */
		if (smc->sk.sk_state == SMC_INIT)
			sock_put(&smc->sk); /* passive closing */
		return reason_code;
	}
	if (reason_code != SMC_CLC_DECL_PEERDECL) {
		rc = smc_clc_send_decline(smc, reason_code);
		if (rc < 0) {
			if (smc->sk.sk_state == SMC_INIT)
				sock_put(&smc->sk); /* passive closing */
			return rc;
		}
	}
	return smc_connect_fallback(smc, reason_code);
}

/* abort connecting */
static int smc_connect_abort(struct smc_sock *smc, int reason_code,
			     int local_contact)
{
	bool is_smcd = smc->conn.lgr->is_smcd;

	if (local_contact == SMC_FIRST_CONTACT)
		smc_lgr_cleanup_early(&smc->conn);
	else
		smc_conn_free(&smc->conn);
	if (is_smcd)
		/* there is only one lgr role for SMC-D; use server lock */
		mutex_unlock(&smc_server_lgr_pending);
	else
		mutex_unlock(&smc_client_lgr_pending);

	smc->connect_nonblock = 0;
	return reason_code;
}

/* check if there is a rdma device available for this connection. */
/* called for connect and listen */
static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini)
{
	/* PNET table look up: search active ib_device and port
	 * within same PNETID that also contains the ethernet device
	 * used for the internal TCP socket
	 */
	smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
	if (!ini->ib_dev)
		return SMC_CLC_DECL_NOSMCRDEV;
	return 0;
}

/* check if there is an ISM device available for this connection. */
/* called for connect and listen */
static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini)
{
	/* Find ISM device with same PNETID as connecting interface */
	smc_pnet_find_ism_resource(smc->clcsock->sk, ini);
	if (!ini->ism_dev)
		return SMC_CLC_DECL_NOSMCDDEV;
	return 0;
}

/* Check for VLAN ID and register it on ISM device just for CLC handshake */
static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
				      struct smc_init_info *ini)
{
	if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev, ini->vlan_id))
		return SMC_CLC_DECL_ISMVLANERR;
	return 0;
}

/* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
 * used, the VLAN ID will be registered again during the connection setup.
 */
static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
					struct smc_init_info *ini)
{
	if (!is_smcd)
		return 0;
	if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev, ini->vlan_id))
		return SMC_CLC_DECL_CNFERR;
	return 0;
}

/* CLC handshake during connect */
static int smc_connect_clc(struct smc_sock *smc, int smc_type,
			   struct smc_clc_msg_accept_confirm *aclc,
			   struct smc_init_info *ini)
{
	int rc = 0;

	/* do inband token exchange */
	rc = smc_clc_send_proposal(smc, smc_type, ini);
	if (rc)
		return rc;
	/* receive SMC Accept CLC message */
	return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT,
				CLC_WAIT_TIME);
}

/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc,
			    struct smc_clc_msg_accept_confirm *aclc,
			    struct smc_init_info *ini)
{
	int i, reason_code = 0;
	struct smc_link *link;

	ini->is_smcd = false;
	ini->ib_lcl = &aclc->lcl;
	ini->ib_clcqpn = ntoh24(aclc->qpn);
	ini->srv_first_contact = aclc->hdr.flag;

	mutex_lock(&smc_client_lgr_pending);
	reason_code = smc_conn_create(smc, ini);
	if (reason_code) {
		mutex_unlock(&smc_client_lgr_pending);
		return reason_code;
	}

	smc_conn_save_peer_info(smc, aclc);

	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
		link = smc->conn.lnk;
	} else {
		/* set link that was assigned by server */
		link = NULL;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			struct smc_link *l = &smc->conn.lgr->lnk[i];

			if (l->peer_qpn == ntoh24(aclc->qpn)) {
				link = l;
				break;
			}
		}
		if (!link)
			return smc_connect_abort(smc, SMC_CLC_DECL_NOSRVLINK,
						 ini->cln_first_contact);
		smc->conn.lnk = link;
	}

	/* create send buffer and rmb */
	if (smc_buf_create(smc, false))
		return smc_connect_abort(smc, SMC_CLC_DECL_MEM,
					 ini->cln_first_contact);

	if (ini->cln_first_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, aclc);

	if (smc_rmb_rtoken_handling(&smc->conn, link, aclc))
		return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
					 ini->cln_first_contact);

	smc_close_init(smc);
	smc_rx_init(smc);

	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
		if (smc_ib_ready_link(link))
			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
						 ini->cln_first_contact);
	} else {
		if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc))
			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
						 ini->cln_first_contact);
	}
	smc_rmb_sync_sg_for_device(&smc->conn);

	reason_code = smc_clc_send_confirm(smc);
	if (reason_code)
		return smc_connect_abort(smc, reason_code,
					 ini->cln_first_contact);

	smc_tx_init(smc);

	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
		/* QP confirmation over RoCE fabric */
		smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
		reason_code = smcr_clnt_conf_first_link(smc);
		smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
		if (reason_code)
			return smc_connect_abort(smc, reason_code,
						 ini->cln_first_contact);
	}
	mutex_unlock(&smc_client_lgr_pending);

	smc_copy_sock_settings_to_clc(smc);
	smc->connect_nonblock = 0;
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return 0;
}
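
/* SMC-D (ISM) client setup mirrors smc_connect_rdma() above but is
 * simpler: data moves through an ISM device instead of an RDMA link, so
 * no LLC link confirmation step is needed. Because SMC-D link groups
 * have no distinct client/server role, smc_server_lgr_pending serializes
 * link group creation for both directions.
 */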

/* setup for ISM connection of client */
static int smc_connect_ism(struct smc_sock *smc,
			   struct smc_clc_msg_accept_confirm *aclc,
			   struct smc_init_info *ini)
{
	int rc = 0;

	ini->is_smcd = true;
	ini->ism_gid = aclc->gid;
	ini->srv_first_contact = aclc->hdr.flag;

	/* there is only one lgr role for SMC-D; use server lock */
	mutex_lock(&smc_server_lgr_pending);
	rc = smc_conn_create(smc, ini);
	if (rc) {
		mutex_unlock(&smc_server_lgr_pending);
		return rc;
	}

	/* Create send and receive buffers */
	if (smc_buf_create(smc, true))
		return smc_connect_abort(smc, SMC_CLC_DECL_MEM,
					 ini->cln_first_contact);

	smc_conn_save_peer_info(smc, aclc);
	smc_close_init(smc);
	smc_rx_init(smc);
	smc_tx_init(smc);

	rc = smc_clc_send_confirm(smc);
	if (rc)
		return smc_connect_abort(smc, rc, ini->cln_first_contact);
	mutex_unlock(&smc_server_lgr_pending);

	smc_copy_sock_settings_to_clc(smc);
	smc->connect_nonblock = 0;
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return 0;
}

/* perform steps before actually connecting */
static int __smc_connect(struct smc_sock *smc)
{
	bool ism_supported = false, rdma_supported = false;
	struct smc_clc_msg_accept_confirm aclc;
	struct smc_init_info ini = {0};
	int smc_type;
	int rc = 0;

	if (smc->use_fallback)
		return smc_connect_fallback(smc, smc->fallback_rsn);

	/* if peer has not signalled SMC-capability, fall back */
	if (!tcp_sk(smc->clcsock->sk)->syn_smc)
		return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(smc))
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);

	/* get vlan id from IP device */
	if (smc_vlan_by_tcpsk(smc->clcsock, &ini))
		return smc_connect_decline_fallback(smc,
						    SMC_CLC_DECL_GETVLANERR);

	/* check if there is an ism device available */
	if (!smc_find_ism_device(smc, &ini) &&
	    !smc_connect_ism_vlan_setup(smc, &ini)) {
		/* ISM is supported for this connection */
		ism_supported = true;
		smc_type = SMC_TYPE_D;
	}

	/* check if there is a rdma device available */
	if (!smc_find_rdma_device(smc, &ini)) {
		/* RDMA is supported for this connection */
		rdma_supported = true;
		if (ism_supported)
			smc_type = SMC_TYPE_B; /* both */
		else
			smc_type = SMC_TYPE_R; /* only RDMA */
	}

	/* if neither ISM nor RDMA are supported, fallback */
	if (!rdma_supported && !ism_supported)
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV);

	/* perform CLC handshake */
	rc = smc_connect_clc(smc, smc_type, &aclc, &ini);
	if (rc) {
		smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
		return smc_connect_decline_fallback(smc, rc);
	}

	/* depending on previous steps, connect using rdma or ism */
	if (rdma_supported && aclc.hdr.path == SMC_TYPE_R)
		rc = smc_connect_rdma(smc, &aclc, &ini);
	else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
		rc = smc_connect_ism(smc, &aclc, &ini);
	else
		rc = SMC_CLC_DECL_MODEUNSUPP;
	if (rc) {
		smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
		return smc_connect_decline_fallback(smc, rc);
	}

	smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
	return 0;
}
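
/* Client handshake summary for __smc_connect() above: after the TCP
 * three-way handshake has carried the "SMC capable" experimental option
 * (syn_smc), the client sends a CLC PROPOSAL listing its ISM and/or RDMA
 * devices, waits for the server's CLC ACCEPT, and answers with CLC
 * CONFIRM; first contact on an SMC-R link group additionally runs the
 * LLC CONFIRM_LINK exchange in smcr_clnt_conf_first_link().
 */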

static void smc_connect_work(struct work_struct *work)
{
	struct smc_sock *smc = container_of(work, struct smc_sock,
					    connect_work);
	long timeo = smc->sk.sk_sndtimeo;
	int rc = 0;

	if (!timeo)
		timeo = MAX_SCHEDULE_TIMEOUT;
	lock_sock(smc->clcsock->sk);
	if (smc->clcsock->sk->sk_err) {
		smc->sk.sk_err = smc->clcsock->sk->sk_err;
	} else if ((1 << smc->clcsock->sk->sk_state) &
					(TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo);
		if ((rc == -EPIPE) &&
		    ((1 << smc->clcsock->sk->sk_state) &
					(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)))
			rc = 0;
	}
	release_sock(smc->clcsock->sk);
	lock_sock(&smc->sk);
	if (rc != 0 || smc->sk.sk_err) {
		smc->sk.sk_state = SMC_CLOSED;
		if (rc == -EPIPE || rc == -EAGAIN)
			smc->sk.sk_err = EPIPE;
		else if (signal_pending(current))
			smc->sk.sk_err = -sock_intr_errno(timeo);
		sock_put(&smc->sk); /* passive closing */
		goto out;
	}

	rc = __smc_connect(smc);
	if (rc < 0)
		smc->sk.sk_err = -rc;

out:
	if (!sock_flag(&smc->sk, SOCK_DEAD)) {
		if (smc->sk.sk_err) {
			smc->sk.sk_state_change(&smc->sk);
		} else { /* allow polling before and after fallback decision */
			smc->clcsock->sk->sk_write_space(smc->clcsock->sk);
			smc->sk.sk_write_space(&smc->sk);
		}
	}
	release_sock(&smc->sk);
}

static int smc_connect(struct socket *sock, struct sockaddr *addr,
		       int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;

	smc = smc_sk(sk);

	/* separate smc parameter checking to be safe */
	if (alen < sizeof(addr->sa_family))
		goto out_err;
	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
		goto out_err;

	lock_sock(sk);
	switch (sk->sk_state) {
	default:
		goto out;
	case SMC_ACTIVE:
		rc = -EISCONN;
		goto out;
	case SMC_INIT:
		rc = 0;
		break;
	}

	smc_copy_sock_settings_to_clc(smc);
	tcp_sk(smc->clcsock->sk)->syn_smc = 1;
	if (smc->connect_nonblock) {
		rc = -EALREADY;
		goto out;
	}
	rc = kernel_connect(smc->clcsock, addr, alen, flags);
	if (rc && rc != -EINPROGRESS)
		goto out;

	sock_hold(&smc->sk); /* sock put in passive closing */
	if (smc->use_fallback)
		goto out;
	if (flags & O_NONBLOCK) {
		if (schedule_work(&smc->connect_work))
			smc->connect_nonblock = 1;
		rc = -EINPROGRESS;
	} else {
		rc = __smc_connect(smc);
		if (rc < 0)
			goto out;
		else
			rc = 0; /* success cases including fallback */
	}

out:
	release_sock(sk);
out_err:
	return rc;
}

static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
{
	struct socket *new_clcsock = NULL;
	struct sock *lsk = &lsmc->sk;
	struct sock *new_sk;
	int rc = -EINVAL;

	release_sock(lsk);
	new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
	if (!new_sk) {
		rc = -ENOMEM;
		lsk->sk_err = ENOMEM;
		*new_smc = NULL;
		lock_sock(lsk);
		goto out;
	}
	*new_smc = smc_sk(new_sk);

	mutex_lock(&lsmc->clcsock_release_lock);
	if (lsmc->clcsock)
		rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
	mutex_unlock(&lsmc->clcsock_release_lock);
	lock_sock(lsk);
	if (rc < 0)
		lsk->sk_err = -rc;
	if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
		new_sk->sk_prot->unhash(new_sk);
		if (new_clcsock)
			sock_release(new_clcsock);
		new_sk->sk_state = SMC_CLOSED;
		sock_set_flag(new_sk, SOCK_DEAD);
		sock_put(new_sk); /* final */
		*new_smc = NULL;
		goto out;
	}

	(*new_smc)->clcsock = new_clcsock;
out:
	return rc;
}
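
/* The accept machinery: smc_clcsock_accept() above takes one connection
 * off the internal TCP listen socket and wraps it in a fresh SMC sock;
 * the helpers below queue such socks on the listen socket's accept_q
 * until user space picks them up via smc_accept(). Each enqueued sock
 * holds an extra reference that smc_accept_unlink() drops again.
 */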

/* add a just created sock to the accept queue of the listen sock as
 * candidate for a following socket accept call from user space
 */
static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
{
	struct smc_sock *par = smc_sk(parent);

	sock_hold(sk); /* sock_put in smc_accept_unlink() */
	spin_lock(&par->accept_q_lock);
	list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_added(parent);
}

/* remove a socket from the accept queue of its parental listening socket */
static void smc_accept_unlink(struct sock *sk)
{
	struct smc_sock *par = smc_sk(sk)->listen_smc;

	spin_lock(&par->accept_q_lock);
	list_del_init(&smc_sk(sk)->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
	sock_put(sk); /* sock_hold in smc_accept_enqueue */
}

/* remove a sock from the accept queue to bind it to a new socket created
 * for a socket accept call from user space
 */
struct sock *smc_accept_dequeue(struct sock *parent,
				struct socket *new_sock)
{
	struct smc_sock *isk, *n;
	struct sock *new_sk;

	list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
		new_sk = (struct sock *)isk;

		smc_accept_unlink(new_sk);
		if (new_sk->sk_state == SMC_CLOSED) {
			new_sk->sk_prot->unhash(new_sk);
			if (isk->clcsock) {
				sock_release(isk->clcsock);
				isk->clcsock = NULL;
			}
			sock_put(new_sk); /* final */
			continue;
		}
		if (new_sock) {
			sock_graft(new_sk, new_sock);
			if (isk->use_fallback) {
				smc_sk(new_sk)->clcsock->file = new_sock->file;
				isk->clcsock->file->private_data = isk->clcsock;
			}
		}
		return new_sk;
	}
	return NULL;
}

/* clean up for a created but never accepted sock */
void smc_close_non_accepted(struct sock *sk)
{
	struct smc_sock *smc = smc_sk(sk);

	sock_hold(sk); /* sock_put below */
	lock_sock(sk);
	if (!sk->sk_lingertime)
		/* wait for peer closing */
		sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
	__smc_release(smc);
	release_sock(sk);
	sock_put(sk); /* sock_hold above */
	sock_put(sk); /* final sock_put */
}
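
/* Server side counterpart of smcr_clnt_conf_first_link(): on first
 * contact the server registers the RMB, sends the LLC CONFIRM_LINK
 * request, evaluates the client's response, and then tries to establish
 * a second link via smc_llc_srv_add_link().
 */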

static int smcr_serv_conf_first_link(struct smc_sock *smc)
{
	struct smc_link *link = smc->conn.lnk;
	struct smc_llc_qentry *qentry;
	int rc;

	if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
		return SMC_CLC_DECL_ERR_REGRMB;

	/* send CONFIRM LINK request to client over the RoCE fabric */
	rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_CL;

	/* receive CONFIRM LINK response from client over the RoCE fabric */
	qentry = smc_llc_wait(link->lgr, link, SMC_LLC_WAIT_TIME,
			      SMC_LLC_CONFIRM_LINK);
	if (!qentry) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
	}
	smc_llc_save_peer_uid(qentry);
	rc = smc_llc_eval_conf_link(qentry, SMC_LLC_RESP);
	smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
	if (rc)
		return SMC_CLC_DECL_RMBE_EC;

	/* confirm_rkey is implicit on 1st contact */
	smc->conn.rmb_desc->is_conf_rkey = true;

	smc_llc_link_active(link);
	smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);

	/* initial contact - try to establish second link */
	smc_llc_srv_add_link(link);
	return 0;
}

/* listen worker: finish */
static void smc_listen_out(struct smc_sock *new_smc)
{
	struct smc_sock *lsmc = new_smc->listen_smc;
	struct sock *newsmcsk = &new_smc->sk;

	if (lsmc->sk.sk_state == SMC_LISTEN) {
		lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
		smc_accept_enqueue(&lsmc->sk, newsmcsk);
		release_sock(&lsmc->sk);
	} else { /* no longer listening */
		smc_close_non_accepted(newsmcsk);
	}

	/* Wake up accept */
	lsmc->sk.sk_data_ready(&lsmc->sk);
	sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
}

/* listen worker: finish in state connected */
static void smc_listen_out_connected(struct smc_sock *new_smc)
{
	struct sock *newsmcsk = &new_smc->sk;

	sk_refcnt_debug_inc(newsmcsk);
	if (newsmcsk->sk_state == SMC_INIT)
		newsmcsk->sk_state = SMC_ACTIVE;

	smc_listen_out(new_smc);
}

/* listen worker: finish in error state */
static void smc_listen_out_err(struct smc_sock *new_smc)
{
	struct sock *newsmcsk = &new_smc->sk;

	if (newsmcsk->sk_state == SMC_INIT)
		sock_put(&new_smc->sk); /* passive closing */
	newsmcsk->sk_state = SMC_CLOSED;

	smc_listen_out(new_smc);
}

/* listen worker: decline and fall back if possible */
static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
			       int local_contact)
{
	/* RDMA setup failed, switch back to TCP */
	if (local_contact == SMC_FIRST_CONTACT)
		smc_lgr_cleanup_early(&new_smc->conn);
	else
		smc_conn_free(&new_smc->conn);
	if (reason_code < 0) { /* error, no fallback possible */
		smc_listen_out_err(new_smc);
		return;
	}
	smc_switch_to_fallback(new_smc);
	new_smc->fallback_rsn = reason_code;
	if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
		if (smc_clc_send_decline(new_smc, reason_code) < 0) {
			smc_listen_out_err(new_smc);
			return;
		}
	}
	smc_listen_out_connected(new_smc);
}

/* listen worker: check prefixes */
static int smc_listen_prfx_check(struct smc_sock *new_smc,
				 struct smc_clc_msg_proposal *pclc)
{
	struct smc_clc_msg_proposal_prefix *pclc_prfx;
	struct socket *newclcsock = new_smc->clcsock;

	pclc_prfx = smc_clc_proposal_get_prefix(pclc);
	if (smc_clc_prfx_match(newclcsock, pclc_prfx))
		return SMC_CLC_DECL_DIFFPREFIX;

	return 0;
}

/* listen worker: initialize connection and buffers */
static int smc_listen_rdma_init(struct smc_sock *new_smc,
				struct smc_init_info *ini)
{
	int rc;

	/* allocate connection / link group */
	rc = smc_conn_create(new_smc, ini);
	if (rc)
		return rc;

	/* create send buffer and rmb */
	if (smc_buf_create(new_smc, false))
		return SMC_CLC_DECL_MEM;

	return 0;
}

/* listen worker: initialize connection and buffers for SMC-D */
static int smc_listen_ism_init(struct smc_sock *new_smc,
			       struct smc_clc_msg_proposal *pclc,
			       struct smc_init_info *ini)
{
	struct smc_clc_msg_smcd *pclc_smcd;
	int rc;

	pclc_smcd = smc_get_clc_msg_smcd(pclc);
	ini->ism_gid = pclc_smcd->gid;
	rc = smc_conn_create(new_smc, ini);
	if (rc)
		return rc;

	/* Check if peer can be reached via ISM device */
	if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid,
			    new_smc->conn.lgr->vlan_id,
			    new_smc->conn.lgr->smcd)) {
		if (ini->cln_first_contact == SMC_FIRST_CONTACT)
			smc_lgr_cleanup_early(&new_smc->conn);
		else
			smc_conn_free(&new_smc->conn);
		return SMC_CLC_DECL_SMCDNOTALK;
	}

	/* Create send and receive buffers */
	if (smc_buf_create(new_smc, true)) {
		if (ini->cln_first_contact == SMC_FIRST_CONTACT)
			smc_lgr_cleanup_early(&new_smc->conn);
		else
			smc_conn_free(&new_smc->conn);
		return SMC_CLC_DECL_MEM;
	}

	return 0;
}

/* listen worker: register buffers */
static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
{
	struct smc_connection *conn = &new_smc->conn;

	if (local_contact != SMC_FIRST_CONTACT) {
		if (smcr_lgr_reg_rmbs(conn->lnk, conn->rmb_desc))
			return SMC_CLC_DECL_ERR_REGRMB;
	}
	smc_rmb_sync_sg_for_device(&new_smc->conn);

	return 0;
}

/* listen worker: finish RDMA setup */
static int smc_listen_rdma_finish(struct smc_sock *new_smc,
				  struct smc_clc_msg_accept_confirm *cclc,
				  int local_contact)
{
	struct smc_link *link = new_smc->conn.lnk;
	int reason_code = 0;

	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, cclc);

	if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc)) {
		reason_code = SMC_CLC_DECL_ERR_RTOK;
		goto decline;
	}

	if (local_contact == SMC_FIRST_CONTACT) {
		if (smc_ib_ready_link(link)) {
			reason_code = SMC_CLC_DECL_ERR_RDYLNK;
			goto decline;
		}
		/* QP confirmation over RoCE fabric */
		smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
		reason_code = smcr_serv_conf_first_link(new_smc);
		smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
		if (reason_code)
			goto decline;
	}
	return 0;

decline:
	smc_listen_decline(new_smc, reason_code, local_contact);
	return reason_code;
}

/* setup for RDMA connection of server */
static void smc_listen_work(struct work_struct *work)
{
	struct smc_sock *new_smc = container_of(work, struct smc_sock,
						smc_listen_work);
	struct socket *newclcsock = new_smc->clcsock;
	struct smc_clc_msg_accept_confirm cclc;
	struct smc_clc_msg_proposal *pclc;
	struct smc_init_info ini = {0};
	bool ism_supported = false;
	u8 buf[SMC_CLC_MAX_LEN];
	int rc = 0;

	if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
		return smc_listen_out_err(new_smc);

	if (new_smc->use_fallback) {
		smc_listen_out_connected(new_smc);
		return;
	}

	/* check if peer is smc capable */
	if (!tcp_sk(newclcsock->sk)->syn_smc) {
		smc_switch_to_fallback(new_smc);
		new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC;
		smc_listen_out_connected(new_smc);
		return;
	}

	/* do inband token exchange -
	 * wait for and receive SMC Proposal CLC message
	 */
	pclc = (struct smc_clc_msg_proposal *)&buf;
	rc = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
			      SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
	if (rc)
		goto out_decl;

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(new_smc)) {
		rc = SMC_CLC_DECL_IPSEC;
		goto out_decl;
	}

	/* check for matching IP prefix and subnet length */
	rc = smc_listen_prfx_check(new_smc, pclc);
	if (rc)
		goto out_decl;

	/* get vlan id from IP device */
	if (smc_vlan_by_tcpsk(new_smc->clcsock, &ini)) {
		rc = SMC_CLC_DECL_GETVLANERR;
		goto out_decl;
	}

	mutex_lock(&smc_server_lgr_pending);
	smc_close_init(new_smc);
	smc_rx_init(new_smc);
	smc_tx_init(new_smc);

	/* check if ISM is available */
	if (pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) {
		ini.is_smcd = true; /* prepare ISM check */
		rc = smc_find_ism_device(new_smc, &ini);
		if (!rc)
			rc = smc_listen_ism_init(new_smc, pclc, &ini);
		if (!rc)
			ism_supported = true;
		else if (pclc->hdr.path == SMC_TYPE_D)
			goto out_unlock; /* skip RDMA and decline */
	}

	/* check if RDMA is available */
	if (!ism_supported) { /* SMC_TYPE_R or SMC_TYPE_B */
		/* prepare RDMA check */
		ini.is_smcd = false;
		ini.ism_dev = NULL;
		ini.ib_lcl = &pclc->lcl;
		rc = smc_find_rdma_device(new_smc, &ini);
		if (rc) {
			/* no RDMA device found */
			if (pclc->hdr.path == SMC_TYPE_B)
				/* neither ISM nor RDMA device found */
				rc = SMC_CLC_DECL_NOSMCDEV;
			goto out_unlock;
		}
		rc = smc_listen_rdma_init(new_smc, &ini);
		if (rc)
			goto out_unlock;
		rc = smc_listen_rdma_reg(new_smc, ini.cln_first_contact);
		if (rc)
			goto out_unlock;
	}

	/* send SMC Accept CLC message */
	rc = smc_clc_send_accept(new_smc, ini.cln_first_contact);
	if (rc)
		goto out_unlock;

	/* SMC-D does not need this lock any more */
	if (ism_supported)
		mutex_unlock(&smc_server_lgr_pending);

	/* receive SMC Confirm CLC message */
	rc = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
			      SMC_CLC_CONFIRM, CLC_WAIT_TIME);
	if (rc) {
		if (!ism_supported)
			goto out_unlock;
		goto out_decl;
	}

	/* finish worker */
	if (!ism_supported) {
		rc = smc_listen_rdma_finish(new_smc, &cclc,
					    ini.cln_first_contact);
		mutex_unlock(&smc_server_lgr_pending);
		if (rc)
			return;
	}
	smc_conn_save_peer_info(new_smc, &cclc);
	smc_listen_out_connected(new_smc);
	return;

out_unlock:
	mutex_unlock(&smc_server_lgr_pending);
out_decl:
	smc_listen_decline(new_smc, rc, ini.cln_first_contact);
}
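
/* smc_tcp_listen_work() below is the per-listen-socket worker: it loops
 * in kernel_accept() on the clcsock and schedules one smc_listen_work()
 * instance (above) per incoming connection, so the CLC handshakes of
 * independent connections can proceed in parallel.
 */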

static void smc_tcp_listen_work(struct work_struct *work)
{
	struct smc_sock *lsmc = container_of(work, struct smc_sock,
					     tcp_listen_work);
	struct sock *lsk = &lsmc->sk;
	struct smc_sock *new_smc;
	int rc = 0;

	lock_sock(lsk);
	while (lsk->sk_state == SMC_LISTEN) {
		rc = smc_clcsock_accept(lsmc, &new_smc);
		if (rc)
			goto out;
		if (!new_smc)
			continue;

		new_smc->listen_smc = lsmc;
		new_smc->use_fallback = lsmc->use_fallback;
		new_smc->fallback_rsn = lsmc->fallback_rsn;
		sock_hold(lsk); /* sock_put in smc_listen_work */
		INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
		smc_copy_sock_settings_to_smc(new_smc);
		new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
		new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
		sock_hold(&new_smc->sk); /* sock_put in passive closing */
		if (!schedule_work(&new_smc->smc_listen_work))
			sock_put(&new_smc->sk);
	}

out:
	release_sock(lsk);
	sock_put(&lsmc->sk); /* sock_hold in smc_listen */
}

static int smc_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);
	lock_sock(sk);

	rc = -EINVAL;
	if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) ||
	    smc->connect_nonblock)
		goto out;

	rc = 0;
	if (sk->sk_state == SMC_LISTEN) {
		sk->sk_max_ack_backlog = backlog;
		goto out;
	}
	/* some socket options are handled in core, so we could not apply
	 * them to the clc socket -- copy smc socket options to clc socket
	 */
	smc_copy_sock_settings_to_clc(smc);
	if (!smc->use_fallback)
		tcp_sk(smc->clcsock->sk)->syn_smc = 1;

	rc = kernel_listen(smc->clcsock, backlog);
	if (rc)
		goto out;
	sk->sk_max_ack_backlog = backlog;
	sk->sk_ack_backlog = 0;
	sk->sk_state = SMC_LISTEN;
	sock_hold(sk); /* sock_hold in tcp_listen_worker */
	if (!schedule_work(&smc->tcp_listen_work))
		sock_put(sk);

out:
	release_sock(sk);
	return rc;
}

static int smc_accept(struct socket *sock, struct socket *new_sock,
		      int flags, bool kern)
{
	struct sock *sk = sock->sk, *nsk;
	DECLARE_WAITQUEUE(wait, current);
	struct smc_sock *lsmc;
	long timeo;
	int rc = 0;

	lsmc = smc_sk(sk);
	sock_hold(sk); /* sock_put below */
	lock_sock(sk);

	if (lsmc->sk.sk_state != SMC_LISTEN) {
		rc = -EINVAL;
		release_sock(sk);
		goto out;
	}

	/* Wait for an incoming connection */
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	add_wait_queue_exclusive(sk_sleep(sk), &wait);
	while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (!timeo) {
			rc = -EAGAIN;
			break;
		}
		release_sock(sk);
		timeo = schedule_timeout(timeo);
		/* wakeup by sk_data_ready in smc_listen_work() */
		sched_annotate_sleep();
		lock_sock(sk);
		if (signal_pending(current)) {
			rc = sock_intr_errno(timeo);
			break;
		}
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(sk_sleep(sk), &wait);

	if (!rc)
		rc = sock_error(nsk);
	release_sock(sk);
	if (rc)
		goto out;

	if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) {
		/* wait till data arrives on the socket */
		timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
					 MSEC_PER_SEC);
		if (smc_sk(nsk)->use_fallback) {
			struct sock *clcsk = smc_sk(nsk)->clcsock->sk;

			lock_sock(clcsk);
			if (skb_queue_empty(&clcsk->sk_receive_queue))
				sk_wait_data(clcsk, &timeo, NULL);
			release_sock(clcsk);
		} else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) {
			lock_sock(nsk);
			smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available);
			release_sock(nsk);
		}
	}

out:
	sock_put(sk); /* sock_hold above */
	return rc;
}
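
/* Note on TCP_DEFER_ACCEPT above: since the clcsock already completed the
 * TCP handshake, smc_accept() emulates deferred accept by waiting for the
 * first payload data, either on the TCP socket (fallback) or on the RMBE
 * fill level (native SMC), before returning the new socket.
 */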

static int smc_getname(struct socket *sock, struct sockaddr *addr,
		       int peer)
{
	struct smc_sock *smc;

	if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
	    (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
		return -ENOTCONN;

	smc = smc_sk(sock->sk);

	return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
}

static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_INIT))
		goto out;

	if (msg->msg_flags & MSG_FASTOPEN) {
		if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
			smc_switch_to_fallback(smc);
			smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
		} else {
			rc = -EINVAL;
			goto out;
		}
	}

	if (smc->use_fallback)
		rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
	else
		rc = smc_tx_sendmsg(smc, msg, len);
out:
	release_sock(sk);
	return rc;
}

static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
		       int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
		/* socket was connected before, no more data to read */
		rc = 0;
		goto out;
	}
	if ((sk->sk_state == SMC_INIT) ||
	    (sk->sk_state == SMC_LISTEN) ||
	    (sk->sk_state == SMC_CLOSED))
		goto out;

	if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
		rc = 0;
		goto out;
	}

	if (smc->use_fallback) {
		rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
	} else {
		msg->msg_namelen = 0;
		rc = smc_rx_recvmsg(smc, msg, NULL, len, flags);
	}

out:
	release_sock(sk);
	return rc;
}

static __poll_t smc_accept_poll(struct sock *parent)
{
	struct smc_sock *isk = smc_sk(parent);
	__poll_t mask = 0;

	spin_lock(&isk->accept_q_lock);
	if (!list_empty(&isk->accept_q))
		mask = EPOLLIN | EPOLLRDNORM;
	spin_unlock(&isk->accept_q_lock);

	return mask;
}

static __poll_t smc_poll(struct file *file, struct socket *sock,
			 poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	__poll_t mask = 0;

	if (!sk)
		return EPOLLNVAL;

	smc = smc_sk(sock->sk);
	if (smc->use_fallback) {
		/* delegate to CLC child sock */
		mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
		sk->sk_err = smc->clcsock->sk->sk_err;
	} else {
		if (sk->sk_state != SMC_CLOSED)
			sock_poll_wait(file, sock, wait);
		if (sk->sk_err)
			mask |= EPOLLERR;
		if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
		    (sk->sk_state == SMC_CLOSED))
			mask |= EPOLLHUP;
		if (sk->sk_state == SMC_LISTEN) {
			/* woken up by sk_data_ready in smc_listen_work() */
			mask |= smc_accept_poll(sk);
		} else if (smc->use_fallback) { /* as result of connect_work() */
			mask |= smc->clcsock->ops->poll(file, smc->clcsock,
							wait);
			sk->sk_err = smc->clcsock->sk->sk_err;
		} else {
			if ((sk->sk_state != SMC_INIT &&
			     atomic_read(&smc->conn.sndbuf_space)) ||
			    sk->sk_shutdown & SEND_SHUTDOWN) {
				mask |= EPOLLOUT | EPOLLWRNORM;
			} else {
				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
			}
			if (atomic_read(&smc->conn.bytes_to_rcv))
				mask |= EPOLLIN | EPOLLRDNORM;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
			if (sk->sk_state == SMC_APPCLOSEWAIT1)
				mask |= EPOLLIN;
			if (smc->conn.urg_state == SMC_URG_VALID)
				mask |= EPOLLPRI;
		}
	}

	return mask;
}

static int smc_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;
	int rc1 = 0;

	smc = smc_sk(sk);

	if ((how < SHUT_RD) || (how > SHUT_RDWR))
		return rc;

	lock_sock(sk);

	rc = -ENOTCONN;
	if ((sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPFINCLOSEWAIT))
		goto out;
	if (smc->use_fallback) {
		rc = kernel_sock_shutdown(smc->clcsock, how);
		sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
		if (sk->sk_shutdown == SHUTDOWN_MASK)
			sk->sk_state = SMC_CLOSED;
		goto out;
	}
	switch (how) {
	case SHUT_RDWR:		/* shutdown in both directions */
		rc = smc_close_active(smc);
		break;
	case SHUT_WR:
		rc = smc_close_shutdown_write(smc);
		break;
	case SHUT_RD:
		rc = 0;
		/* nothing more to do because peer is not involved */
		break;
	}
	if (smc->clcsock)
		rc1 = kernel_sock_shutdown(smc->clcsock, how);
	/* map sock_shutdown_cmd constants to sk_shutdown value range */
	sk->sk_shutdown |= how + 1;

out:
	release_sock(sk);
	return rc ? rc : rc1;
}

static int smc_setsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int val, rc;

	smc = smc_sk(sk);

	/* generic setsockopts reaching us here always apply to the
	 * CLC socket
	 */
	rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
					   optval, optlen);
	if (smc->clcsock->sk->sk_err) {
		sk->sk_err = smc->clcsock->sk->sk_err;
		sk->sk_error_report(sk);
	}

	if (optlen < sizeof(int))
		return -EINVAL;
	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	lock_sock(sk);
	if (rc || smc->use_fallback)
		goto out;
	switch (optname) {
	case TCP_ULP:
	case TCP_FASTOPEN:
	case TCP_FASTOPEN_CONNECT:
	case TCP_FASTOPEN_KEY:
	case TCP_FASTOPEN_NO_COOKIE:
		/* option not supported by SMC */
		if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
			smc_switch_to_fallback(smc);
			smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
		} else {
			rc = -EINVAL;
		}
		break;
	case TCP_NODELAY:
		if (sk->sk_state != SMC_INIT &&
		    sk->sk_state != SMC_LISTEN &&
		    sk->sk_state != SMC_CLOSED) {
			if (val)
				mod_delayed_work(system_wq, &smc->conn.tx_work,
						 0);
		}
		break;
	case TCP_CORK:
		if (sk->sk_state != SMC_INIT &&
		    sk->sk_state != SMC_LISTEN &&
		    sk->sk_state != SMC_CLOSED) {
			if (!val)
				mod_delayed_work(system_wq, &smc->conn.tx_work,
						 0);
		}
		break;
	case TCP_DEFER_ACCEPT:
		smc->sockopt_defer_accept = val;
		break;
	default:
		break;
	}
out:
	release_sock(sk);

	return rc;
}

static int smc_getsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	struct smc_sock *smc;

	smc = smc_sk(sock->sk);
	/* socket options apply to the CLC socket */
	return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
					     optval, optlen);
}

static int smc_ioctl(struct socket *sock, unsigned int cmd,
		     unsigned long arg)
{
	union smc_host_cursor cons, urg;
	struct smc_connection *conn;
	struct smc_sock *smc;
	int answ;

	smc = smc_sk(sock->sk);
	conn = &smc->conn;
	lock_sock(&smc->sk);
	if (smc->use_fallback) {
		if (!smc->clcsock) {
			release_sock(&smc->sk);
			return -EBADF;
		}
		answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
		release_sock(&smc->sk);
		return answ;
	}
	switch (cmd) {
	case SIOCINQ: /* same as FIONREAD */
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED)
			answ = 0;
		else
			answ = atomic_read(&smc->conn.bytes_to_rcv);
		break;
	case SIOCOUTQ:
		/* output queue size (not send + not acked) */
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED)
			answ = 0;
		else
			answ = smc->conn.sndbuf_desc->len -
					atomic_read(&smc->conn.sndbuf_space);
		break;
	case SIOCOUTQNSD:
		/* output queue size (not send only) */
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED)
			answ = 0;
		else
			answ = smc_tx_prepared_sends(&smc->conn);
		break;
	case SIOCATMARK:
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED) {
			answ = 0;
		} else {
			smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
			smc_curs_copy(&urg, &conn->urg_curs, conn);
			answ = smc_curs_diff(conn->rmb_desc->len,
					     &cons, &urg) == 1;
		}
		break;
	default:
		release_sock(&smc->sk);
		return -ENOIOCTLCMD;
	}
	release_sock(&smc->sk);

	return put_user(answ, (int __user *)arg);
}

static ssize_t smc_sendpage(struct socket *sock, struct page *page,
			    int offset, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state != SMC_ACTIVE) {
		release_sock(sk);
		goto out;
	}
	release_sock(sk);
	if (smc->use_fallback)
		rc = kernel_sendpage(smc->clcsock, page, offset,
				     size, flags);
	else
		rc = sock_no_sendpage(sock, page, offset, size, flags);

out:
	return rc;
}

/* Map the affected portions of the rmbe into an spd, note the number of bytes
 * to splice in conn->splice_pending, and press 'go'. Delays consumer cursor
 * updates till whenever a respective page has been fully processed.
 * Note that subsequent recv() calls have to wait till all splice() processing
 * completed.
 */
static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
			       struct pipe_inode_info *pipe, size_t len,
			       unsigned int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
		/* socket was connected before, no more data to read */
		rc = 0;
		goto out;
	}
	if (sk->sk_state == SMC_INIT ||
	    sk->sk_state == SMC_LISTEN ||
	    sk->sk_state == SMC_CLOSED)
		goto out;

	if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
		rc = 0;
		goto out;
	}

	if (smc->use_fallback) {
		rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
						    pipe, len, flags);
	} else {
		if (*ppos) {
			rc = -ESPIPE;
			goto out;
		}
		if (flags & SPLICE_F_NONBLOCK)
			flags = MSG_DONTWAIT;
		else
			flags = 0;
		rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags);
	}
out:
	release_sock(sk);

	return rc;
}

/* must look like tcp */
static const struct proto_ops smc_sock_ops = {
	.family		= PF_SMC,
	.owner		= THIS_MODULE,
	.release	= smc_release,
	.bind		= smc_bind,
	.connect	= smc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= smc_accept,
	.getname	= smc_getname,
	.poll		= smc_poll,
	.ioctl		= smc_ioctl,
	.listen		= smc_listen,
	.shutdown	= smc_shutdown,
	.setsockopt	= smc_setsockopt,
	.getsockopt	= smc_getsockopt,
	.sendmsg	= smc_sendmsg,
	.recvmsg	= smc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= smc_sendpage,
	.splice_read	= smc_splice_read,
};
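
/* Illustrative user space view (a sketch, not part of this file): an SMC
 * socket is requested like a TCP socket, only family and protocol differ:
 *
 *	int sd = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC);
 *	connect(sd, (struct sockaddr *)&sin, sizeof(sin));
 *
 * smc_create() below backs that socket() call; if SMC setup fails later,
 * the connection transparently falls back to TCP on the internal clcsock.
 */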

static int smc_create(struct net *net, struct socket *sock, int protocol,
		      int kern)
{
	int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
	struct smc_sock *smc;
	struct sock *sk;
	int rc;

	rc = -ESOCKTNOSUPPORT;
	if (sock->type != SOCK_STREAM)
		goto out;

	rc = -EPROTONOSUPPORT;
	if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
		goto out;

	rc = -ENOBUFS;
	sock->ops = &smc_sock_ops;
	sk = smc_sock_alloc(net, sock, protocol);
	if (!sk)
		goto out;

	/* create internal TCP socket for CLC handshake and fallback */
	smc = smc_sk(sk);
	smc->use_fallback = false; /* assume rdma capability first */
	smc->fallback_rsn = 0;
	rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
			      &smc->clcsock);
	if (rc) {
		sk_common_release(sk);
		goto out;
	}
	smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
	smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);

out:
	return rc;
}

static const struct net_proto_family smc_sock_family_ops = {
	.family	= PF_SMC,
	.owner	= THIS_MODULE,
	.create	= smc_create,
};

unsigned int smc_net_id;

static __net_init int smc_net_init(struct net *net)
{
	return smc_pnet_net_init(net);
}

static void __net_exit smc_net_exit(struct net *net)
{
	smc_pnet_net_exit(net);
}

static struct pernet_operations smc_net_ops = {
	.init = smc_net_init,
	.exit = smc_net_exit,
	.id   = &smc_net_id,
	.size = sizeof(struct smc_net),
};

static int __init smc_init(void)
{
	int rc;

	rc = register_pernet_subsys(&smc_net_ops);
	if (rc)
		return rc;

	rc = smc_pnet_init();
	if (rc)
		goto out_pernet_subsys;

	rc = smc_core_init();
	if (rc) {
		pr_err("%s: smc_core_init fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = smc_llc_init();
	if (rc) {
		pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
		goto out_core;
	}

	rc = smc_cdc_init();
	if (rc) {
		pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
		goto out_core;
	}

	rc = proto_register(&smc_proto, 1);
	if (rc) {
		pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
		goto out_core;
	}

	rc = proto_register(&smc_proto6, 1);
	if (rc) {
		pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
		goto out_proto;
	}

	rc = sock_register(&smc_sock_family_ops);
	if (rc) {
		pr_err("%s: sock_register fails with %d\n", __func__, rc);
		goto out_proto6;
	}
	INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
	INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);

	rc = smc_ib_register_client();
	if (rc) {
		pr_err("%s: ib_register fails with %d\n", __func__, rc);
		goto out_sock;
	}

	static_branch_enable(&tcp_have_smc);
	return 0;

out_sock:
	sock_unregister(PF_SMC);
out_proto6:
	proto_unregister(&smc_proto6);
out_proto:
	proto_unregister(&smc_proto);
out_core:
	smc_core_exit();
out_pnet:
	smc_pnet_exit();
out_pernet_subsys:
	unregister_pernet_subsys(&smc_net_ops);

	return rc;
}
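
/* Module teardown below unwinds smc_init() in reverse; the final
 * rcu_barrier() waits for pending RCU callbacks (e.g. from socket slabs
 * created with SLAB_TYPESAFE_BY_RCU) before the module text disappears.
 */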

static void __exit smc_exit(void)
{
	static_branch_disable(&tcp_have_smc);
	sock_unregister(PF_SMC);
	smc_core_exit();
	smc_ib_unregister_client();
	proto_unregister(&smc_proto6);
	proto_unregister(&smc_proto);
	smc_pnet_exit();
	unregister_pernet_subsys(&smc_net_ops);
	rcu_barrier();
}

module_init(smc_init);
module_exit(smc_exit);

MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("smc socket address family");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_SMC);