// SPDX-License-Identifier: GPL-2.0-only
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * AF_SMC protocol family socket handler keeping the AF_INET sock address type
 * applies to SOCK_STREAM sockets only
 * offers an alternative communication option for TCP-protocol sockets
 * applicable with RoCE-cards only
 *
 * Initial restrictions:
 *   - support for alternate links postponed
 *
 * Copyright IBM Corp. 2016, 2018
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 *             based on prototype from Frank Blaschka
 */

#define KMSG_COMPONENT "smc"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/socket.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/sched/signal.h>
#include <linux/if_vlan.h>
#include <linux/rcupdate_wait.h>

#include <net/sock.h>
#include <net/tcp.h>
#include <net/smc.h>
#include <asm/ioctls.h>

#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include "smc_netns.h"

#include "smc.h"
#include "smc_clc.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_ism.h"
#include "smc_pnet.h"
#include "smc_tx.h"
#include "smc_rx.h"
#include "smc_close.h"

static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
						 * creation on server
						 */
static DEFINE_MUTEX(smc_client_lgr_pending);	/* serialize link group
						 * creation on client
						 */

static void smc_tcp_listen_work(struct work_struct *);
static void smc_connect_work(struct work_struct *);

static void smc_set_keepalive(struct sock *sk, int val)
{
	struct smc_sock *smc = smc_sk(sk);

	smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
}

static struct smc_hashinfo smc_v4_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};

static struct smc_hashinfo smc_v6_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
};

int smc_hash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
	struct hlist_head *head;

	head = &h->ht;

	write_lock_bh(&h->lock);
	sk_add_node(sk, head);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	write_unlock_bh(&h->lock);

	return 0;
}
EXPORT_SYMBOL_GPL(smc_hash_sk);

void smc_unhash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;

	write_lock_bh(&h->lock);
	if (sk_del_node_init(sk))
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	write_unlock_bh(&h->lock);
}
EXPORT_SYMBOL_GPL(smc_unhash_sk);

struct proto smc_proto = {
	.name		= "SMC",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v4_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto);

struct proto smc_proto6 = {
	.name		= "SMC6",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v6_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto6);

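/* After a fallback, the socket file's private_data points at the TCP
 * clcsock. When such a socket is released, hand the file back to the SMC
 * sock so the generic socket release path finds the right owner again.
 */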
static void smc_restore_fallback_changes(struct smc_sock *smc)
{
	if (smc->clcsock->file) { /* non-accepted sockets have no file yet */
		smc->clcsock->file->private_data = smc->sk.sk_socket;
		smc->clcsock->file = NULL;
	}
}

static int __smc_release(struct smc_sock *smc)
{
	struct sock *sk = &smc->sk;
	int rc = 0;

	if (!smc->use_fallback) {
		rc = smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	} else {
		if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
			sock_put(sk); /* passive closing */
		if (sk->sk_state == SMC_LISTEN) {
			/* wake up clcsock accept */
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
		}
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk);
		smc_restore_fallback_changes(smc);
	}

	sk->sk_prot->unhash(sk);

	if (sk->sk_state == SMC_CLOSED) {
		if (smc->clcsock) {
			release_sock(sk);
			smc_clcsock_release(smc);
			lock_sock(sk);
		}
		if (!smc->use_fallback)
			smc_conn_free(&smc->conn);
	}

	return rc;
}

static int smc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = 0;

	if (!sk)
		goto out;

	sock_hold(sk); /* sock_put below */
	smc = smc_sk(sk);

	/* cleanup for a dangling non-blocking connect */
	if (smc->connect_nonblock && sk->sk_state == SMC_INIT)
		tcp_abort(smc->clcsock->sk, ECONNABORTED);
	flush_work(&smc->connect_work);

	if (sk->sk_state == SMC_LISTEN)
		/* smc_close_non_accepted() is called and acquires
		 * sock lock for child sockets again
		 */
		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	else
		lock_sock(sk);

	rc = __smc_release(smc);

	/* detach socket */
	sock_orphan(sk);
	sock->sk = NULL;
	release_sock(sk);

	sock_put(sk); /* sock_hold above */
	sock_put(sk); /* final sock_put */
out:
	return rc;
}

static void smc_destruct(struct sock *sk)
{
	if (sk->sk_state != SMC_CLOSED)
		return;
	if (!sock_flag(sk, SOCK_DEAD))
		return;

	sk_refcnt_debug_dec(sk);
}

static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
				   int protocol)
{
	struct smc_sock *smc;
	struct proto *prot;
	struct sock *sk;

	prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
	if (!sk)
		return NULL;

	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
	sk->sk_state = SMC_INIT;
	sk->sk_destruct = smc_destruct;
	sk->sk_protocol = protocol;
	smc = smc_sk(sk);
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	INIT_WORK(&smc->connect_work, smc_connect_work);
	INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
	INIT_LIST_HEAD(&smc->accept_q);
	spin_lock_init(&smc->accept_q_lock);
	spin_lock_init(&smc->conn.send_lock);
	sk->sk_prot->hash(sk);
	sk_refcnt_debug_inc(sk);
	mutex_init(&smc->clcsock_release_lock);

	return sk;
}

static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
		    int addr_len)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);

	/* replicate tests from inet_bind(), to be safe wrt. future changes */
	rc = -EINVAL;
	if (addr_len < sizeof(struct sockaddr_in))
		goto out;

	rc = -EAFNOSUPPORT;
	if (addr->sin_family != AF_INET &&
	    addr->sin_family != AF_INET6 &&
	    addr->sin_family != AF_UNSPEC)
		goto out;
	/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
	if (addr->sin_family == AF_UNSPEC &&
	    addr->sin_addr.s_addr != htonl(INADDR_ANY))
		goto out;

	lock_sock(sk);

	/* Check if socket is already active */
	rc = -EINVAL;
	if (sk->sk_state != SMC_INIT || smc->connect_nonblock)
		goto out_rel;

	smc->clcsock->sk->sk_reuse = sk->sk_reuse;
	rc = kernel_bind(smc->clcsock, uaddr, addr_len);

out_rel:
	release_sock(sk);
out:
	return rc;
}

static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
				   unsigned long mask)
{
	/* options we don't get control via setsockopt for */
	nsk->sk_type = osk->sk_type;
	nsk->sk_sndbuf = osk->sk_sndbuf;
	nsk->sk_rcvbuf = osk->sk_rcvbuf;
	nsk->sk_sndtimeo = osk->sk_sndtimeo;
	nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
	nsk->sk_mark = osk->sk_mark;
	nsk->sk_priority = osk->sk_priority;
	nsk->sk_rcvlowat = osk->sk_rcvlowat;
	nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
	nsk->sk_err = osk->sk_err;

	nsk->sk_flags &= ~mask;
	nsk->sk_flags |= osk->sk_flags & mask;
}

#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_BROADCAST) | \
			     (1UL << SOCK_TIMESTAMP) | \
			     (1UL << SOCK_DBG) | \
			     (1UL << SOCK_RCVTSTAMP) | \
			     (1UL << SOCK_RCVTSTAMPNS) | \
			     (1UL << SOCK_LOCALROUTE) | \
			     (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
			     (1UL << SOCK_RXQ_OVFL) | \
			     (1UL << SOCK_WIFI_STATUS) | \
			     (1UL << SOCK_NOFCS) | \
			     (1UL << SOCK_FILTER_LOCKED) | \
			     (1UL << SOCK_TSTAMP_NEW))
/* copy only relevant settings and flags of SOL_SOCKET level from smc to
 * clc socket (since smc is not called for these options from net/core)
 */
static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
	smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
}

#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_DBG))
/* copy only settings and flags relevant for smc from clc to smc socket */
static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
{
	smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}

/* register the new rmb on all links */
static int smcr_lgr_reg_rmbs(struct smc_link *link,
			     struct smc_buf_desc *rmb_desc)
{
	struct smc_link_group *lgr = link->lgr;
	int i, rc = 0;

	rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
	if (rc)
		return rc;
	/* protect against parallel smc_llc_cli_rkey_exchange() and
	 * parallel smcr_link_reg_rmb()
	 */
	mutex_lock(&lgr->llc_conf_mutex);
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_active(&lgr->lnk[i]))
			continue;
		rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc);
		if (rc)
			goto out;
	}

	/* exchange confirm_rkey msg with peer */
	rc = smc_llc_do_confirm_rkey(link, rmb_desc);
	if (rc) {
		rc = -EFAULT;
		goto out;
	}
	rmb_desc->is_conf_rkey = true;
out:
	mutex_unlock(&lgr->llc_conf_mutex);
	smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
	return rc;
}

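/* First contact, client side: the server sends CONFIRM LINK over the new
 * RoCE link; the client answers, registers its RMB and may receive an
 * optional ADD LINK request for a second link. If no CONFIRM LINK arrives,
 * check whether the peer sent a CLC DECLINE instead.
 */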
static int smcr_clnt_conf_first_link(struct smc_sock *smc)
{
	struct smc_link *link = smc->conn.lnk;
	struct smc_llc_qentry *qentry;
	int rc;

	/* receive CONFIRM LINK request from server over RoCE fabric */
	qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
			      SMC_LLC_CONFIRM_LINK);
	if (!qentry) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
	}
	smc_llc_save_peer_uid(qentry);
	rc = smc_llc_eval_conf_link(qentry, SMC_LLC_REQ);
	smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
	if (rc)
		return SMC_CLC_DECL_RMBE_EC;

	rc = smc_ib_modify_qp_rts(link);
	if (rc)
		return SMC_CLC_DECL_ERR_RDYLNK;

	smc_wr_remember_qp_attr(link);

	if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
		return SMC_CLC_DECL_ERR_REGRMB;

	/* confirm_rkey is implicit on 1st contact */
	smc->conn.rmb_desc->is_conf_rkey = true;

	/* send CONFIRM LINK response over RoCE fabric */
	rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_CL;

	smc_llc_link_active(link);
	smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);

	/* optional 2nd link, receive ADD LINK request from server */
	qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
			      SMC_LLC_ADD_LINK);
	if (!qentry) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		if (rc == -EAGAIN)
			rc = 0; /* no DECLINE received, go with one link */
		return rc;
	}
	smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
	smc_llc_cli_add_link(link, qentry);
	return 0;
}

static void smcr_conn_save_peer_info(struct smc_sock *smc,
				     struct smc_clc_msg_accept_confirm *clc)
{
	int bufsize = smc_uncompress_bufsize(clc->rmbe_size);

	smc->conn.peer_rmbe_idx = clc->rmbe_idx;
	smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
	smc->conn.peer_rmbe_size = bufsize;
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
	smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
}

static void smcd_conn_save_peer_info(struct smc_sock *smc,
				     struct smc_clc_msg_accept_confirm *clc)
{
	int bufsize = smc_uncompress_bufsize(clc->dmbe_size);

	smc->conn.peer_rmbe_idx = clc->dmbe_idx;
	smc->conn.peer_token = clc->token;
	/* msg header takes up space in the buffer */
	smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
	smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
}

static void smc_conn_save_peer_info(struct smc_sock *smc,
				    struct smc_clc_msg_accept_confirm *clc)
{
	if (smc->conn.lgr->is_smcd)
		smcd_conn_save_peer_info(smc, clc);
	else
		smcr_conn_save_peer_info(smc, clc);
}

static void smc_link_save_peer_info(struct smc_link *link,
				    struct smc_clc_msg_accept_confirm *clc)
{
	link->peer_qpn = ntoh24(clc->qpn);
	memcpy(link->peer_gid, clc->lcl.gid, SMC_GID_SIZE);
	memcpy(link->peer_mac, clc->lcl.mac, sizeof(link->peer_mac));
	link->peer_psn = ntoh24(clc->psn);
	link->peer_mtu = clc->qp_mtu;
}

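/* Switch to plain TCP: from now on all traffic runs over the internal
 * clcsock. If the SMC socket is already attached to a file, the file's
 * private_data and fasync list are handed over to the clcsock so that
 * poll() and fasync keep working on the TCP socket.
 */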
static void smc_switch_to_fallback(struct smc_sock *smc)
{
	smc->use_fallback = true;
	if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
		smc->clcsock->file = smc->sk.sk_socket->file;
		smc->clcsock->file->private_data = smc->clcsock;
		smc->clcsock->wq.fasync_list =
			smc->sk.sk_socket->wq.fasync_list;
	}
}

/* fall back during connect */
static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
{
	smc_switch_to_fallback(smc);
	smc->fallback_rsn = reason_code;
	smc_copy_sock_settings_to_clc(smc);
	smc->connect_nonblock = 0;
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;
	return 0;
}

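/* Reason codes follow a simple convention throughout the connect path:
 * negative values are kernel errnos and fatal (no fallback possible),
 * positive SMC_CLC_DECL_* values are CLC decline reasons that are sent
 * to the peer before falling back to TCP.
 */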
/* decline and fall back during connect */
static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
{
	int rc;

	if (reason_code < 0) { /* error, fallback is not possible */
		if (smc->sk.sk_state == SMC_INIT)
			sock_put(&smc->sk); /* passive closing */
		return reason_code;
	}
	if (reason_code != SMC_CLC_DECL_PEERDECL) {
		rc = smc_clc_send_decline(smc, reason_code);
		if (rc < 0) {
			if (smc->sk.sk_state == SMC_INIT)
				sock_put(&smc->sk); /* passive closing */
			return rc;
		}
	}
	return smc_connect_fallback(smc, reason_code);
}

/* abort connecting */
static int smc_connect_abort(struct smc_sock *smc, int reason_code,
			     int local_contact)
{
	bool is_smcd = smc->conn.lgr->is_smcd;

	if (local_contact == SMC_FIRST_CONTACT)
		smc_lgr_cleanup_early(&smc->conn);
	else
		smc_conn_free(&smc->conn);
	if (is_smcd)
		/* there is only one lgr role for SMC-D; use server lock */
		mutex_unlock(&smc_server_lgr_pending);
	else
		mutex_unlock(&smc_client_lgr_pending);

	smc->connect_nonblock = 0;
	return reason_code;
}

/* check if there is a rdma device available for this connection. */
/* called for connect and listen */
static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini)
{
	/* PNET table look up: search active ib_device and port
	 * within same PNETID that also contains the ethernet device
	 * used for the internal TCP socket
	 */
	smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
	if (!ini->ib_dev)
		return SMC_CLC_DECL_NOSMCRDEV;
	return 0;
}

/* check if there is an ISM device available for this connection. */
/* called for connect and listen */
static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini)
{
	/* Find ISM device with same PNETID as connecting interface */
	smc_pnet_find_ism_resource(smc->clcsock->sk, ini);
	if (!ini->ism_dev)
		return SMC_CLC_DECL_NOSMCDDEV;
	return 0;
}

/* Check for VLAN ID and register it on ISM device just for CLC handshake */
static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
				      struct smc_init_info *ini)
{
	if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev, ini->vlan_id))
		return SMC_CLC_DECL_ISMVLANERR;
	return 0;
}

/* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
 * used, the VLAN ID will be registered again during the connection setup.
 */
static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
					struct smc_init_info *ini)
{
	if (!is_smcd)
		return 0;
	if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev, ini->vlan_id))
		return SMC_CLC_DECL_CNFERR;
	return 0;
}

/* CLC handshake during connect */
static int smc_connect_clc(struct smc_sock *smc, int smc_type,
			   struct smc_clc_msg_accept_confirm *aclc,
			   struct smc_init_info *ini)
{
	int rc = 0;

	/* do inband token exchange */
	rc = smc_clc_send_proposal(smc, smc_type, ini);
	if (rc)
		return rc;
	/* receive SMC Accept CLC message */
	return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT,
				CLC_WAIT_TIME);
}

/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc,
			    struct smc_clc_msg_accept_confirm *aclc,
			    struct smc_init_info *ini)
{
	int i, reason_code = 0;
	struct smc_link *link;

	ini->is_smcd = false;
	ini->ib_lcl = &aclc->lcl;
	ini->ib_clcqpn = ntoh24(aclc->qpn);
	ini->srv_first_contact = aclc->hdr.flag;

	mutex_lock(&smc_client_lgr_pending);
	reason_code = smc_conn_create(smc, ini);
	if (reason_code) {
		mutex_unlock(&smc_client_lgr_pending);
		return reason_code;
	}

	smc_conn_save_peer_info(smc, aclc);

	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
		link = smc->conn.lnk;
	} else {
		/* set link that was assigned by server */
		link = NULL;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			struct smc_link *l = &smc->conn.lgr->lnk[i];

			if (l->peer_qpn == ntoh24(aclc->qpn) &&
			    !memcmp(l->peer_gid, &aclc->lcl.gid, SMC_GID_SIZE) &&
			    !memcmp(l->peer_mac, &aclc->lcl.mac, sizeof(l->peer_mac))) {
				link = l;
				break;
			}
		}
		if (!link)
			return smc_connect_abort(smc, SMC_CLC_DECL_NOSRVLINK,
						 ini->cln_first_contact);
		smc->conn.lnk = link;
	}

	/* create send buffer and rmb */
	if (smc_buf_create(smc, false))
		return smc_connect_abort(smc, SMC_CLC_DECL_MEM,
					 ini->cln_first_contact);

	if (ini->cln_first_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, aclc);

	if (smc_rmb_rtoken_handling(&smc->conn, link, aclc))
		return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
					 ini->cln_first_contact);

	smc_close_init(smc);
	smc_rx_init(smc);

	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
		if (smc_ib_ready_link(link))
			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
						 ini->cln_first_contact);
	} else {
		if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc))
			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
						 ini->cln_first_contact);
	}
	smc_rmb_sync_sg_for_device(&smc->conn);

	reason_code = smc_clc_send_confirm(smc);
	if (reason_code)
		return smc_connect_abort(smc, reason_code,
					 ini->cln_first_contact);

	smc_tx_init(smc);

	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
		/* QP confirmation over RoCE fabric */
		smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
		reason_code = smcr_clnt_conf_first_link(smc);
		smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
		if (reason_code)
			return smc_connect_abort(smc, reason_code,
						 ini->cln_first_contact);
	}
	mutex_unlock(&smc_client_lgr_pending);

	smc_copy_sock_settings_to_clc(smc);
	smc->connect_nonblock = 0;
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return 0;
}

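/* SMC-D runs through an ISM device (internal shared memory between
 * partitions of the same physical machine), so no RDMA QP setup or LLC
 * link confirmation is needed; setup reduces to buffer creation and the
 * CLC confirm exchange.
 */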
/* setup for ISM connection of client */
static int smc_connect_ism(struct smc_sock *smc,
			   struct smc_clc_msg_accept_confirm *aclc,
			   struct smc_init_info *ini)
{
	int rc = 0;

	ini->is_smcd = true;
	ini->ism_gid = aclc->gid;
	ini->srv_first_contact = aclc->hdr.flag;

	/* there is only one lgr role for SMC-D; use server lock */
	mutex_lock(&smc_server_lgr_pending);
	rc = smc_conn_create(smc, ini);
	if (rc) {
		mutex_unlock(&smc_server_lgr_pending);
		return rc;
	}

	/* Create send and receive buffers */
	if (smc_buf_create(smc, true))
		return smc_connect_abort(smc, SMC_CLC_DECL_MEM,
					 ini->cln_first_contact);

	smc_conn_save_peer_info(smc, aclc);
	smc_close_init(smc);
	smc_rx_init(smc);
	smc_tx_init(smc);

	rc = smc_clc_send_confirm(smc);
	if (rc)
		return smc_connect_abort(smc, rc, ini->cln_first_contact);
	mutex_unlock(&smc_server_lgr_pending);

	smc_copy_sock_settings_to_clc(smc);
	smc->connect_nonblock = 0;
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return 0;
}

/* perform steps before actually connecting */
static int __smc_connect(struct smc_sock *smc)
{
	bool ism_supported = false, rdma_supported = false;
	struct smc_clc_msg_accept_confirm aclc;
	struct smc_init_info ini = {0};
	int smc_type;
	int rc = 0;

	if (smc->use_fallback)
		return smc_connect_fallback(smc, smc->fallback_rsn);

	/* if peer has not signalled SMC-capability, fall back */
	if (!tcp_sk(smc->clcsock->sk)->syn_smc)
		return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(smc))
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);

	/* get vlan id from IP device */
	if (smc_vlan_by_tcpsk(smc->clcsock, &ini))
		return smc_connect_decline_fallback(smc,
						    SMC_CLC_DECL_GETVLANERR);

	/* check if there is an ism device available */
	if (!smc_find_ism_device(smc, &ini) &&
	    !smc_connect_ism_vlan_setup(smc, &ini)) {
		/* ISM is supported for this connection */
		ism_supported = true;
		smc_type = SMC_TYPE_D;
	}

	/* check if there is a rdma device available */
	if (!smc_find_rdma_device(smc, &ini)) {
		/* RDMA is supported for this connection */
		rdma_supported = true;
		if (ism_supported)
			smc_type = SMC_TYPE_B; /* both */
		else
			smc_type = SMC_TYPE_R; /* only RDMA */
	}

	/* if neither ISM nor RDMA are supported, fallback */
	if (!rdma_supported && !ism_supported)
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV);

	/* perform CLC handshake */
	rc = smc_connect_clc(smc, smc_type, &aclc, &ini);
	if (rc) {
		smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
		return smc_connect_decline_fallback(smc, rc);
	}

	/* depending on previous steps, connect using rdma or ism */
	if (rdma_supported && aclc.hdr.path == SMC_TYPE_R)
		rc = smc_connect_rdma(smc, &aclc, &ini);
	else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
		rc = smc_connect_ism(smc, &aclc, &ini);
	else
		rc = SMC_CLC_DECL_MODEUNSUPP;
	if (rc) {
		smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
		return smc_connect_decline_fallback(smc, rc);
	}

	smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini);
	return 0;
}

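/* worker for a non-blocking connect: wait until the TCP three-way
 * handshake of the clcsock completes, then run the SMC handshake in
 * __smc_connect(). The send timeout of the SMC socket bounds the wait
 * (or the wait is unbounded if no timeout is set).
 */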
static void smc_connect_work(struct work_struct *work)
{
	struct smc_sock *smc = container_of(work, struct smc_sock,
					    connect_work);
	long timeo = smc->sk.sk_sndtimeo;
	int rc = 0;

	if (!timeo)
		timeo = MAX_SCHEDULE_TIMEOUT;
	lock_sock(smc->clcsock->sk);
	if (smc->clcsock->sk->sk_err) {
		smc->sk.sk_err = smc->clcsock->sk->sk_err;
	} else if ((1 << smc->clcsock->sk->sk_state) &
					(TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo);
		if ((rc == -EPIPE) &&
		    ((1 << smc->clcsock->sk->sk_state) &
		     (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)))
			rc = 0;
	}
	release_sock(smc->clcsock->sk);
	lock_sock(&smc->sk);
	if (rc != 0 || smc->sk.sk_err) {
		smc->sk.sk_state = SMC_CLOSED;
		if (rc == -EPIPE || rc == -EAGAIN)
			smc->sk.sk_err = EPIPE;
		else if (signal_pending(current))
			smc->sk.sk_err = -sock_intr_errno(timeo);
		sock_put(&smc->sk); /* passive closing */
		goto out;
	}

	rc = __smc_connect(smc);
	if (rc < 0)
		smc->sk.sk_err = -rc;

out:
	if (!sock_flag(&smc->sk, SOCK_DEAD)) {
		if (smc->sk.sk_err) {
			smc->sk.sk_state_change(&smc->sk);
		} else { /* allow polling before and after fallback decision */
			smc->clcsock->sk->sk_write_space(smc->clcsock->sk);
			smc->sk.sk_write_space(&smc->sk);
		}
	}
	release_sock(&smc->sk);
}

static int smc_connect(struct socket *sock, struct sockaddr *addr,
		       int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;

	smc = smc_sk(sk);

	/* separate smc parameter checking to be safe */
	if (alen < sizeof(addr->sa_family))
		goto out_err;
	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
		goto out_err;

	lock_sock(sk);
	switch (sk->sk_state) {
	default:
		goto out;
	case SMC_ACTIVE:
		rc = -EISCONN;
		goto out;
	case SMC_INIT:
		rc = 0;
		break;
	}

	smc_copy_sock_settings_to_clc(smc);
	tcp_sk(smc->clcsock->sk)->syn_smc = 1;
	if (smc->connect_nonblock) {
		rc = -EALREADY;
		goto out;
	}
	rc = kernel_connect(smc->clcsock, addr, alen, flags);
	if (rc && rc != -EINPROGRESS)
		goto out;

	sock_hold(&smc->sk); /* sock put in passive closing */
	if (smc->use_fallback)
		goto out;
	if (flags & O_NONBLOCK) {
		if (schedule_work(&smc->connect_work))
			smc->connect_nonblock = 1;
		rc = -EINPROGRESS;
	} else {
		rc = __smc_connect(smc);
		if (rc < 0)
			goto out;
		else
			rc = 0; /* success cases including fallback */
	}

out:
	release_sock(sk);
out_err:
	return rc;
}

static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
{
	struct socket *new_clcsock = NULL;
	struct sock *lsk = &lsmc->sk;
	struct sock *new_sk;
	int rc = -EINVAL;

	release_sock(lsk);
	new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
	if (!new_sk) {
		rc = -ENOMEM;
		lsk->sk_err = ENOMEM;
		*new_smc = NULL;
		lock_sock(lsk);
		goto out;
	}
	*new_smc = smc_sk(new_sk);

	mutex_lock(&lsmc->clcsock_release_lock);
	if (lsmc->clcsock)
		rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
	mutex_unlock(&lsmc->clcsock_release_lock);
	lock_sock(lsk);
	if (rc < 0)
		lsk->sk_err = -rc;
	if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
		new_sk->sk_prot->unhash(new_sk);
		if (new_clcsock)
			sock_release(new_clcsock);
		new_sk->sk_state = SMC_CLOSED;
		sock_set_flag(new_sk, SOCK_DEAD);
		sock_put(new_sk); /* final */
		*new_smc = NULL;
		goto out;
	}

	(*new_smc)->clcsock = new_clcsock;
out:
	return rc;
}

/* add a just created sock to the accept queue of the listen sock as
 * candidate for a following socket accept call from user space
 */
static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
{
	struct smc_sock *par = smc_sk(parent);

	sock_hold(sk); /* sock_put in smc_accept_unlink() */
	spin_lock(&par->accept_q_lock);
	list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_added(parent);
}

/* remove a socket from the accept queue of its parental listening socket */
static void smc_accept_unlink(struct sock *sk)
{
	struct smc_sock *par = smc_sk(sk)->listen_smc;

	spin_lock(&par->accept_q_lock);
	list_del_init(&smc_sk(sk)->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
	sock_put(sk); /* sock_hold in smc_accept_enqueue */
}

/* remove a sock from the accept queue to bind it to a new socket created
 * for a socket accept call from user space
 */
struct sock *smc_accept_dequeue(struct sock *parent,
				struct socket *new_sock)
{
	struct smc_sock *isk, *n;
	struct sock *new_sk;

	list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
		new_sk = (struct sock *)isk;

		smc_accept_unlink(new_sk);
		if (new_sk->sk_state == SMC_CLOSED) {
			new_sk->sk_prot->unhash(new_sk);
			if (isk->clcsock) {
				sock_release(isk->clcsock);
				isk->clcsock = NULL;
			}
			sock_put(new_sk); /* final */
			continue;
		}
		if (new_sock) {
			sock_graft(new_sk, new_sock);
			if (isk->use_fallback) {
				smc_sk(new_sk)->clcsock->file = new_sock->file;
				isk->clcsock->file->private_data = isk->clcsock;
			}
		}
		return new_sk;
	}
	return NULL;
}

/* clean up for a created but never accepted sock */
void smc_close_non_accepted(struct sock *sk)
{
	struct smc_sock *smc = smc_sk(sk);

	sock_hold(sk); /* sock_put below */
	lock_sock(sk);
	if (!sk->sk_lingertime)
		/* wait for peer closing */
		sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
	__smc_release(smc);
	release_sock(sk);
	sock_put(sk); /* sock_hold above */
	sock_put(sk); /* final sock_put */
}

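/* First contact, server side: mirror image of the client flow - register
 * the RMB, send CONFIRM LINK, wait for the client's response and then
 * try to establish an optional second link via ADD LINK.
 */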
static int smcr_serv_conf_first_link(struct smc_sock *smc)
{
	struct smc_link *link = smc->conn.lnk;
	struct smc_llc_qentry *qentry;
	int rc;

	if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
		return SMC_CLC_DECL_ERR_REGRMB;

	/* send CONFIRM LINK request to client over the RoCE fabric */
	rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_CL;

	/* receive CONFIRM LINK response from client over the RoCE fabric */
	qentry = smc_llc_wait(link->lgr, link, SMC_LLC_WAIT_TIME,
			      SMC_LLC_CONFIRM_LINK);
	if (!qentry) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
	}
	smc_llc_save_peer_uid(qentry);
	rc = smc_llc_eval_conf_link(qentry, SMC_LLC_RESP);
	smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
	if (rc)
		return SMC_CLC_DECL_RMBE_EC;

	/* confirm_rkey is implicit on 1st contact */
	smc->conn.rmb_desc->is_conf_rkey = true;

	smc_llc_link_active(link);
	smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);

	/* initial contact - try to establish second link */
	smc_llc_srv_add_link(link);
	return 0;
}

/* listen worker: finish */
static void smc_listen_out(struct smc_sock *new_smc)
{
	struct smc_sock *lsmc = new_smc->listen_smc;
	struct sock *newsmcsk = &new_smc->sk;

	if (lsmc->sk.sk_state == SMC_LISTEN) {
		lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
		smc_accept_enqueue(&lsmc->sk, newsmcsk);
		release_sock(&lsmc->sk);
	} else { /* no longer listening */
		smc_close_non_accepted(newsmcsk);
	}

	/* Wake up accept */
	lsmc->sk.sk_data_ready(&lsmc->sk);
	sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
}

/* listen worker: finish in state connected */
static void smc_listen_out_connected(struct smc_sock *new_smc)
{
	struct sock *newsmcsk = &new_smc->sk;

	sk_refcnt_debug_inc(newsmcsk);
	if (newsmcsk->sk_state == SMC_INIT)
		newsmcsk->sk_state = SMC_ACTIVE;

	smc_listen_out(new_smc);
}

/* listen worker: finish in error state */
static void smc_listen_out_err(struct smc_sock *new_smc)
{
	struct sock *newsmcsk = &new_smc->sk;

	if (newsmcsk->sk_state == SMC_INIT)
		sock_put(&new_smc->sk); /* passive closing */
	newsmcsk->sk_state = SMC_CLOSED;

	smc_listen_out(new_smc);
}

/* listen worker: decline and fall back if possible */
static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
			       int local_contact)
{
	/* RDMA setup failed, switch back to TCP */
	if (local_contact == SMC_FIRST_CONTACT)
		smc_lgr_cleanup_early(&new_smc->conn);
	else
		smc_conn_free(&new_smc->conn);
	if (reason_code < 0) { /* error, no fallback possible */
		smc_listen_out_err(new_smc);
		return;
	}
	smc_switch_to_fallback(new_smc);
	new_smc->fallback_rsn = reason_code;
	if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
		if (smc_clc_send_decline(new_smc, reason_code) < 0) {
			smc_listen_out_err(new_smc);
			return;
		}
	}
	smc_listen_out_connected(new_smc);
}

/* listen worker: check prefixes */
static int smc_listen_prfx_check(struct smc_sock *new_smc,
				 struct smc_clc_msg_proposal *pclc)
{
	struct smc_clc_msg_proposal_prefix *pclc_prfx;
	struct socket *newclcsock = new_smc->clcsock;

	pclc_prfx = smc_clc_proposal_get_prefix(pclc);
	if (smc_clc_prfx_match(newclcsock, pclc_prfx))
		return SMC_CLC_DECL_DIFFPREFIX;

	return 0;
}

/* listen worker: initialize connection and buffers */
static int smc_listen_rdma_init(struct smc_sock *new_smc,
				struct smc_init_info *ini)
{
	int rc;

	/* allocate connection / link group */
	rc = smc_conn_create(new_smc, ini);
	if (rc)
		return rc;

	/* create send buffer and rmb */
	if (smc_buf_create(new_smc, false))
		return SMC_CLC_DECL_MEM;

	return 0;
}

/* listen worker: initialize connection and buffers for SMC-D */
static int smc_listen_ism_init(struct smc_sock *new_smc,
			       struct smc_clc_msg_proposal *pclc,
			       struct smc_init_info *ini)
{
	struct smc_clc_msg_smcd *pclc_smcd;
	int rc;

	pclc_smcd = smc_get_clc_msg_smcd(pclc);
	ini->ism_gid = pclc_smcd->gid;
	rc = smc_conn_create(new_smc, ini);
	if (rc)
		return rc;

	/* Check if peer can be reached via ISM device */
	if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid,
			    new_smc->conn.lgr->vlan_id,
			    new_smc->conn.lgr->smcd)) {
		if (ini->cln_first_contact == SMC_FIRST_CONTACT)
			smc_lgr_cleanup_early(&new_smc->conn);
		else
			smc_conn_free(&new_smc->conn);
		return SMC_CLC_DECL_SMCDNOTALK;
	}

	/* Create send and receive buffers */
	if (smc_buf_create(new_smc, true)) {
		if (ini->cln_first_contact == SMC_FIRST_CONTACT)
			smc_lgr_cleanup_early(&new_smc->conn);
		else
			smc_conn_free(&new_smc->conn);
		return SMC_CLC_DECL_MEM;
	}

	return 0;
}

/* listen worker: register buffers */
static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
{
	struct smc_connection *conn = &new_smc->conn;

	if (local_contact != SMC_FIRST_CONTACT) {
		if (smcr_lgr_reg_rmbs(conn->lnk, conn->rmb_desc))
			return SMC_CLC_DECL_ERR_REGRMB;
	}
	smc_rmb_sync_sg_for_device(&new_smc->conn);

	return 0;
}

/* listen worker: finish RDMA setup */
static int smc_listen_rdma_finish(struct smc_sock *new_smc,
				  struct smc_clc_msg_accept_confirm *cclc,
				  int local_contact)
{
	struct smc_link *link = new_smc->conn.lnk;
	int reason_code = 0;

	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, cclc);

	if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc)) {
		reason_code = SMC_CLC_DECL_ERR_RTOK;
		goto decline;
	}

	if (local_contact == SMC_FIRST_CONTACT) {
		if (smc_ib_ready_link(link)) {
			reason_code = SMC_CLC_DECL_ERR_RDYLNK;
			goto decline;
		}
		/* QP confirmation over RoCE fabric */
		smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
		reason_code = smcr_serv_conf_first_link(new_smc);
		smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
		if (reason_code)
			goto decline;
	}
	return 0;

decline:
	smc_listen_decline(new_smc, reason_code, local_contact);
	return reason_code;
}

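/* Server side of the CLC handshake, run per incoming connection: receive
 * the proposal, pick ISM or RDMA (preferring ISM when both are proposed
 * and usable), send the accept, and wait for the confirm.
 * smc_server_lgr_pending serializes link group creation; for SMC-D the
 * lock can already be dropped once the accept was sent.
 */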
/* setup for RDMA connection of server */
static void smc_listen_work(struct work_struct *work)
{
	struct smc_sock *new_smc = container_of(work, struct smc_sock,
						smc_listen_work);
	struct socket *newclcsock = new_smc->clcsock;
	struct smc_clc_msg_accept_confirm cclc;
	struct smc_clc_msg_proposal *pclc;
	struct smc_init_info ini = {0};
	bool ism_supported = false;
	u8 buf[SMC_CLC_MAX_LEN];
	int rc = 0;

	if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
		return smc_listen_out_err(new_smc);

	if (new_smc->use_fallback) {
		smc_listen_out_connected(new_smc);
		return;
	}

	/* check if peer is smc capable */
	if (!tcp_sk(newclcsock->sk)->syn_smc) {
		smc_switch_to_fallback(new_smc);
		new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC;
		smc_listen_out_connected(new_smc);
		return;
	}

	/* do inband token exchange -
	 * wait for and receive SMC Proposal CLC message
	 */
	pclc = (struct smc_clc_msg_proposal *)&buf;
	rc = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
			      SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
	if (rc)
		goto out_decl;

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(new_smc)) {
		rc = SMC_CLC_DECL_IPSEC;
		goto out_decl;
	}

	/* check for matching IP prefix and subnet length */
	rc = smc_listen_prfx_check(new_smc, pclc);
	if (rc)
		goto out_decl;

	/* get vlan id from IP device */
	if (smc_vlan_by_tcpsk(new_smc->clcsock, &ini)) {
		rc = SMC_CLC_DECL_GETVLANERR;
		goto out_decl;
	}

	mutex_lock(&smc_server_lgr_pending);
	smc_close_init(new_smc);
	smc_rx_init(new_smc);
	smc_tx_init(new_smc);

	/* check if ISM is available */
	if (pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) {
		ini.is_smcd = true; /* prepare ISM check */
		rc = smc_find_ism_device(new_smc, &ini);
		if (!rc)
			rc = smc_listen_ism_init(new_smc, pclc, &ini);
		if (!rc)
			ism_supported = true;
		else if (pclc->hdr.path == SMC_TYPE_D)
			goto out_unlock; /* skip RDMA and decline */
	}

	/* check if RDMA is available */
	if (!ism_supported) { /* SMC_TYPE_R or SMC_TYPE_B */
		/* prepare RDMA check */
		ini.is_smcd = false;
		ini.ism_dev = NULL;
		ini.ib_lcl = &pclc->lcl;
		rc = smc_find_rdma_device(new_smc, &ini);
		if (rc) {
			/* no RDMA device found */
			if (pclc->hdr.path == SMC_TYPE_B)
				/* neither ISM nor RDMA device found */
				rc = SMC_CLC_DECL_NOSMCDEV;
			goto out_unlock;
		}
		rc = smc_listen_rdma_init(new_smc, &ini);
		if (rc)
			goto out_unlock;
		rc = smc_listen_rdma_reg(new_smc, ini.cln_first_contact);
		if (rc)
			goto out_unlock;
	}

	/* send SMC Accept CLC message */
	rc = smc_clc_send_accept(new_smc, ini.cln_first_contact);
	if (rc)
		goto out_unlock;

	/* SMC-D does not need this lock any more */
	if (ism_supported)
		mutex_unlock(&smc_server_lgr_pending);

	/* receive SMC Confirm CLC message */
	rc = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
			      SMC_CLC_CONFIRM, CLC_WAIT_TIME);
	if (rc) {
		if (!ism_supported)
			goto out_unlock;
		goto out_decl;
	}

	/* finish worker */
	if (!ism_supported) {
		rc = smc_listen_rdma_finish(new_smc, &cclc,
					    ini.cln_first_contact);
		mutex_unlock(&smc_server_lgr_pending);
		if (rc)
			return;
	}
	smc_conn_save_peer_info(new_smc, &cclc);
	smc_listen_out_connected(new_smc);
	return;

out_unlock:
	mutex_unlock(&smc_server_lgr_pending);
out_decl:
	smc_listen_decline(new_smc, rc, ini.cln_first_contact);
}

static void smc_tcp_listen_work(struct work_struct *work)
{
	struct smc_sock *lsmc = container_of(work, struct smc_sock,
					     tcp_listen_work);
	struct sock *lsk = &lsmc->sk;
	struct smc_sock *new_smc;
	int rc = 0;

	lock_sock(lsk);
	while (lsk->sk_state == SMC_LISTEN) {
		rc = smc_clcsock_accept(lsmc, &new_smc);
		if (rc)
			goto out;
		if (!new_smc)
			continue;

		new_smc->listen_smc = lsmc;
		new_smc->use_fallback = lsmc->use_fallback;
		new_smc->fallback_rsn = lsmc->fallback_rsn;
		sock_hold(lsk); /* sock_put in smc_listen_work */
		INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
		smc_copy_sock_settings_to_smc(new_smc);
		new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
		new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
		sock_hold(&new_smc->sk); /* sock_put in passive closing */
		if (!schedule_work(&new_smc->smc_listen_work))
			sock_put(&new_smc->sk);
	}

out:
	release_sock(lsk);
	sock_put(&lsmc->sk); /* sock_hold in smc_listen */
}

static int smc_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);
	lock_sock(sk);

	rc = -EINVAL;
	if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) ||
	    smc->connect_nonblock)
		goto out;

	rc = 0;
	if (sk->sk_state == SMC_LISTEN) {
		sk->sk_max_ack_backlog = backlog;
		goto out;
	}
	/* some socket options are handled in core, so we could not apply
	 * them to the clc socket -- copy smc socket options to clc socket
	 */
	smc_copy_sock_settings_to_clc(smc);
	if (!smc->use_fallback)
		tcp_sk(smc->clcsock->sk)->syn_smc = 1;

	rc = kernel_listen(smc->clcsock, backlog);
	if (rc)
		goto out;
	sk->sk_max_ack_backlog = backlog;
	sk->sk_ack_backlog = 0;
	sk->sk_state = SMC_LISTEN;
	sock_hold(sk); /* sock_hold in tcp_listen_worker */
	if (!schedule_work(&smc->tcp_listen_work))
		sock_put(sk);

out:
	release_sock(sk);
	return rc;
}

static int smc_accept(struct socket *sock, struct socket *new_sock,
		      int flags, bool kern)
{
	struct sock *sk = sock->sk, *nsk;
	DECLARE_WAITQUEUE(wait, current);
	struct smc_sock *lsmc;
	long timeo;
	int rc = 0;

	lsmc = smc_sk(sk);
	sock_hold(sk); /* sock_put below */
	lock_sock(sk);

	if (lsmc->sk.sk_state != SMC_LISTEN) {
		rc = -EINVAL;
		release_sock(sk);
		goto out;
	}

	/* Wait for an incoming connection */
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	add_wait_queue_exclusive(sk_sleep(sk), &wait);
	while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (!timeo) {
			rc = -EAGAIN;
			break;
		}
		release_sock(sk);
		timeo = schedule_timeout(timeo);
		/* wakeup by sk_data_ready in smc_listen_work() */
		sched_annotate_sleep();
		lock_sock(sk);
		if (signal_pending(current)) {
			rc = sock_intr_errno(timeo);
			break;
		}
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(sk_sleep(sk), &wait);

	if (!rc)
		rc = sock_error(nsk);
	release_sock(sk);
	if (rc)
		goto out;

	if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) {
		/* wait till data arrives on the socket */
		timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
					 MSEC_PER_SEC);
		if (smc_sk(nsk)->use_fallback) {
			struct sock *clcsk = smc_sk(nsk)->clcsock->sk;

			lock_sock(clcsk);
			if (skb_queue_empty(&clcsk->sk_receive_queue))
				sk_wait_data(clcsk, &timeo, NULL);
			release_sock(clcsk);
		} else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) {
			lock_sock(nsk);
			smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available);
			release_sock(nsk);
		}
	}

out:
	sock_put(sk); /* sock_hold above */
	return rc;
}

static int smc_getname(struct socket *sock, struct sockaddr *addr,
		       int peer)
{
	struct smc_sock *smc;

	if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
	    (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
		return -ENOTCONN;

	smc = smc_sk(sock->sk);

	return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
}

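/* MSG_FASTOPEN cannot be mapped onto the SMC handshake, so a sendmsg()
 * with that flag on a fresh socket forces a fallback to TCP before any
 * CLC message has been exchanged; on an already connecting or connected
 * SMC socket it is rejected.
 */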
static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_INIT))
		goto out;

	if (msg->msg_flags & MSG_FASTOPEN) {
		if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
			smc_switch_to_fallback(smc);
			smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
		} else {
			rc = -EINVAL;
			goto out;
		}
	}

	if (smc->use_fallback)
		rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
	else
		rc = smc_tx_sendmsg(smc, msg, len);
out:
	release_sock(sk);
	return rc;
}

static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
		       int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
		/* socket was connected before, no more data to read */
		rc = 0;
		goto out;
	}
	if ((sk->sk_state == SMC_INIT) ||
	    (sk->sk_state == SMC_LISTEN) ||
	    (sk->sk_state == SMC_CLOSED))
		goto out;

	if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
		rc = 0;
		goto out;
	}

	if (smc->use_fallback) {
		rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
	} else {
		msg->msg_namelen = 0;
		rc = smc_rx_recvmsg(smc, msg, NULL, len, flags);
	}

out:
	release_sock(sk);
	return rc;
}

static __poll_t smc_accept_poll(struct sock *parent)
{
	struct smc_sock *isk = smc_sk(parent);
	__poll_t mask = 0;

	spin_lock(&isk->accept_q_lock);
	if (!list_empty(&isk->accept_q))
		mask = EPOLLIN | EPOLLRDNORM;
	spin_unlock(&isk->accept_q_lock);

	return mask;
}

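/* poll has two paths: once fallback is decided, everything is delegated
 * to the TCP clcsock; otherwise readiness is derived from the SMC
 * connection state (sndbuf space, bytes_to_rcv, shutdown flags). The
 * fallback flag is re-checked after sock_poll_wait() because a
 * concurrent connect worker may have switched the socket meanwhile.
 */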
static __poll_t smc_poll(struct file *file, struct socket *sock,
			 poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	__poll_t mask = 0;

	if (!sk)
		return EPOLLNVAL;

	smc = smc_sk(sock->sk);
	if (smc->use_fallback) {
		/* delegate to CLC child sock */
		mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
		sk->sk_err = smc->clcsock->sk->sk_err;
	} else {
		if (sk->sk_state != SMC_CLOSED)
			sock_poll_wait(file, sock, wait);
		if (sk->sk_err)
			mask |= EPOLLERR;
		if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
		    (sk->sk_state == SMC_CLOSED))
			mask |= EPOLLHUP;
		if (sk->sk_state == SMC_LISTEN) {
			/* woken up by sk_data_ready in smc_listen_work() */
			mask |= smc_accept_poll(sk);
		} else if (smc->use_fallback) { /* as result of connect_work()*/
			mask |= smc->clcsock->ops->poll(file, smc->clcsock,
							wait);
			sk->sk_err = smc->clcsock->sk->sk_err;
		} else {
			if ((sk->sk_state != SMC_INIT &&
			     atomic_read(&smc->conn.sndbuf_space)) ||
			    sk->sk_shutdown & SEND_SHUTDOWN) {
				mask |= EPOLLOUT | EPOLLWRNORM;
			} else {
				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
			}
			if (atomic_read(&smc->conn.bytes_to_rcv))
				mask |= EPOLLIN | EPOLLRDNORM;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
			if (sk->sk_state == SMC_APPCLOSEWAIT1)
				mask |= EPOLLIN;
			if (smc->conn.urg_state == SMC_URG_VALID)
				mask |= EPOLLPRI;
		}
	}

	return mask;
}

static int smc_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;
	int rc1 = 0;

	smc = smc_sk(sk);

	if ((how < SHUT_RD) || (how > SHUT_RDWR))
		return rc;

	lock_sock(sk);

	rc = -ENOTCONN;
	if ((sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPFINCLOSEWAIT))
		goto out;
	if (smc->use_fallback) {
		rc = kernel_sock_shutdown(smc->clcsock, how);
		sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
		if (sk->sk_shutdown == SHUTDOWN_MASK)
			sk->sk_state = SMC_CLOSED;
		goto out;
	}
	switch (how) {
	case SHUT_RDWR:		/* shutdown in both directions */
		rc = smc_close_active(smc);
		break;
	case SHUT_WR:
		rc = smc_close_shutdown_write(smc);
		break;
	case SHUT_RD:
		rc = 0;
		/* nothing more to do because peer is not involved */
		break;
	}
	if (smc->clcsock)
		rc1 = kernel_sock_shutdown(smc->clcsock, how);
	/* map sock_shutdown_cmd constants to sk_shutdown value range */
	sk->sk_shutdown |= how + 1;

out:
	release_sock(sk);
	return rc ? rc : rc1;
}

static int smc_setsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int val, rc;

	smc = smc_sk(sk);

	/* generic setsockopts reaching us here always apply to the
	 * CLC socket
	 */
	rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
					   optval, optlen);
	if (smc->clcsock->sk->sk_err) {
		sk->sk_err = smc->clcsock->sk->sk_err;
		sk->sk_error_report(sk);
	}

	if (optlen < sizeof(int))
		return -EINVAL;
	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	lock_sock(sk);
	if (rc || smc->use_fallback)
		goto out;
	switch (optname) {
	case TCP_ULP:
	case TCP_FASTOPEN:
	case TCP_FASTOPEN_CONNECT:
	case TCP_FASTOPEN_KEY:
	case TCP_FASTOPEN_NO_COOKIE:
		/* option not supported by SMC */
		if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
			smc_switch_to_fallback(smc);
			smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
		} else {
			rc = -EINVAL;
		}
		break;
	case TCP_NODELAY:
		if (sk->sk_state != SMC_INIT &&
		    sk->sk_state != SMC_LISTEN &&
		    sk->sk_state != SMC_CLOSED) {
			if (val)
				mod_delayed_work(system_wq, &smc->conn.tx_work,
						 0);
		}
		break;
	case TCP_CORK:
		if (sk->sk_state != SMC_INIT &&
		    sk->sk_state != SMC_LISTEN &&
		    sk->sk_state != SMC_CLOSED) {
			if (!val)
				mod_delayed_work(system_wq, &smc->conn.tx_work,
						 0);
		}
		break;
	case TCP_DEFER_ACCEPT:
		smc->sockopt_defer_accept = val;
		break;
	default:
		break;
	}
out:
	release_sock(sk);

	return rc;
}

static int smc_getsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	struct smc_sock *smc;

	smc = smc_sk(sock->sk);
	/* socket options apply to the CLC socket */
	return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
					     optval, optlen);
}

static int smc_ioctl(struct socket *sock, unsigned int cmd,
		     unsigned long arg)
{
	union smc_host_cursor cons, urg;
	struct smc_connection *conn;
	struct smc_sock *smc;
	int answ;

	smc = smc_sk(sock->sk);
	conn = &smc->conn;
	lock_sock(&smc->sk);
	if (smc->use_fallback) {
		if (!smc->clcsock) {
			release_sock(&smc->sk);
			return -EBADF;
		}
		answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
		release_sock(&smc->sk);
		return answ;
	}
	switch (cmd) {
	case SIOCINQ: /* same as FIONREAD */
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED)
			answ = 0;
		else
			answ = atomic_read(&smc->conn.bytes_to_rcv);
		break;
	case SIOCOUTQ:
		/* output queue size (not send + not acked) */
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED)
			answ = 0;
		else
			answ = smc->conn.sndbuf_desc->len -
					atomic_read(&smc->conn.sndbuf_space);
		break;
	case SIOCOUTQNSD:
		/* output queue size (not send only) */
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED)
			answ = 0;
		else
			answ = smc_tx_prepared_sends(&smc->conn);
		break;
	case SIOCATMARK:
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED) {
			answ = 0;
		} else {
			smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
			smc_curs_copy(&urg, &conn->urg_curs, conn);
			answ = smc_curs_diff(conn->rmb_desc->len,
					     &cons, &urg) == 1;
		}
		break;
	default:
		release_sock(&smc->sk);
		return -ENOIOCTLCMD;
	}
	release_sock(&smc->sk);

	return put_user(answ, (int __user *)arg);
}

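/* sendpage is only zero-copy on the fallback path; native SMC copies
 * the page into the local send buffer via sock_no_sendpage(), which
 * ends up in smc_sendmsg().
 */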
static ssize_t smc_sendpage(struct socket *sock, struct page *page,
			    int offset, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state != SMC_ACTIVE) {
		release_sock(sk);
		goto out;
	}
	release_sock(sk);
	if (smc->use_fallback)
		rc = kernel_sendpage(smc->clcsock, page, offset,
				     size, flags);
	else
		rc = sock_no_sendpage(sock, page, offset, size, flags);

out:
	return rc;
}

/* Map the affected portions of the rmbe into an spd, note the number of bytes
 * to splice in conn->splice_pending, and press 'go'. Delays consumer cursor
 * updates till whenever a respective page has been fully processed.
 * Note that subsequent recv() calls have to wait till all splice() processing
 * completed.
 */
static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
			       struct pipe_inode_info *pipe, size_t len,
			       unsigned int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
		/* socket was connected before, no more data to read */
		rc = 0;
		goto out;
	}
	if (sk->sk_state == SMC_INIT ||
	    sk->sk_state == SMC_LISTEN ||
	    sk->sk_state == SMC_CLOSED)
		goto out;

	if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
		rc = 0;
		goto out;
	}

	if (smc->use_fallback) {
		rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
						    pipe, len, flags);
	} else {
		if (*ppos) {
			rc = -ESPIPE;
			goto out;
		}
		if (flags & SPLICE_F_NONBLOCK)
			flags = MSG_DONTWAIT;
		else
			flags = 0;
		rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags);
	}
out:
	release_sock(sk);

	return rc;
}

/* must look like tcp */
static const struct proto_ops smc_sock_ops = {
	.family		= PF_SMC,
	.owner		= THIS_MODULE,
	.release	= smc_release,
	.bind		= smc_bind,
	.connect	= smc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= smc_accept,
	.getname	= smc_getname,
	.poll		= smc_poll,
	.ioctl		= smc_ioctl,
	.listen		= smc_listen,
	.shutdown	= smc_shutdown,
	.setsockopt	= smc_setsockopt,
	.getsockopt	= smc_getsockopt,
	.sendmsg	= smc_sendmsg,
	.recvmsg	= smc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= smc_sendpage,
	.splice_read	= smc_splice_read,
};

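/* Userspace view (a sketch, not part of this file): an SMC socket is
 * created like a TCP socket, only family and protocol differ, and the
 * sockaddr format stays AF_INET/AF_INET6:
 *
 *	// SMCPROTO_SMC (0) selects the AF_INET variant,
 *	// SMCPROTO_SMC6 (1) the AF_INET6 variant
 *	int fd = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC);
 *	connect(fd, (struct sockaddr *)&sin, sizeof(sin));
 *
 * If the peer is not SMC capable, the connection transparently falls
 * back to TCP via the internal clcsock created below.
 */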
static int smc_create(struct net *net, struct socket *sock, int protocol,
		      int kern)
{
	int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
	struct smc_sock *smc;
	struct sock *sk;
	int rc;

	rc = -ESOCKTNOSUPPORT;
	if (sock->type != SOCK_STREAM)
		goto out;

	rc = -EPROTONOSUPPORT;
	if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
		goto out;

	rc = -ENOBUFS;
	sock->ops = &smc_sock_ops;
	sk = smc_sock_alloc(net, sock, protocol);
	if (!sk)
		goto out;

	/* create internal TCP socket for CLC handshake and fallback */
	smc = smc_sk(sk);
	smc->use_fallback = false; /* assume rdma capability first */
	smc->fallback_rsn = 0;
	rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
			      &smc->clcsock);
	if (rc) {
		sk_common_release(sk);
		goto out;
	}
	smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
	smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);

out:
	return rc;
}

static const struct net_proto_family smc_sock_family_ops = {
	.family	= PF_SMC,
	.owner	= THIS_MODULE,
	.create	= smc_create,
};

unsigned int smc_net_id;

static __net_init int smc_net_init(struct net *net)
{
	return smc_pnet_net_init(net);
}

static void __net_exit smc_net_exit(struct net *net)
{
	smc_pnet_net_exit(net);
}

static struct pernet_operations smc_net_ops = {
	.init = smc_net_init,
	.exit = smc_net_exit,
	.id   = &smc_net_id,
	.size = sizeof(struct smc_net),
};

static int __init smc_init(void)
{
	int rc;

	rc = register_pernet_subsys(&smc_net_ops);
	if (rc)
		return rc;

	rc = smc_pnet_init();
	if (rc)
		goto out_pernet_subsys;

	rc = smc_core_init();
	if (rc) {
		pr_err("%s: smc_core_init fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = smc_llc_init();
	if (rc) {
		pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
		goto out_core;
	}

	rc = smc_cdc_init();
	if (rc) {
		pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
		goto out_core;
	}

	rc = proto_register(&smc_proto, 1);
	if (rc) {
		pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
		goto out_core;
	}

	rc = proto_register(&smc_proto6, 1);
	if (rc) {
		pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
		goto out_proto;
	}

	rc = sock_register(&smc_sock_family_ops);
	if (rc) {
		pr_err("%s: sock_register fails with %d\n", __func__, rc);
		goto out_proto6;
	}
	INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
	INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);

	rc = smc_ib_register_client();
	if (rc) {
		pr_err("%s: ib_register fails with %d\n", __func__, rc);
		goto out_sock;
	}

	static_branch_enable(&tcp_have_smc);
	return 0;

out_sock:
	sock_unregister(PF_SMC);
out_proto6:
	proto_unregister(&smc_proto6);
out_proto:
	proto_unregister(&smc_proto);
out_core:
	smc_core_exit();
out_pnet:
	smc_pnet_exit();
out_pernet_subsys:
	unregister_pernet_subsys(&smc_net_ops);

	return rc;
}

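/* module unload: undo the registrations of smc_init(); the final
 * rcu_barrier() waits for outstanding RCU callbacks before the module
 * text and the RCU-typesafe sock slabs go away.
 */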
static void __exit smc_exit(void)
{
	static_branch_disable(&tcp_have_smc);
	sock_unregister(PF_SMC);
	smc_core_exit();
	smc_ib_unregister_client();
	proto_unregister(&smc_proto6);
	proto_unregister(&smc_proto);
	smc_pnet_exit();
	unregister_pernet_subsys(&smc_net_ops);
	rcu_barrier();
}

module_init(smc_init);
module_exit(smc_exit);

MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("smc socket address family");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_SMC);