// SPDX-License-Identifier: GPL-2.0-only
/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  AF_SMC protocol family socket handler keeping the AF_INET sock address type
 *  applies to SOCK_STREAM sockets only
 *  offers an alternative communication option for TCP-protocol sockets
 *  applicable with RoCE-cards only
 *
 *  Initial restrictions:
 *    - support for alternate links postponed
 *
 *  Copyright IBM Corp. 2016, 2018
 *
 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 *              based on prototype from Frank Blaschka
 */

#define KMSG_COMPONENT "smc"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/socket.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/sched/signal.h>
#include <linux/if_vlan.h>

#include <net/sock.h>
#include <net/tcp.h>
#include <net/smc.h>
#include <asm/ioctls.h>

#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include "smc_netns.h"

#include "smc.h"
#include "smc_clc.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_ism.h"
#include "smc_pnet.h"
#include "smc_tx.h"
#include "smc_rx.h"
#include "smc_close.h"

static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
						 * creation on server
						 */
static DEFINE_MUTEX(smc_client_lgr_pending);	/* serialize link group
						 * creation on client
						 */

static void smc_tcp_listen_work(struct work_struct *);
static void smc_connect_work(struct work_struct *);

/* keepalive hook of the smc protos: hand the value on to the keepalive
 * handler of the internal clcsock's protocol
 */
static void smc_set_keepalive(struct sock *sk, int val)
{
	struct smc_sock *smc = smc_sk(sk);

	smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
}

/* socket list + lock referenced by smc_proto (h.smc_hash) */
static struct smc_hashinfo smc_v4_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};

/* socket list + lock referenced by smc_proto6 (h.smc_hash) */
static struct smc_hashinfo smc_v6_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
};

/* add sk to the list of its proto's smc_hashinfo and count it as in use;
 * list and counter are updated under the hashinfo write lock.
 * Always returns 0.
 */
int smc_hash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
	struct hlist_head *head;

	head = &h->ht;

	write_lock_bh(&h->lock);
	sk_add_node(sk, head);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	write_unlock_bh(&h->lock);

	return 0;
}
EXPORT_SYMBOL_GPL(smc_hash_sk);

/* remove sk from the list of its proto's smc_hashinfo; the in-use counter
 * is only decremented if sk_del_node_init() reports a removal, so calling
 * this for an already unhashed sock does not unbalance the counter
 */
void smc_unhash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;

	write_lock_bh(&h->lock);
	if (sk_del_node_init(sk))
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	write_unlock_bh(&h->lock);
}
EXPORT_SYMBOL_GPL(smc_unhash_sk);

/* proto selected by smc_sock_alloc() unless protocol is SMCPROTO_SMC6 */
struct proto smc_proto = {
	.name		= "SMC",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v4_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto);

/* proto selected by smc_sock_alloc() for protocol SMCPROTO_SMC6 */
struct proto smc_proto6 = {
	.name		= "SMC6",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v6_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto6);

/* common release processing; both callers in this file (smc_release() and
 * smc_close_non_accepted()) invoke it with the sock lock held.
 * Returns the result of smc_close_active() resp. of the clcsock shutdown,
 * or 0.
 */
static int __smc_release(struct smc_sock *smc)
{
	struct sock *sk = &smc->sk;
	int rc = 0;

	if (!smc->use_fallback) {
		rc = smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	} else {
		if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
			sock_put(sk); /* passive closing */
		if (sk->sk_state == SMC_LISTEN) {
			/* wake up clcsock accept */
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
		}
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk);
	}

	sk->sk_prot->unhash(sk);

	if (sk->sk_state == SMC_CLOSED) {
		if (smc->clcsock) {
			/* the sock lock is dropped around the clcsock release
			 * and re-acquired afterwards
			 */
			release_sock(sk);
			smc_clcsock_release(smc);
			lock_sock(sk);
		}
		if (!smc->use_fallback)
			smc_conn_free(&smc->conn);
	}

	return rc;
}

/* release handler of an smc socket: abort a pending non-blocking connect,
 * wait for the connect worker, run __smc_release() under the sock lock,
 * detach the sock from the socket and drop the final reference.
 * Returns 0 if the socket has no sock, else the result of __smc_release().
 */
static int smc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = 0;

	if (!sk)
		goto out;

	smc = smc_sk(sk);

	/* cleanup for a dangling non-blocking connect */
	if (smc->connect_nonblock && sk->sk_state == SMC_INIT)
		tcp_abort(smc->clcsock->sk, ECONNABORTED);
	flush_work(&smc->connect_work);

	if (sk->sk_state == SMC_LISTEN)
		/* smc_close_non_accepted() is called and acquires
		 * sock lock for child sockets again
		 */
		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	else
		lock_sock(sk);

	rc = __smc_release(smc);

	/* detach socket */
	sock_orphan(sk);
	sock->sk = NULL;
	release_sock(sk);

	sock_put(sk); /* final sock_put */
out:
	return rc;
}

/* sk_destruct callback: the refcnt debug counter is only decremented for
 * a sock that is in state SMC_CLOSED and flagged SOCK_DEAD
 */
static void smc_destruct(struct sock *sk)
{
	if (sk->sk_state != SMC_CLOSED)
		return;
	if (!sock_flag(sk, SOCK_DEAD))
		return;

	sk_refcnt_debug_dec(sk);
}

/* allocate an smc sock in state SMC_INIT, initialize its work items, accept
 * queue and locks, and hash it.
 * @sock may be NULL (see smc_clcsock_accept()).
 * Returns the new sock, or NULL if sk_alloc() fails.
 */
static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
				   int protocol)
{
	struct smc_sock *smc;
	struct proto *prot;
	struct sock *sk;

	prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
	if (!sk)
		return NULL;

	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
	sk->sk_state = SMC_INIT;
	sk->sk_destruct = smc_destruct;
	sk->sk_protocol = protocol;
	smc = smc_sk(sk);
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	INIT_WORK(&smc->connect_work, smc_connect_work);
	INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
	INIT_LIST_HEAD(&smc->accept_q);
	spin_lock_init(&smc->accept_q_lock);
	spin_lock_init(&smc->conn.send_lock);
	sk->sk_prot->hash(sk);
	sk_refcnt_debug_inc(sk);
	mutex_init(&smc->clcsock_release_lock);

	return sk;
}

/* bind handler: validate the address, then bind the internal clcsock.
 * Returns -EINVAL for a too short address or a socket that is not in state
 * SMC_INIT or has a non-blocking connect pending, -EAFNOSUPPORT for an
 * unsupported address family, else the result of kernel_bind().
 */
static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
		    int addr_len)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);

	/* replicate tests from inet_bind(), to be safe wrt. future changes */
	rc = -EINVAL;
	if (addr_len < sizeof(struct sockaddr_in))
		goto out;

	rc = -EAFNOSUPPORT;
	if (addr->sin_family != AF_INET &&
	    addr->sin_family != AF_INET6 &&
	    addr->sin_family != AF_UNSPEC)
		goto out;
	/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
	if (addr->sin_family == AF_UNSPEC &&
	    addr->sin_addr.s_addr != htonl(INADDR_ANY))
		goto out;

	lock_sock(sk);

	/* Check if socket is already active */
	rc = -EINVAL;
	if (sk->sk_state != SMC_INIT || smc->connect_nonblock)
		goto out_rel;

	smc->clcsock->sk->sk_reuse = sk->sk_reuse;
	rc = kernel_bind(smc->clcsock, uaddr, addr_len);

out_rel:
	release_sock(sk);
out:
	return rc;
}

/* copy a fixed set of sock fields from @osk to @nsk and replace those bits
 * of nsk->sk_flags that are selected by @mask with the bits of @osk
 */
static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
				   unsigned long mask)
{
	/* options we don't get control via setsockopt for */
	nsk->sk_type = osk->sk_type;
	nsk->sk_sndbuf = osk->sk_sndbuf;
	nsk->sk_rcvbuf = osk->sk_rcvbuf;
	nsk->sk_sndtimeo = osk->sk_sndtimeo;
	nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
	nsk->sk_mark = osk->sk_mark;
	nsk->sk_priority = osk->sk_priority;
	nsk->sk_rcvlowat = osk->sk_rcvlowat;
	nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
	nsk->sk_err = osk->sk_err;

	nsk->sk_flags &= ~mask;
	nsk->sk_flags |= osk->sk_flags & mask;
}

/* sk_flags bits taken over from the smc sock when copying to the clcsock */
#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_BROADCAST) | \
			     (1UL << SOCK_TIMESTAMP) | \
			     (1UL << SOCK_DBG) | \
			     (1UL << SOCK_RCVTSTAMP) | \
			     (1UL << SOCK_RCVTSTAMPNS) | \
			     (1UL << SOCK_LOCALROUTE) | \
			     (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
			     (1UL << SOCK_RXQ_OVFL) | \
			     (1UL << SOCK_WIFI_STATUS) | \
			     (1UL << SOCK_NOFCS) | \
			     (1UL << SOCK_FILTER_LOCKED) | \
			     (1UL << SOCK_TSTAMP_NEW))
/* copy only relevant settings and flags of SOL_SOCKET level from smc to
* clc socket (since smc is not called for these options from net/core)
 */
static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
	smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
}

/* sk_flags bits taken over from the clcsock when copying to the smc sock */
#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_DBG))
/* copy only settings and flags relevant for smc from clc to smc socket */
static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
{
	smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}

/* register a new rmb, send confirm_rkey msg to register with peer
 *
 * The memory region is only registered if rmb_desc->wr_reg is not yet set;
 * the confirm_rkey exchange is skipped if @conf_rkey is false.
 * Returns 0 on success; on failure of either step rmb_desc->regerr is set
 * and -EFAULT is returned.
 */
static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc,
		       bool conf_rkey)
{
	if (!rmb_desc->wr_reg) {
		/* register memory region for new rmb */
		if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) {
			rmb_desc->regerr = 1;
			return -EFAULT;
		}
		rmb_desc->wr_reg = 1;
	}
	if (!conf_rkey)
		return 0;
	/* exchange confirm_rkey msg with peer */
	if (smc_llc_do_confirm_rkey(link, rmb_desc)) {
		rmb_desc->regerr = 1;
		return -EFAULT;
	}
	return 0;
}

/* client side of the LLC handshake for the first link of a link group:
 * wait for CONFIRM LINK, bring the QP to RTS, register the rmb, answer
 * CONFIRM LINK, wait for ADD LINK and answer it, then mark the link active.
 * Returns 0 on success, an SMC_CLC_DECL_* reason code, or the result of
 * smc_clc_wait_msg() if a wait did not complete and that result is not
 * -EAGAIN.
 */
static int smc_clnt_conf_first_link(struct smc_sock *smc)
{
	struct net *net = sock_net(smc->clcsock->sk);
	struct smc_link_group *lgr = smc->conn.lgr;
	struct smc_link *link;
	int rest;
	int rc;

	link = &lgr->lnk[SMC_SINGLE_LINK];
	/* receive CONFIRM LINK request from server over RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(
		&link->llc_confirm,
		SMC_LLC_WAIT_FIRST_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		/* wait did not complete: look for a CLC decline from the
		 * peer; -EAGAIN from that wait is reported as a timeout
		 */
		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
	}

	if (link->llc_confirm_rc)
		return SMC_CLC_DECL_RMBE_EC;

	rc = smc_ib_modify_qp_rts(link);
	if (rc)
		return SMC_CLC_DECL_ERR_RDYLNK;

	smc_wr_remember_qp_attr(link);

	if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
		return SMC_CLC_DECL_ERR_REGRMB;

	/* send CONFIRM LINK response over RoCE fabric */
	rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_CL;

	/* receive ADD LINK request from server over RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(&link->llc_add,
							 SMC_LLC_WAIT_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
	}

	/* send add link reject message, only one link supported for now */
	rc = smc_llc_send_add_link(link,
				   link->smcibdev->mac[link->ibport - 1],
				   link->gid, SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_AL;

	smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);

	return 0;
}

/* SMC-R: store the peer's rmbe index, alert token and buffer size from the
 * CLC accept/confirm message in the connection; tx_off is computed from
 * the rmbe index minus one
 */
static void smcr_conn_save_peer_info(struct smc_sock *smc,
				     struct smc_clc_msg_accept_confirm *clc)
{
	int bufsize = smc_uncompress_bufsize(clc->rmbe_size);

	smc->conn.peer_rmbe_idx = clc->rmbe_idx;
	smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
	smc->conn.peer_rmbe_size = bufsize;
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
	smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
}

/* SMC-D: store the peer's dmbe index, token and buffer size from the CLC
 * accept/confirm message in the connection; tx_off is computed from the
 * dmbe index as is
 */
static void smcd_conn_save_peer_info(struct smc_sock *smc,
				     struct smc_clc_msg_accept_confirm *clc)
{
	int bufsize = smc_uncompress_bufsize(clc->dmbe_size);

	smc->conn.peer_rmbe_idx = clc->dmbe_idx;
	smc->conn.peer_token = clc->token;
	/* msg header takes up space in the buffer */
	smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
	smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
}

/* store peer buffer info, using the SMC-D or SMC-R variant depending on
 * the is_smcd setting of the connection's link group
 */
static void smc_conn_save_peer_info(struct smc_sock *smc,
				    struct smc_clc_msg_accept_confirm *clc)
{
	if (smc->conn.lgr->is_smcd)
		smcd_conn_save_peer_info(smc, clc);
	else
		smcr_conn_save_peer_info(smc, clc);
}

/* store the peer's qpn, gid, mac, psn and mtu from the CLC accept/confirm
 * message in the link
 */
static void smc_link_save_peer_info(struct smc_link *link,
				    struct smc_clc_msg_accept_confirm *clc)
{
	link->peer_qpn = ntoh24(clc->qpn);
	memcpy(link->peer_gid, clc->lcl.gid, SMC_GID_SIZE);
	memcpy(link->peer_mac, clc->lcl.mac, sizeof(link->peer_mac));
	link->peer_psn = ntoh24(clc->psn);
	link->peer_mtu = clc->qp_mtu;
}

/* mark the socket as fallback; if the smc socket already has a file,
 * attach that file to the clcsock and let its private_data point to the
 * clcsock
 */
static void smc_switch_to_fallback(struct smc_sock *smc)
{
	smc->use_fallback = true;
	if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
		smc->clcsock->file = smc->sk.sk_socket->file;
		smc->clcsock->file->private_data = smc->clcsock;
	}
}

/* fall back during connect
 *
 * Records @reason_code as fallback reason and moves a socket in state
 * SMC_INIT to SMC_ACTIVE. Always returns 0.
 */
static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
{
	smc_switch_to_fallback(smc);
	smc->fallback_rsn = reason_code;
	smc_copy_sock_settings_to_clc(smc);
	smc->connect_nonblock = 0;
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;
	return 0;
}

/* decline and fall back during connect
 *
 * A negative @reason_code is returned unchanged. Otherwise a CLC decline
 * is sent (unless the reason is SMC_CLC_DECL_PEERDECL) and the socket
 * falls back; a failure to send the decline is returned as error.
 * On both error paths the sock reference is dropped if the socket is still
 * in state SMC_INIT.
 */
static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
{
	int rc;

	if (reason_code < 0) { /* error, fallback is not possible */
		if (smc->sk.sk_state == SMC_INIT)
			sock_put(&smc->sk); /* passive closing */
		return reason_code;
	}
	if (reason_code != SMC_CLC_DECL_PEERDECL) {
		rc = smc_clc_send_decline(smc, reason_code);
		if (rc < 0) {
			if (smc->sk.sk_state == SMC_INIT)
				sock_put(&smc->sk); /* passive closing */
			return rc;
		}
	}
	return smc_connect_fallback(smc, reason_code);
}

/* abort connecting
 *
 * Releases the lgr_pending mutex matching the link group type, frees the
 * connection and hands @reason_code back to the caller.
 */
static int smc_connect_abort(struct smc_sock *smc, int reason_code,
			     int local_contact)
{
	if (local_contact == SMC_FIRST_CONTACT)
		smc_lgr_forget(smc->conn.lgr);
	if (smc->conn.lgr->is_smcd)
		/* there is only one lgr role for SMC-D; use server lock */
		mutex_unlock(&smc_server_lgr_pending);
	else
		mutex_unlock(&smc_client_lgr_pending);

	smc_conn_free(&smc->conn);
	smc->connect_nonblock = 0;
	return reason_code;
}

/* check if there is a rdma device available for this connection. */
/* called for connect and listen */
/* returns 0 if ini->ib_dev was set, else SMC_CLC_DECL_NOSMCRDEV */
static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini)
{
	/* PNET table look up: search active ib_device and port
	 * within same PNETID that also contains the ethernet device
	 * used for the internal TCP socket
	 */
	smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
	if (!ini->ib_dev)
		return SMC_CLC_DECL_NOSMCRDEV;
	return 0;
}

/* check if there is an ISM device available for this connection. */
/* called for connect and listen */
/* returns 0 if ini->ism_dev was set, else SMC_CLC_DECL_NOSMCDDEV */
static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini)
{
	/* Find ISM device with same PNETID as connecting interface */
	smc_pnet_find_ism_resource(smc->clcsock->sk, ini);
	if (!ini->ism_dev)
		return SMC_CLC_DECL_NOSMCDDEV;
	return 0;
}

/* Check for VLAN ID and register it on ISM device just for CLC handshake */
/* returns 0, or SMC_CLC_DECL_ISMVLANERR if smc_ism_get_vlan() fails */
static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
				      struct smc_init_info *ini)
{
	if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev, ini->vlan_id))
		return SMC_CLC_DECL_ISMVLANERR;
	return 0;
}

/* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
 * used, the VLAN ID will be registered again during the connection setup.
552 */ 553 static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd, 554 struct smc_init_info *ini) 555 { 556 if (!is_smcd) 557 return 0; 558 if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev, ini->vlan_id)) 559 return SMC_CLC_DECL_CNFERR; 560 return 0; 561 } 562 563 /* CLC handshake during connect */ 564 static int smc_connect_clc(struct smc_sock *smc, int smc_type, 565 struct smc_clc_msg_accept_confirm *aclc, 566 struct smc_init_info *ini) 567 { 568 int rc = 0; 569 570 /* do inband token exchange */ 571 rc = smc_clc_send_proposal(smc, smc_type, ini); 572 if (rc) 573 return rc; 574 /* receive SMC Accept CLC message */ 575 return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT, 576 CLC_WAIT_TIME); 577 } 578 579 /* setup for RDMA connection of client */ 580 static int smc_connect_rdma(struct smc_sock *smc, 581 struct smc_clc_msg_accept_confirm *aclc, 582 struct smc_init_info *ini) 583 { 584 struct smc_link *link; 585 int reason_code = 0; 586 587 ini->is_smcd = false; 588 ini->ib_lcl = &aclc->lcl; 589 ini->ib_clcqpn = ntoh24(aclc->qpn); 590 ini->srv_first_contact = aclc->hdr.flag; 591 592 mutex_lock(&smc_client_lgr_pending); 593 reason_code = smc_conn_create(smc, ini); 594 if (reason_code) { 595 mutex_unlock(&smc_client_lgr_pending); 596 return reason_code; 597 } 598 link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK]; 599 600 smc_conn_save_peer_info(smc, aclc); 601 602 /* create send buffer and rmb */ 603 if (smc_buf_create(smc, false)) 604 return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 605 ini->cln_first_contact); 606 607 if (ini->cln_first_contact == SMC_FIRST_CONTACT) 608 smc_link_save_peer_info(link, aclc); 609 610 if (smc_rmb_rtoken_handling(&smc->conn, aclc)) 611 return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK, 612 ini->cln_first_contact); 613 614 smc_close_init(smc); 615 smc_rx_init(smc); 616 617 if (ini->cln_first_contact == SMC_FIRST_CONTACT) { 618 if (smc_ib_ready_link(link)) 619 return smc_connect_abort(smc, 
SMC_CLC_DECL_ERR_RDYLNK, 620 ini->cln_first_contact); 621 } else { 622 if (smc_reg_rmb(link, smc->conn.rmb_desc, true)) 623 return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB, 624 ini->cln_first_contact); 625 } 626 smc_rmb_sync_sg_for_device(&smc->conn); 627 628 reason_code = smc_clc_send_confirm(smc); 629 if (reason_code) 630 return smc_connect_abort(smc, reason_code, 631 ini->cln_first_contact); 632 633 smc_tx_init(smc); 634 635 if (ini->cln_first_contact == SMC_FIRST_CONTACT) { 636 /* QP confirmation over RoCE fabric */ 637 reason_code = smc_clnt_conf_first_link(smc); 638 if (reason_code) 639 return smc_connect_abort(smc, reason_code, 640 ini->cln_first_contact); 641 } 642 mutex_unlock(&smc_client_lgr_pending); 643 644 smc_copy_sock_settings_to_clc(smc); 645 smc->connect_nonblock = 0; 646 if (smc->sk.sk_state == SMC_INIT) 647 smc->sk.sk_state = SMC_ACTIVE; 648 649 return 0; 650 } 651 652 /* setup for ISM connection of client */ 653 static int smc_connect_ism(struct smc_sock *smc, 654 struct smc_clc_msg_accept_confirm *aclc, 655 struct smc_init_info *ini) 656 { 657 int rc = 0; 658 659 ini->is_smcd = true; 660 ini->ism_gid = aclc->gid; 661 ini->srv_first_contact = aclc->hdr.flag; 662 663 /* there is only one lgr role for SMC-D; use server lock */ 664 mutex_lock(&smc_server_lgr_pending); 665 rc = smc_conn_create(smc, ini); 666 if (rc) { 667 mutex_unlock(&smc_server_lgr_pending); 668 return rc; 669 } 670 671 /* Create send and receive buffers */ 672 if (smc_buf_create(smc, true)) 673 return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 674 ini->cln_first_contact); 675 676 smc_conn_save_peer_info(smc, aclc); 677 smc_close_init(smc); 678 smc_rx_init(smc); 679 smc_tx_init(smc); 680 681 rc = smc_clc_send_confirm(smc); 682 if (rc) 683 return smc_connect_abort(smc, rc, ini->cln_first_contact); 684 mutex_unlock(&smc_server_lgr_pending); 685 686 smc_copy_sock_settings_to_clc(smc); 687 smc->connect_nonblock = 0; 688 if (smc->sk.sk_state == SMC_INIT) 689 smc->sk.sk_state = 
SMC_ACTIVE; 690 691 return 0; 692 } 693 694 /* perform steps before actually connecting */ 695 static int __smc_connect(struct smc_sock *smc) 696 { 697 bool ism_supported = false, rdma_supported = false; 698 struct smc_clc_msg_accept_confirm aclc; 699 struct smc_init_info ini = {0}; 700 int smc_type; 701 int rc = 0; 702 703 sock_hold(&smc->sk); /* sock put in passive closing */ 704 705 if (smc->use_fallback) 706 return smc_connect_fallback(smc, smc->fallback_rsn); 707 708 /* if peer has not signalled SMC-capability, fall back */ 709 if (!tcp_sk(smc->clcsock->sk)->syn_smc) 710 return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC); 711 712 /* IPSec connections opt out of SMC-R optimizations */ 713 if (using_ipsec(smc)) 714 return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC); 715 716 /* get vlan id from IP device */ 717 if (smc_vlan_by_tcpsk(smc->clcsock, &ini)) 718 return smc_connect_decline_fallback(smc, 719 SMC_CLC_DECL_GETVLANERR); 720 721 /* check if there is an ism device available */ 722 if (!smc_find_ism_device(smc, &ini) && 723 !smc_connect_ism_vlan_setup(smc, &ini)) { 724 /* ISM is supported for this connection */ 725 ism_supported = true; 726 smc_type = SMC_TYPE_D; 727 } 728 729 /* check if there is a rdma device available */ 730 if (!smc_find_rdma_device(smc, &ini)) { 731 /* RDMA is supported for this connection */ 732 rdma_supported = true; 733 if (ism_supported) 734 smc_type = SMC_TYPE_B; /* both */ 735 else 736 smc_type = SMC_TYPE_R; /* only RDMA */ 737 } 738 739 /* if neither ISM nor RDMA are supported, fallback */ 740 if (!rdma_supported && !ism_supported) 741 return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV); 742 743 /* perform CLC handshake */ 744 rc = smc_connect_clc(smc, smc_type, &aclc, &ini); 745 if (rc) { 746 smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini); 747 return smc_connect_decline_fallback(smc, rc); 748 } 749 750 /* depending on previous steps, connect using rdma or ism */ 751 if (rdma_supported && 
aclc.hdr.path == SMC_TYPE_R) 752 rc = smc_connect_rdma(smc, &aclc, &ini); 753 else if (ism_supported && aclc.hdr.path == SMC_TYPE_D) 754 rc = smc_connect_ism(smc, &aclc, &ini); 755 else 756 rc = SMC_CLC_DECL_MODEUNSUPP; 757 if (rc) { 758 smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini); 759 return smc_connect_decline_fallback(smc, rc); 760 } 761 762 smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini); 763 return 0; 764 } 765 766 static void smc_connect_work(struct work_struct *work) 767 { 768 struct smc_sock *smc = container_of(work, struct smc_sock, 769 connect_work); 770 long timeo = smc->sk.sk_sndtimeo; 771 int rc = 0; 772 773 if (!timeo) 774 timeo = MAX_SCHEDULE_TIMEOUT; 775 lock_sock(smc->clcsock->sk); 776 if (smc->clcsock->sk->sk_err) { 777 smc->sk.sk_err = smc->clcsock->sk->sk_err; 778 } else if ((1 << smc->clcsock->sk->sk_state) & 779 (TCPF_SYN_SENT | TCP_SYN_RECV)) { 780 rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo); 781 if ((rc == -EPIPE) && 782 ((1 << smc->clcsock->sk->sk_state) & 783 (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))) 784 rc = 0; 785 } 786 release_sock(smc->clcsock->sk); 787 lock_sock(&smc->sk); 788 if (rc != 0 || smc->sk.sk_err) { 789 smc->sk.sk_state = SMC_CLOSED; 790 if (rc == -EPIPE || rc == -EAGAIN) 791 smc->sk.sk_err = EPIPE; 792 else if (signal_pending(current)) 793 smc->sk.sk_err = -sock_intr_errno(timeo); 794 goto out; 795 } 796 797 rc = __smc_connect(smc); 798 if (rc < 0) 799 smc->sk.sk_err = -rc; 800 801 out: 802 if (!sock_flag(&smc->sk, SOCK_DEAD)) { 803 if (smc->sk.sk_err) { 804 smc->sk.sk_state_change(&smc->sk); 805 } else { /* allow polling before and after fallback decision */ 806 smc->clcsock->sk->sk_write_space(smc->clcsock->sk); 807 smc->sk.sk_write_space(&smc->sk); 808 } 809 } 810 release_sock(&smc->sk); 811 } 812 813 static int smc_connect(struct socket *sock, struct sockaddr *addr, 814 int alen, int flags) 815 { 816 struct sock *sk = sock->sk; 817 struct smc_sock *smc; 818 int rc = -EINVAL; 819 820 smc = 
smc_sk(sk); 821 822 /* separate smc parameter checking to be safe */ 823 if (alen < sizeof(addr->sa_family)) 824 goto out_err; 825 if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6) 826 goto out_err; 827 828 lock_sock(sk); 829 switch (sk->sk_state) { 830 default: 831 goto out; 832 case SMC_ACTIVE: 833 rc = -EISCONN; 834 goto out; 835 case SMC_INIT: 836 rc = 0; 837 break; 838 } 839 840 smc_copy_sock_settings_to_clc(smc); 841 tcp_sk(smc->clcsock->sk)->syn_smc = 1; 842 if (smc->connect_nonblock) { 843 rc = -EALREADY; 844 goto out; 845 } 846 rc = kernel_connect(smc->clcsock, addr, alen, flags); 847 if (rc && rc != -EINPROGRESS) 848 goto out; 849 if (flags & O_NONBLOCK) { 850 if (schedule_work(&smc->connect_work)) 851 smc->connect_nonblock = 1; 852 rc = -EINPROGRESS; 853 } else { 854 rc = __smc_connect(smc); 855 if (rc < 0) 856 goto out; 857 else 858 rc = 0; /* success cases including fallback */ 859 } 860 861 out: 862 release_sock(sk); 863 out_err: 864 return rc; 865 } 866 867 static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) 868 { 869 struct socket *new_clcsock = NULL; 870 struct sock *lsk = &lsmc->sk; 871 struct sock *new_sk; 872 int rc = -EINVAL; 873 874 release_sock(lsk); 875 new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol); 876 if (!new_sk) { 877 rc = -ENOMEM; 878 lsk->sk_err = ENOMEM; 879 *new_smc = NULL; 880 lock_sock(lsk); 881 goto out; 882 } 883 *new_smc = smc_sk(new_sk); 884 885 mutex_lock(&lsmc->clcsock_release_lock); 886 if (lsmc->clcsock) 887 rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0); 888 mutex_unlock(&lsmc->clcsock_release_lock); 889 lock_sock(lsk); 890 if (rc < 0) 891 lsk->sk_err = -rc; 892 if (rc < 0 || lsk->sk_state == SMC_CLOSED) { 893 new_sk->sk_prot->unhash(new_sk); 894 if (new_clcsock) 895 sock_release(new_clcsock); 896 new_sk->sk_state = SMC_CLOSED; 897 sock_set_flag(new_sk, SOCK_DEAD); 898 sock_put(new_sk); /* final */ 899 *new_smc = NULL; 900 goto out; 901 } 902 903 
(*new_smc)->clcsock = new_clcsock; 904 out: 905 return rc; 906 } 907 908 /* add a just created sock to the accept queue of the listen sock as 909 * candidate for a following socket accept call from user space 910 */ 911 static void smc_accept_enqueue(struct sock *parent, struct sock *sk) 912 { 913 struct smc_sock *par = smc_sk(parent); 914 915 sock_hold(sk); /* sock_put in smc_accept_unlink () */ 916 spin_lock(&par->accept_q_lock); 917 list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q); 918 spin_unlock(&par->accept_q_lock); 919 sk_acceptq_added(parent); 920 } 921 922 /* remove a socket from the accept queue of its parental listening socket */ 923 static void smc_accept_unlink(struct sock *sk) 924 { 925 struct smc_sock *par = smc_sk(sk)->listen_smc; 926 927 spin_lock(&par->accept_q_lock); 928 list_del_init(&smc_sk(sk)->accept_q); 929 spin_unlock(&par->accept_q_lock); 930 sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk); 931 sock_put(sk); /* sock_hold in smc_accept_enqueue */ 932 } 933 934 /* remove a sock from the accept queue to bind it to a new socket created 935 * for a socket accept call from user space 936 */ 937 struct sock *smc_accept_dequeue(struct sock *parent, 938 struct socket *new_sock) 939 { 940 struct smc_sock *isk, *n; 941 struct sock *new_sk; 942 943 list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) { 944 new_sk = (struct sock *)isk; 945 946 smc_accept_unlink(new_sk); 947 if (new_sk->sk_state == SMC_CLOSED) { 948 new_sk->sk_prot->unhash(new_sk); 949 if (isk->clcsock) { 950 sock_release(isk->clcsock); 951 isk->clcsock = NULL; 952 } 953 sock_put(new_sk); /* final */ 954 continue; 955 } 956 if (new_sock) { 957 sock_graft(new_sk, new_sock); 958 if (isk->use_fallback) { 959 smc_sk(new_sk)->clcsock->file = new_sock->file; 960 isk->clcsock->file->private_data = isk->clcsock; 961 } 962 } 963 return new_sk; 964 } 965 return NULL; 966 } 967 968 /* clean up for a created but never accepted sock */ 969 void smc_close_non_accepted(struct sock 
*sk) 970 { 971 struct smc_sock *smc = smc_sk(sk); 972 973 lock_sock(sk); 974 if (!sk->sk_lingertime) 975 /* wait for peer closing */ 976 sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT; 977 __smc_release(smc); 978 release_sock(sk); 979 sock_put(sk); /* final sock_put */ 980 } 981 982 static int smc_serv_conf_first_link(struct smc_sock *smc) 983 { 984 struct net *net = sock_net(smc->clcsock->sk); 985 struct smc_link_group *lgr = smc->conn.lgr; 986 struct smc_link *link; 987 int rest; 988 int rc; 989 990 link = &lgr->lnk[SMC_SINGLE_LINK]; 991 992 if (smc_reg_rmb(link, smc->conn.rmb_desc, false)) 993 return SMC_CLC_DECL_ERR_REGRMB; 994 995 /* send CONFIRM LINK request to client over the RoCE fabric */ 996 rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ); 997 if (rc < 0) 998 return SMC_CLC_DECL_TIMEOUT_CL; 999 1000 /* receive CONFIRM LINK response from client over the RoCE fabric */ 1001 rest = wait_for_completion_interruptible_timeout( 1002 &link->llc_confirm_resp, 1003 SMC_LLC_WAIT_FIRST_TIME); 1004 if (rest <= 0) { 1005 struct smc_clc_msg_decline dclc; 1006 1007 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), 1008 SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT); 1009 return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc; 1010 } 1011 1012 if (link->llc_confirm_resp_rc) 1013 return SMC_CLC_DECL_RMBE_EC; 1014 1015 /* send ADD LINK request to client over the RoCE fabric */ 1016 rc = smc_llc_send_add_link(link, 1017 link->smcibdev->mac[link->ibport - 1], 1018 link->gid, SMC_LLC_REQ); 1019 if (rc < 0) 1020 return SMC_CLC_DECL_TIMEOUT_AL; 1021 1022 /* receive ADD LINK response from client over the RoCE fabric */ 1023 rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp, 1024 SMC_LLC_WAIT_TIME); 1025 if (rest <= 0) { 1026 struct smc_clc_msg_decline dclc; 1027 1028 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), 1029 SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT); 1030 return rc == -EAGAIN ? 
SMC_CLC_DECL_TIMEOUT_AL : rc; 1031 } 1032 1033 smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time); 1034 1035 return 0; 1036 } 1037 1038 /* listen worker: finish */ 1039 static void smc_listen_out(struct smc_sock *new_smc) 1040 { 1041 struct smc_sock *lsmc = new_smc->listen_smc; 1042 struct sock *newsmcsk = &new_smc->sk; 1043 1044 if (lsmc->sk.sk_state == SMC_LISTEN) { 1045 lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING); 1046 smc_accept_enqueue(&lsmc->sk, newsmcsk); 1047 release_sock(&lsmc->sk); 1048 } else { /* no longer listening */ 1049 smc_close_non_accepted(newsmcsk); 1050 } 1051 1052 /* Wake up accept */ 1053 lsmc->sk.sk_data_ready(&lsmc->sk); 1054 sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */ 1055 } 1056 1057 /* listen worker: finish in state connected */ 1058 static void smc_listen_out_connected(struct smc_sock *new_smc) 1059 { 1060 struct sock *newsmcsk = &new_smc->sk; 1061 1062 sk_refcnt_debug_inc(newsmcsk); 1063 if (newsmcsk->sk_state == SMC_INIT) 1064 newsmcsk->sk_state = SMC_ACTIVE; 1065 1066 smc_listen_out(new_smc); 1067 } 1068 1069 /* listen worker: finish in error state */ 1070 static void smc_listen_out_err(struct smc_sock *new_smc) 1071 { 1072 struct sock *newsmcsk = &new_smc->sk; 1073 1074 if (newsmcsk->sk_state == SMC_INIT) 1075 sock_put(&new_smc->sk); /* passive closing */ 1076 newsmcsk->sk_state = SMC_CLOSED; 1077 smc_conn_free(&new_smc->conn); 1078 1079 smc_listen_out(new_smc); 1080 } 1081 1082 /* listen worker: decline and fall back if possible */ 1083 static void smc_listen_decline(struct smc_sock *new_smc, int reason_code, 1084 int local_contact) 1085 { 1086 /* RDMA setup failed, switch back to TCP */ 1087 if (local_contact == SMC_FIRST_CONTACT) 1088 smc_lgr_forget(new_smc->conn.lgr); 1089 if (reason_code < 0) { /* error, no fallback possible */ 1090 smc_listen_out_err(new_smc); 1091 return; 1092 } 1093 smc_conn_free(&new_smc->conn); 1094 smc_switch_to_fallback(new_smc); 1095 new_smc->fallback_rsn = reason_code; 
1096 if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) { 1097 if (smc_clc_send_decline(new_smc, reason_code) < 0) { 1098 smc_listen_out_err(new_smc); 1099 return; 1100 } 1101 } 1102 smc_listen_out_connected(new_smc); 1103 } 1104 1105 /* listen worker: check prefixes */ 1106 static int smc_listen_prfx_check(struct smc_sock *new_smc, 1107 struct smc_clc_msg_proposal *pclc) 1108 { 1109 struct smc_clc_msg_proposal_prefix *pclc_prfx; 1110 struct socket *newclcsock = new_smc->clcsock; 1111 1112 pclc_prfx = smc_clc_proposal_get_prefix(pclc); 1113 if (smc_clc_prfx_match(newclcsock, pclc_prfx)) 1114 return SMC_CLC_DECL_DIFFPREFIX; 1115 1116 return 0; 1117 } 1118 1119 /* listen worker: initialize connection and buffers */ 1120 static int smc_listen_rdma_init(struct smc_sock *new_smc, 1121 struct smc_init_info *ini) 1122 { 1123 int rc; 1124 1125 /* allocate connection / link group */ 1126 rc = smc_conn_create(new_smc, ini); 1127 if (rc) 1128 return rc; 1129 1130 /* create send buffer and rmb */ 1131 if (smc_buf_create(new_smc, false)) 1132 return SMC_CLC_DECL_MEM; 1133 1134 return 0; 1135 } 1136 1137 /* listen worker: initialize connection and buffers for SMC-D */ 1138 static int smc_listen_ism_init(struct smc_sock *new_smc, 1139 struct smc_clc_msg_proposal *pclc, 1140 struct smc_init_info *ini) 1141 { 1142 struct smc_clc_msg_smcd *pclc_smcd; 1143 int rc; 1144 1145 pclc_smcd = smc_get_clc_msg_smcd(pclc); 1146 ini->ism_gid = pclc_smcd->gid; 1147 rc = smc_conn_create(new_smc, ini); 1148 if (rc) 1149 return rc; 1150 1151 /* Check if peer can be reached via ISM device */ 1152 if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid, 1153 new_smc->conn.lgr->vlan_id, 1154 new_smc->conn.lgr->smcd)) { 1155 if (ini->cln_first_contact == SMC_FIRST_CONTACT) 1156 smc_lgr_forget(new_smc->conn.lgr); 1157 smc_conn_free(&new_smc->conn); 1158 return SMC_CLC_DECL_SMCDNOTALK; 1159 } 1160 1161 /* Create send and receive buffers */ 1162 if (smc_buf_create(new_smc, true)) { 1163 if 
(ini->cln_first_contact == SMC_FIRST_CONTACT) 1164 smc_lgr_forget(new_smc->conn.lgr); 1165 smc_conn_free(&new_smc->conn); 1166 return SMC_CLC_DECL_MEM; 1167 } 1168 1169 return 0; 1170 } 1171 1172 /* listen worker: register buffers */ 1173 static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact) 1174 { 1175 struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK]; 1176 1177 if (local_contact != SMC_FIRST_CONTACT) { 1178 if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true)) 1179 return SMC_CLC_DECL_ERR_REGRMB; 1180 } 1181 smc_rmb_sync_sg_for_device(&new_smc->conn); 1182 1183 return 0; 1184 } 1185 1186 /* listen worker: finish RDMA setup */ 1187 static int smc_listen_rdma_finish(struct smc_sock *new_smc, 1188 struct smc_clc_msg_accept_confirm *cclc, 1189 int local_contact) 1190 { 1191 struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK]; 1192 int reason_code = 0; 1193 1194 if (local_contact == SMC_FIRST_CONTACT) 1195 smc_link_save_peer_info(link, cclc); 1196 1197 if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) { 1198 reason_code = SMC_CLC_DECL_ERR_RTOK; 1199 goto decline; 1200 } 1201 1202 if (local_contact == SMC_FIRST_CONTACT) { 1203 if (smc_ib_ready_link(link)) { 1204 reason_code = SMC_CLC_DECL_ERR_RDYLNK; 1205 goto decline; 1206 } 1207 /* QP confirmation over RoCE fabric */ 1208 reason_code = smc_serv_conf_first_link(new_smc); 1209 if (reason_code) 1210 goto decline; 1211 } 1212 return 0; 1213 1214 decline: 1215 smc_listen_decline(new_smc, reason_code, local_contact); 1216 return reason_code; 1217 } 1218 1219 /* setup for RDMA connection of server */ 1220 static void smc_listen_work(struct work_struct *work) 1221 { 1222 struct smc_sock *new_smc = container_of(work, struct smc_sock, 1223 smc_listen_work); 1224 struct socket *newclcsock = new_smc->clcsock; 1225 struct smc_clc_msg_accept_confirm cclc; 1226 struct smc_clc_msg_proposal *pclc; 1227 struct smc_init_info ini = {0}; 1228 bool ism_supported = false; 1229 u8 
buf[SMC_CLC_MAX_LEN]; 1230 int rc = 0; 1231 1232 if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN) 1233 return smc_listen_out_err(new_smc); 1234 1235 if (new_smc->use_fallback) { 1236 smc_listen_out_connected(new_smc); 1237 return; 1238 } 1239 1240 /* check if peer is smc capable */ 1241 if (!tcp_sk(newclcsock->sk)->syn_smc) { 1242 smc_switch_to_fallback(new_smc); 1243 new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC; 1244 smc_listen_out_connected(new_smc); 1245 return; 1246 } 1247 1248 /* do inband token exchange - 1249 * wait for and receive SMC Proposal CLC message 1250 */ 1251 pclc = (struct smc_clc_msg_proposal *)&buf; 1252 rc = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN, 1253 SMC_CLC_PROPOSAL, CLC_WAIT_TIME); 1254 if (rc) 1255 goto out_decl; 1256 1257 /* IPSec connections opt out of SMC-R optimizations */ 1258 if (using_ipsec(new_smc)) { 1259 rc = SMC_CLC_DECL_IPSEC; 1260 goto out_decl; 1261 } 1262 1263 /* check for matching IP prefix and subnet length */ 1264 rc = smc_listen_prfx_check(new_smc, pclc); 1265 if (rc) 1266 goto out_decl; 1267 1268 /* get vlan id from IP device */ 1269 if (smc_vlan_by_tcpsk(new_smc->clcsock, &ini)) { 1270 rc = SMC_CLC_DECL_GETVLANERR; 1271 goto out_decl; 1272 } 1273 1274 mutex_lock(&smc_server_lgr_pending); 1275 smc_close_init(new_smc); 1276 smc_rx_init(new_smc); 1277 smc_tx_init(new_smc); 1278 1279 /* check if ISM is available */ 1280 if (pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) { 1281 ini.is_smcd = true; /* prepare ISM check */ 1282 rc = smc_find_ism_device(new_smc, &ini); 1283 if (!rc) 1284 rc = smc_listen_ism_init(new_smc, pclc, &ini); 1285 if (!rc) 1286 ism_supported = true; 1287 else if (pclc->hdr.path == SMC_TYPE_D) 1288 goto out_unlock; /* skip RDMA and decline */ 1289 } 1290 1291 /* check if RDMA is available */ 1292 if (!ism_supported) { /* SMC_TYPE_R or SMC_TYPE_B */ 1293 /* prepare RDMA check */ 1294 memset(&ini, 0, sizeof(ini)); 1295 ini.is_smcd = false; 1296 ini.ib_lcl = &pclc->lcl; 1297 rc 
= smc_find_rdma_device(new_smc, &ini); 1298 if (rc) { 1299 /* no RDMA device found */ 1300 if (pclc->hdr.path == SMC_TYPE_B) 1301 /* neither ISM nor RDMA device found */ 1302 rc = SMC_CLC_DECL_NOSMCDEV; 1303 goto out_unlock; 1304 } 1305 rc = smc_listen_rdma_init(new_smc, &ini); 1306 if (rc) 1307 goto out_unlock; 1308 rc = smc_listen_rdma_reg(new_smc, ini.cln_first_contact); 1309 if (rc) 1310 goto out_unlock; 1311 } 1312 1313 /* send SMC Accept CLC message */ 1314 rc = smc_clc_send_accept(new_smc, ini.cln_first_contact); 1315 if (rc) 1316 goto out_unlock; 1317 1318 /* SMC-D does not need this lock any more */ 1319 if (ism_supported) 1320 mutex_unlock(&smc_server_lgr_pending); 1321 1322 /* receive SMC Confirm CLC message */ 1323 rc = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc), 1324 SMC_CLC_CONFIRM, CLC_WAIT_TIME); 1325 if (rc) { 1326 if (!ism_supported) 1327 goto out_unlock; 1328 goto out_decl; 1329 } 1330 1331 /* finish worker */ 1332 if (!ism_supported) { 1333 rc = smc_listen_rdma_finish(new_smc, &cclc, 1334 ini.cln_first_contact); 1335 mutex_unlock(&smc_server_lgr_pending); 1336 if (rc) 1337 return; 1338 } 1339 smc_conn_save_peer_info(new_smc, &cclc); 1340 smc_listen_out_connected(new_smc); 1341 return; 1342 1343 out_unlock: 1344 mutex_unlock(&smc_server_lgr_pending); 1345 out_decl: 1346 smc_listen_decline(new_smc, rc, ini.cln_first_contact); 1347 } 1348 1349 static void smc_tcp_listen_work(struct work_struct *work) 1350 { 1351 struct smc_sock *lsmc = container_of(work, struct smc_sock, 1352 tcp_listen_work); 1353 struct sock *lsk = &lsmc->sk; 1354 struct smc_sock *new_smc; 1355 int rc = 0; 1356 1357 lock_sock(lsk); 1358 while (lsk->sk_state == SMC_LISTEN) { 1359 rc = smc_clcsock_accept(lsmc, &new_smc); 1360 if (rc) 1361 goto out; 1362 if (!new_smc) 1363 continue; 1364 1365 new_smc->listen_smc = lsmc; 1366 new_smc->use_fallback = lsmc->use_fallback; 1367 new_smc->fallback_rsn = lsmc->fallback_rsn; 1368 sock_hold(lsk); /* sock_put in smc_listen_work */ 1369 
INIT_WORK(&new_smc->smc_listen_work, smc_listen_work); 1370 smc_copy_sock_settings_to_smc(new_smc); 1371 new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf; 1372 new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf; 1373 sock_hold(&new_smc->sk); /* sock_put in passive closing */ 1374 if (!schedule_work(&new_smc->smc_listen_work)) 1375 sock_put(&new_smc->sk); 1376 } 1377 1378 out: 1379 release_sock(lsk); 1380 sock_put(&lsmc->sk); /* sock_hold in smc_listen */ 1381 } 1382 1383 static int smc_listen(struct socket *sock, int backlog) 1384 { 1385 struct sock *sk = sock->sk; 1386 struct smc_sock *smc; 1387 int rc; 1388 1389 smc = smc_sk(sk); 1390 lock_sock(sk); 1391 1392 rc = -EINVAL; 1393 if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) || 1394 smc->connect_nonblock) 1395 goto out; 1396 1397 rc = 0; 1398 if (sk->sk_state == SMC_LISTEN) { 1399 sk->sk_max_ack_backlog = backlog; 1400 goto out; 1401 } 1402 /* some socket options are handled in core, so we could not apply 1403 * them to the clc socket -- copy smc socket options to clc socket 1404 */ 1405 smc_copy_sock_settings_to_clc(smc); 1406 if (!smc->use_fallback) 1407 tcp_sk(smc->clcsock->sk)->syn_smc = 1; 1408 1409 rc = kernel_listen(smc->clcsock, backlog); 1410 if (rc) 1411 goto out; 1412 sk->sk_max_ack_backlog = backlog; 1413 sk->sk_ack_backlog = 0; 1414 sk->sk_state = SMC_LISTEN; 1415 sock_hold(sk); /* sock_hold in tcp_listen_worker */ 1416 if (!schedule_work(&smc->tcp_listen_work)) 1417 sock_put(sk); 1418 1419 out: 1420 release_sock(sk); 1421 return rc; 1422 } 1423 1424 static int smc_accept(struct socket *sock, struct socket *new_sock, 1425 int flags, bool kern) 1426 { 1427 struct sock *sk = sock->sk, *nsk; 1428 DECLARE_WAITQUEUE(wait, current); 1429 struct smc_sock *lsmc; 1430 long timeo; 1431 int rc = 0; 1432 1433 lsmc = smc_sk(sk); 1434 sock_hold(sk); /* sock_put below */ 1435 lock_sock(sk); 1436 1437 if (lsmc->sk.sk_state != SMC_LISTEN) { 1438 rc = -EINVAL; 1439 release_sock(sk); 1440 goto out; 1441 } 1442 1443 /* 
Wait for an incoming connection */ 1444 timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); 1445 add_wait_queue_exclusive(sk_sleep(sk), &wait); 1446 while (!(nsk = smc_accept_dequeue(sk, new_sock))) { 1447 set_current_state(TASK_INTERRUPTIBLE); 1448 if (!timeo) { 1449 rc = -EAGAIN; 1450 break; 1451 } 1452 release_sock(sk); 1453 timeo = schedule_timeout(timeo); 1454 /* wakeup by sk_data_ready in smc_listen_work() */ 1455 sched_annotate_sleep(); 1456 lock_sock(sk); 1457 if (signal_pending(current)) { 1458 rc = sock_intr_errno(timeo); 1459 break; 1460 } 1461 } 1462 set_current_state(TASK_RUNNING); 1463 remove_wait_queue(sk_sleep(sk), &wait); 1464 1465 if (!rc) 1466 rc = sock_error(nsk); 1467 release_sock(sk); 1468 if (rc) 1469 goto out; 1470 1471 if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) { 1472 /* wait till data arrives on the socket */ 1473 timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept * 1474 MSEC_PER_SEC); 1475 if (smc_sk(nsk)->use_fallback) { 1476 struct sock *clcsk = smc_sk(nsk)->clcsock->sk; 1477 1478 lock_sock(clcsk); 1479 if (skb_queue_empty(&clcsk->sk_receive_queue)) 1480 sk_wait_data(clcsk, &timeo, NULL); 1481 release_sock(clcsk); 1482 } else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) { 1483 lock_sock(nsk); 1484 smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available); 1485 release_sock(nsk); 1486 } 1487 } 1488 1489 out: 1490 sock_put(sk); /* sock_hold above */ 1491 return rc; 1492 } 1493 1494 static int smc_getname(struct socket *sock, struct sockaddr *addr, 1495 int peer) 1496 { 1497 struct smc_sock *smc; 1498 1499 if (peer && (sock->sk->sk_state != SMC_ACTIVE) && 1500 (sock->sk->sk_state != SMC_APPCLOSEWAIT1)) 1501 return -ENOTCONN; 1502 1503 smc = smc_sk(sock->sk); 1504 1505 return smc->clcsock->ops->getname(smc->clcsock, addr, peer); 1506 } 1507 1508 static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) 1509 { 1510 struct sock *sk = sock->sk; 1511 struct smc_sock *smc; 1512 int rc = -EPIPE; 1513 1514 smc = 
smc_sk(sk); 1515 lock_sock(sk); 1516 if ((sk->sk_state != SMC_ACTIVE) && 1517 (sk->sk_state != SMC_APPCLOSEWAIT1) && 1518 (sk->sk_state != SMC_INIT)) 1519 goto out; 1520 1521 if (msg->msg_flags & MSG_FASTOPEN) { 1522 if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) { 1523 smc_switch_to_fallback(smc); 1524 smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP; 1525 } else { 1526 rc = -EINVAL; 1527 goto out; 1528 } 1529 } 1530 1531 if (smc->use_fallback) 1532 rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len); 1533 else 1534 rc = smc_tx_sendmsg(smc, msg, len); 1535 out: 1536 release_sock(sk); 1537 return rc; 1538 } 1539 1540 static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, 1541 int flags) 1542 { 1543 struct sock *sk = sock->sk; 1544 struct smc_sock *smc; 1545 int rc = -ENOTCONN; 1546 1547 smc = smc_sk(sk); 1548 lock_sock(sk); 1549 if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { 1550 /* socket was connected before, no more data to read */ 1551 rc = 0; 1552 goto out; 1553 } 1554 if ((sk->sk_state == SMC_INIT) || 1555 (sk->sk_state == SMC_LISTEN) || 1556 (sk->sk_state == SMC_CLOSED)) 1557 goto out; 1558 1559 if (sk->sk_state == SMC_PEERFINCLOSEWAIT) { 1560 rc = 0; 1561 goto out; 1562 } 1563 1564 if (smc->use_fallback) { 1565 rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags); 1566 } else { 1567 msg->msg_namelen = 0; 1568 rc = smc_rx_recvmsg(smc, msg, NULL, len, flags); 1569 } 1570 1571 out: 1572 release_sock(sk); 1573 return rc; 1574 } 1575 1576 static __poll_t smc_accept_poll(struct sock *parent) 1577 { 1578 struct smc_sock *isk = smc_sk(parent); 1579 __poll_t mask = 0; 1580 1581 spin_lock(&isk->accept_q_lock); 1582 if (!list_empty(&isk->accept_q)) 1583 mask = EPOLLIN | EPOLLRDNORM; 1584 spin_unlock(&isk->accept_q_lock); 1585 1586 return mask; 1587 } 1588 1589 static __poll_t smc_poll(struct file *file, struct socket *sock, 1590 poll_table *wait) 1591 { 1592 struct sock *sk = sock->sk; 1593 struct smc_sock 
*smc; 1594 __poll_t mask = 0; 1595 1596 if (!sk) 1597 return EPOLLNVAL; 1598 1599 smc = smc_sk(sock->sk); 1600 if (smc->use_fallback) { 1601 /* delegate to CLC child sock */ 1602 mask = smc->clcsock->ops->poll(file, smc->clcsock, wait); 1603 sk->sk_err = smc->clcsock->sk->sk_err; 1604 } else { 1605 if (sk->sk_state != SMC_CLOSED) 1606 sock_poll_wait(file, sock, wait); 1607 if (sk->sk_err) 1608 mask |= EPOLLERR; 1609 if ((sk->sk_shutdown == SHUTDOWN_MASK) || 1610 (sk->sk_state == SMC_CLOSED)) 1611 mask |= EPOLLHUP; 1612 if (sk->sk_state == SMC_LISTEN) { 1613 /* woken up by sk_data_ready in smc_listen_work() */ 1614 mask |= smc_accept_poll(sk); 1615 } else if (smc->use_fallback) { /* as result of connect_work()*/ 1616 mask |= smc->clcsock->ops->poll(file, smc->clcsock, 1617 wait); 1618 sk->sk_err = smc->clcsock->sk->sk_err; 1619 } else { 1620 if ((sk->sk_state != SMC_INIT && 1621 atomic_read(&smc->conn.sndbuf_space)) || 1622 sk->sk_shutdown & SEND_SHUTDOWN) { 1623 mask |= EPOLLOUT | EPOLLWRNORM; 1624 } else { 1625 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 1626 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 1627 } 1628 if (atomic_read(&smc->conn.bytes_to_rcv)) 1629 mask |= EPOLLIN | EPOLLRDNORM; 1630 if (sk->sk_shutdown & RCV_SHUTDOWN) 1631 mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; 1632 if (sk->sk_state == SMC_APPCLOSEWAIT1) 1633 mask |= EPOLLIN; 1634 if (smc->conn.urg_state == SMC_URG_VALID) 1635 mask |= EPOLLPRI; 1636 } 1637 } 1638 1639 return mask; 1640 } 1641 1642 static int smc_shutdown(struct socket *sock, int how) 1643 { 1644 struct sock *sk = sock->sk; 1645 struct smc_sock *smc; 1646 int rc = -EINVAL; 1647 int rc1 = 0; 1648 1649 smc = smc_sk(sk); 1650 1651 if ((how < SHUT_RD) || (how > SHUT_RDWR)) 1652 return rc; 1653 1654 lock_sock(sk); 1655 1656 rc = -ENOTCONN; 1657 if ((sk->sk_state != SMC_ACTIVE) && 1658 (sk->sk_state != SMC_PEERCLOSEWAIT1) && 1659 (sk->sk_state != SMC_PEERCLOSEWAIT2) && 1660 (sk->sk_state != SMC_APPCLOSEWAIT1) && 1661 (sk->sk_state != 
SMC_APPCLOSEWAIT2) && 1662 (sk->sk_state != SMC_APPFINCLOSEWAIT)) 1663 goto out; 1664 if (smc->use_fallback) { 1665 rc = kernel_sock_shutdown(smc->clcsock, how); 1666 sk->sk_shutdown = smc->clcsock->sk->sk_shutdown; 1667 if (sk->sk_shutdown == SHUTDOWN_MASK) 1668 sk->sk_state = SMC_CLOSED; 1669 goto out; 1670 } 1671 switch (how) { 1672 case SHUT_RDWR: /* shutdown in both directions */ 1673 rc = smc_close_active(smc); 1674 break; 1675 case SHUT_WR: 1676 rc = smc_close_shutdown_write(smc); 1677 break; 1678 case SHUT_RD: 1679 rc = 0; 1680 /* nothing more to do because peer is not involved */ 1681 break; 1682 } 1683 if (smc->clcsock) 1684 rc1 = kernel_sock_shutdown(smc->clcsock, how); 1685 /* map sock_shutdown_cmd constants to sk_shutdown value range */ 1686 sk->sk_shutdown |= how + 1; 1687 1688 out: 1689 release_sock(sk); 1690 return rc ? rc : rc1; 1691 } 1692 1693 static int smc_setsockopt(struct socket *sock, int level, int optname, 1694 char __user *optval, unsigned int optlen) 1695 { 1696 struct sock *sk = sock->sk; 1697 struct smc_sock *smc; 1698 int val, rc; 1699 1700 smc = smc_sk(sk); 1701 1702 /* generic setsockopts reaching us here always apply to the 1703 * CLC socket 1704 */ 1705 rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname, 1706 optval, optlen); 1707 if (smc->clcsock->sk->sk_err) { 1708 sk->sk_err = smc->clcsock->sk->sk_err; 1709 sk->sk_error_report(sk); 1710 } 1711 if (rc) 1712 return rc; 1713 1714 if (optlen < sizeof(int)) 1715 return -EINVAL; 1716 if (get_user(val, (int __user *)optval)) 1717 return -EFAULT; 1718 1719 lock_sock(sk); 1720 switch (optname) { 1721 case TCP_ULP: 1722 case TCP_FASTOPEN: 1723 case TCP_FASTOPEN_CONNECT: 1724 case TCP_FASTOPEN_KEY: 1725 case TCP_FASTOPEN_NO_COOKIE: 1726 /* option not supported by SMC */ 1727 if (sk->sk_state == SMC_INIT) { 1728 smc_switch_to_fallback(smc); 1729 smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP; 1730 } else { 1731 if (!smc->use_fallback) 1732 rc = -EINVAL; 1733 } 1734 break; 1735 
case TCP_NODELAY: 1736 if (sk->sk_state != SMC_INIT && 1737 sk->sk_state != SMC_LISTEN && 1738 sk->sk_state != SMC_CLOSED) { 1739 if (val && !smc->use_fallback) 1740 mod_delayed_work(system_wq, &smc->conn.tx_work, 1741 0); 1742 } 1743 break; 1744 case TCP_CORK: 1745 if (sk->sk_state != SMC_INIT && 1746 sk->sk_state != SMC_LISTEN && 1747 sk->sk_state != SMC_CLOSED) { 1748 if (!val && !smc->use_fallback) 1749 mod_delayed_work(system_wq, &smc->conn.tx_work, 1750 0); 1751 } 1752 break; 1753 case TCP_DEFER_ACCEPT: 1754 smc->sockopt_defer_accept = val; 1755 break; 1756 default: 1757 break; 1758 } 1759 release_sock(sk); 1760 1761 return rc; 1762 } 1763 1764 static int smc_getsockopt(struct socket *sock, int level, int optname, 1765 char __user *optval, int __user *optlen) 1766 { 1767 struct smc_sock *smc; 1768 1769 smc = smc_sk(sock->sk); 1770 /* socket options apply to the CLC socket */ 1771 return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname, 1772 optval, optlen); 1773 } 1774 1775 static int smc_ioctl(struct socket *sock, unsigned int cmd, 1776 unsigned long arg) 1777 { 1778 union smc_host_cursor cons, urg; 1779 struct smc_connection *conn; 1780 struct smc_sock *smc; 1781 int answ; 1782 1783 smc = smc_sk(sock->sk); 1784 conn = &smc->conn; 1785 lock_sock(&smc->sk); 1786 if (smc->use_fallback) { 1787 if (!smc->clcsock) { 1788 release_sock(&smc->sk); 1789 return -EBADF; 1790 } 1791 answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg); 1792 release_sock(&smc->sk); 1793 return answ; 1794 } 1795 switch (cmd) { 1796 case SIOCINQ: /* same as FIONREAD */ 1797 if (smc->sk.sk_state == SMC_LISTEN) { 1798 release_sock(&smc->sk); 1799 return -EINVAL; 1800 } 1801 if (smc->sk.sk_state == SMC_INIT || 1802 smc->sk.sk_state == SMC_CLOSED) 1803 answ = 0; 1804 else 1805 answ = atomic_read(&smc->conn.bytes_to_rcv); 1806 break; 1807 case SIOCOUTQ: 1808 /* output queue size (not send + not acked) */ 1809 if (smc->sk.sk_state == SMC_LISTEN) { 1810 release_sock(&smc->sk); 1811 
return -EINVAL; 1812 } 1813 if (smc->sk.sk_state == SMC_INIT || 1814 smc->sk.sk_state == SMC_CLOSED) 1815 answ = 0; 1816 else 1817 answ = smc->conn.sndbuf_desc->len - 1818 atomic_read(&smc->conn.sndbuf_space); 1819 break; 1820 case SIOCOUTQNSD: 1821 /* output queue size (not send only) */ 1822 if (smc->sk.sk_state == SMC_LISTEN) { 1823 release_sock(&smc->sk); 1824 return -EINVAL; 1825 } 1826 if (smc->sk.sk_state == SMC_INIT || 1827 smc->sk.sk_state == SMC_CLOSED) 1828 answ = 0; 1829 else 1830 answ = smc_tx_prepared_sends(&smc->conn); 1831 break; 1832 case SIOCATMARK: 1833 if (smc->sk.sk_state == SMC_LISTEN) { 1834 release_sock(&smc->sk); 1835 return -EINVAL; 1836 } 1837 if (smc->sk.sk_state == SMC_INIT || 1838 smc->sk.sk_state == SMC_CLOSED) { 1839 answ = 0; 1840 } else { 1841 smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn); 1842 smc_curs_copy(&urg, &conn->urg_curs, conn); 1843 answ = smc_curs_diff(conn->rmb_desc->len, 1844 &cons, &urg) == 1; 1845 } 1846 break; 1847 default: 1848 release_sock(&smc->sk); 1849 return -ENOIOCTLCMD; 1850 } 1851 release_sock(&smc->sk); 1852 1853 return put_user(answ, (int __user *)arg); 1854 } 1855 1856 static ssize_t smc_sendpage(struct socket *sock, struct page *page, 1857 int offset, size_t size, int flags) 1858 { 1859 struct sock *sk = sock->sk; 1860 struct smc_sock *smc; 1861 int rc = -EPIPE; 1862 1863 smc = smc_sk(sk); 1864 lock_sock(sk); 1865 if (sk->sk_state != SMC_ACTIVE) { 1866 release_sock(sk); 1867 goto out; 1868 } 1869 release_sock(sk); 1870 if (smc->use_fallback) 1871 rc = kernel_sendpage(smc->clcsock, page, offset, 1872 size, flags); 1873 else 1874 rc = sock_no_sendpage(sock, page, offset, size, flags); 1875 1876 out: 1877 return rc; 1878 } 1879 1880 /* Map the affected portions of the rmbe into an spd, note the number of bytes 1881 * to splice in conn->splice_pending, and press 'go'. Delays consumer cursor 1882 * updates till whenever a respective page has been fully processed. 
1883 * Note that subsequent recv() calls have to wait till all splice() processing 1884 * completed. 1885 */ 1886 static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos, 1887 struct pipe_inode_info *pipe, size_t len, 1888 unsigned int flags) 1889 { 1890 struct sock *sk = sock->sk; 1891 struct smc_sock *smc; 1892 int rc = -ENOTCONN; 1893 1894 smc = smc_sk(sk); 1895 lock_sock(sk); 1896 if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { 1897 /* socket was connected before, no more data to read */ 1898 rc = 0; 1899 goto out; 1900 } 1901 if (sk->sk_state == SMC_INIT || 1902 sk->sk_state == SMC_LISTEN || 1903 sk->sk_state == SMC_CLOSED) 1904 goto out; 1905 1906 if (sk->sk_state == SMC_PEERFINCLOSEWAIT) { 1907 rc = 0; 1908 goto out; 1909 } 1910 1911 if (smc->use_fallback) { 1912 rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos, 1913 pipe, len, flags); 1914 } else { 1915 if (*ppos) { 1916 rc = -ESPIPE; 1917 goto out; 1918 } 1919 if (flags & SPLICE_F_NONBLOCK) 1920 flags = MSG_DONTWAIT; 1921 else 1922 flags = 0; 1923 rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags); 1924 } 1925 out: 1926 release_sock(sk); 1927 1928 return rc; 1929 } 1930 1931 /* must look like tcp */ 1932 static const struct proto_ops smc_sock_ops = { 1933 .family = PF_SMC, 1934 .owner = THIS_MODULE, 1935 .release = smc_release, 1936 .bind = smc_bind, 1937 .connect = smc_connect, 1938 .socketpair = sock_no_socketpair, 1939 .accept = smc_accept, 1940 .getname = smc_getname, 1941 .poll = smc_poll, 1942 .ioctl = smc_ioctl, 1943 .listen = smc_listen, 1944 .shutdown = smc_shutdown, 1945 .setsockopt = smc_setsockopt, 1946 .getsockopt = smc_getsockopt, 1947 .sendmsg = smc_sendmsg, 1948 .recvmsg = smc_recvmsg, 1949 .mmap = sock_no_mmap, 1950 .sendpage = smc_sendpage, 1951 .splice_read = smc_splice_read, 1952 }; 1953 1954 static int smc_create(struct net *net, struct socket *sock, int protocol, 1955 int kern) 1956 { 1957 int family = (protocol == SMCPROTO_SMC6) ? 
PF_INET6 : PF_INET; 1958 struct smc_sock *smc; 1959 struct sock *sk; 1960 int rc; 1961 1962 rc = -ESOCKTNOSUPPORT; 1963 if (sock->type != SOCK_STREAM) 1964 goto out; 1965 1966 rc = -EPROTONOSUPPORT; 1967 if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6) 1968 goto out; 1969 1970 rc = -ENOBUFS; 1971 sock->ops = &smc_sock_ops; 1972 sk = smc_sock_alloc(net, sock, protocol); 1973 if (!sk) 1974 goto out; 1975 1976 /* create internal TCP socket for CLC handshake and fallback */ 1977 smc = smc_sk(sk); 1978 smc->use_fallback = false; /* assume rdma capability first */ 1979 smc->fallback_rsn = 0; 1980 rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, 1981 &smc->clcsock); 1982 if (rc) { 1983 sk_common_release(sk); 1984 goto out; 1985 } 1986 smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE); 1987 smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE); 1988 1989 out: 1990 return rc; 1991 } 1992 1993 static const struct net_proto_family smc_sock_family_ops = { 1994 .family = PF_SMC, 1995 .owner = THIS_MODULE, 1996 .create = smc_create, 1997 }; 1998 1999 unsigned int smc_net_id; 2000 2001 static __net_init int smc_net_init(struct net *net) 2002 { 2003 return smc_pnet_net_init(net); 2004 } 2005 2006 static void __net_exit smc_net_exit(struct net *net) 2007 { 2008 smc_pnet_net_exit(net); 2009 } 2010 2011 static struct pernet_operations smc_net_ops = { 2012 .init = smc_net_init, 2013 .exit = smc_net_exit, 2014 .id = &smc_net_id, 2015 .size = sizeof(struct smc_net), 2016 }; 2017 2018 static int __init smc_init(void) 2019 { 2020 int rc; 2021 2022 rc = register_pernet_subsys(&smc_net_ops); 2023 if (rc) 2024 return rc; 2025 2026 rc = smc_pnet_init(); 2027 if (rc) 2028 goto out_pernet_subsys; 2029 2030 rc = smc_llc_init(); 2031 if (rc) { 2032 pr_err("%s: smc_llc_init fails with %d\n", __func__, rc); 2033 goto out_pnet; 2034 } 2035 2036 rc = smc_cdc_init(); 2037 if (rc) { 2038 pr_err("%s: smc_cdc_init fails with %d\n", __func__, 
rc); 2039 goto out_pnet; 2040 } 2041 2042 rc = proto_register(&smc_proto, 1); 2043 if (rc) { 2044 pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc); 2045 goto out_pnet; 2046 } 2047 2048 rc = proto_register(&smc_proto6, 1); 2049 if (rc) { 2050 pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc); 2051 goto out_proto; 2052 } 2053 2054 rc = sock_register(&smc_sock_family_ops); 2055 if (rc) { 2056 pr_err("%s: sock_register fails with %d\n", __func__, rc); 2057 goto out_proto6; 2058 } 2059 INIT_HLIST_HEAD(&smc_v4_hashinfo.ht); 2060 INIT_HLIST_HEAD(&smc_v6_hashinfo.ht); 2061 2062 rc = smc_ib_register_client(); 2063 if (rc) { 2064 pr_err("%s: ib_register fails with %d\n", __func__, rc); 2065 goto out_sock; 2066 } 2067 2068 static_branch_enable(&tcp_have_smc); 2069 return 0; 2070 2071 out_sock: 2072 sock_unregister(PF_SMC); 2073 out_proto6: 2074 proto_unregister(&smc_proto6); 2075 out_proto: 2076 proto_unregister(&smc_proto); 2077 out_pnet: 2078 smc_pnet_exit(); 2079 out_pernet_subsys: 2080 unregister_pernet_subsys(&smc_net_ops); 2081 2082 return rc; 2083 } 2084 2085 static void __exit smc_exit(void) 2086 { 2087 smc_core_exit(); 2088 static_branch_disable(&tcp_have_smc); 2089 smc_ib_unregister_client(); 2090 sock_unregister(PF_SMC); 2091 proto_unregister(&smc_proto6); 2092 proto_unregister(&smc_proto); 2093 smc_pnet_exit(); 2094 unregister_pernet_subsys(&smc_net_ops); 2095 } 2096 2097 module_init(smc_init); 2098 module_exit(smc_exit); 2099 2100 MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>"); 2101 MODULE_DESCRIPTION("smc socket address family"); 2102 MODULE_LICENSE("GPL"); 2103 MODULE_ALIAS_NETPROTO(PF_SMC); 2104