1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE 4 * 5 * AF_SMC protocol family socket handler keeping the AF_INET sock address type 6 * applies to SOCK_STREAM sockets only 7 * offers an alternative communication option for TCP-protocol sockets 8 * applicable with RoCE-cards only 9 * 10 * Initial restrictions: 11 * - support for alternate links postponed 12 * 13 * Copyright IBM Corp. 2016, 2018 14 * 15 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> 16 * based on prototype from Frank Blaschka 17 */ 18 19 #define KMSG_COMPONENT "smc" 20 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 21 22 #include <linux/module.h> 23 #include <linux/socket.h> 24 #include <linux/workqueue.h> 25 #include <linux/in.h> 26 #include <linux/sched/signal.h> 27 #include <linux/if_vlan.h> 28 29 #include <net/sock.h> 30 #include <net/tcp.h> 31 #include <net/smc.h> 32 #include <asm/ioctls.h> 33 34 #include <net/net_namespace.h> 35 #include <net/netns/generic.h> 36 #include "smc_netns.h" 37 38 #include "smc.h" 39 #include "smc_clc.h" 40 #include "smc_llc.h" 41 #include "smc_cdc.h" 42 #include "smc_core.h" 43 #include "smc_ib.h" 44 #include "smc_ism.h" 45 #include "smc_pnet.h" 46 #include "smc_tx.h" 47 #include "smc_rx.h" 48 #include "smc_close.h" 49 50 static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group 51 * creation on server 52 */ 53 static DEFINE_MUTEX(smc_client_lgr_pending); /* serialize link group 54 * creation on client 55 */ 56 57 static void smc_tcp_listen_work(struct work_struct *); 58 static void smc_connect_work(struct work_struct *); 59 60 static void smc_set_keepalive(struct sock *sk, int val) 61 { 62 struct smc_sock *smc = smc_sk(sk); 63 64 smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val); 65 } 66 67 static struct smc_hashinfo smc_v4_hashinfo = { 68 .lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock), 69 }; 70 71 static struct smc_hashinfo smc_v6_hashinfo = { 72 .lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock), 73 }; 74 75 int smc_hash_sk(struct sock *sk) 76 { 77 struct smc_hashinfo *h = sk->sk_prot->h.smc_hash; 78 struct hlist_head *head; 79 80 head = &h->ht; 81 82 write_lock_bh(&h->lock); 83 sk_add_node(sk, head); 84 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 85 write_unlock_bh(&h->lock); 86 87 return 0; 88 } 89 EXPORT_SYMBOL_GPL(smc_hash_sk); 90 91 void smc_unhash_sk(struct sock *sk) 92 { 93 struct smc_hashinfo *h = sk->sk_prot->h.smc_hash; 94 95 write_lock_bh(&h->lock); 96 if (sk_del_node_init(sk)) 97 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 98 write_unlock_bh(&h->lock); 99 } 100 EXPORT_SYMBOL_GPL(smc_unhash_sk); 101 102 struct proto smc_proto = { 103 .name = "SMC", 104 .owner = THIS_MODULE, 105 .keepalive = smc_set_keepalive, 106 .hash = smc_hash_sk, 107 .unhash = smc_unhash_sk, 108 .obj_size = sizeof(struct smc_sock), 109 .h.smc_hash = &smc_v4_hashinfo, 110 .slab_flags = SLAB_TYPESAFE_BY_RCU, 111 }; 112 EXPORT_SYMBOL_GPL(smc_proto); 113 114 struct proto smc_proto6 = { 115 .name = "SMC6", 116 .owner = THIS_MODULE, 117 .keepalive = smc_set_keepalive, 118 .hash = smc_hash_sk, 119 .unhash = smc_unhash_sk, 120 .obj_size = sizeof(struct smc_sock), 121 .h.smc_hash = &smc_v6_hashinfo, 122 .slab_flags = SLAB_TYPESAFE_BY_RCU, 123 }; 124 EXPORT_SYMBOL_GPL(smc_proto6); 125 126 static int __smc_release(struct smc_sock *smc) 127 { 128 struct sock *sk = &smc->sk; 129 int rc = 0; 130 131 if (!smc->use_fallback) { 132 rc = smc_close_active(smc); 133 sock_set_flag(sk, SOCK_DEAD); 134 sk->sk_shutdown |= SHUTDOWN_MASK; 135 } else { 136 if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT) 137 sock_put(sk); /* passive closing */ 138 if (sk->sk_state == SMC_LISTEN) { 139 /* wake up clcsock accept */ 140 rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); 141 } 142 sk->sk_state = SMC_CLOSED; 143 sk->sk_state_change(sk); 144 } 145 146 sk->sk_prot->unhash(sk); 147 148 if (sk->sk_state == SMC_CLOSED) { 149 if (smc->clcsock) { 150 release_sock(sk); 151 smc_clcsock_release(smc); 152 lock_sock(sk); 153 } 154 if (!smc->use_fallback) 155 smc_conn_free(&smc->conn); 156 } 157 158 return rc; 159 } 160 161 static int smc_release(struct socket *sock) 162 { 163 struct sock *sk = sock->sk; 164 struct smc_sock *smc; 165 int rc = 0; 166 167 if (!sk) 168 goto out; 169 170 smc = smc_sk(sk); 171 172 /* cleanup for a dangling non-blocking connect */ 173 if (smc->connect_nonblock && sk->sk_state == SMC_INIT) 174 tcp_abort(smc->clcsock->sk, ECONNABORTED); 175 flush_work(&smc->connect_work); 176 177 if (sk->sk_state == SMC_LISTEN) 178 /* smc_close_non_accepted() is called and acquires 179 * sock lock for child sockets again 180 */ 181 lock_sock_nested(sk, SINGLE_DEPTH_NESTING); 182 else 183 lock_sock(sk); 184 185 rc = __smc_release(smc); 186 187 /* detach socket */ 188 sock_orphan(sk); 189 sock->sk = NULL; 190 release_sock(sk); 191 192 sock_put(sk); /* final sock_put */ 193 out: 194 return rc; 195 } 196 197 static void smc_destruct(struct sock *sk) 198 { 199 if (sk->sk_state != SMC_CLOSED) 200 return; 201 if (!sock_flag(sk, SOCK_DEAD)) 202 return; 203 204 sk_refcnt_debug_dec(sk); 205 } 206 207 static struct sock *smc_sock_alloc(struct net *net, struct socket *sock, 208 int protocol) 209 { 210 struct smc_sock *smc; 211 struct proto *prot; 212 struct sock *sk; 213 214 prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto; 215 sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0); 216 if (!sk) 217 return NULL; 218 219 sock_init_data(sock, sk); /* sets sk_refcnt to 1 */ 220 sk->sk_state = SMC_INIT; 221 sk->sk_destruct = smc_destruct; 222 sk->sk_protocol = protocol; 223 smc = smc_sk(sk); 224 INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); 225 INIT_WORK(&smc->connect_work, smc_connect_work); 226 INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work); 227 INIT_LIST_HEAD(&smc->accept_q); 228 spin_lock_init(&smc->accept_q_lock); 229 spin_lock_init(&smc->conn.send_lock); 230 sk->sk_prot->hash(sk); 231 sk_refcnt_debug_inc(sk); 232 mutex_init(&smc->clcsock_release_lock); 233 234 return sk; 235 } 236 237 static int smc_bind(struct socket *sock, struct sockaddr *uaddr, 238 int addr_len) 239 { 240 struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; 241 struct sock *sk = sock->sk; 242 struct smc_sock *smc; 243 int rc; 244 245 smc = smc_sk(sk); 246 247 /* replicate tests from inet_bind(), to be safe wrt. future changes */ 248 rc = -EINVAL; 249 if (addr_len < sizeof(struct sockaddr_in)) 250 goto out; 251 252 rc = -EAFNOSUPPORT; 253 if (addr->sin_family != AF_INET && 254 addr->sin_family != AF_INET6 && 255 addr->sin_family != AF_UNSPEC) 256 goto out; 257 /* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */ 258 if (addr->sin_family == AF_UNSPEC && 259 addr->sin_addr.s_addr != htonl(INADDR_ANY)) 260 goto out; 261 262 lock_sock(sk); 263 264 /* Check if socket is already active */ 265 rc = -EINVAL; 266 if (sk->sk_state != SMC_INIT) 267 goto out_rel; 268 269 smc->clcsock->sk->sk_reuse = sk->sk_reuse; 270 rc = kernel_bind(smc->clcsock, uaddr, addr_len); 271 272 out_rel: 273 release_sock(sk); 274 out: 275 return rc; 276 } 277 278 static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk, 279 unsigned long mask) 280 { 281 /* options we don't get control via setsockopt for */ 282 nsk->sk_type = osk->sk_type; 283 nsk->sk_sndbuf = osk->sk_sndbuf; 284 nsk->sk_rcvbuf = osk->sk_rcvbuf; 285 nsk->sk_sndtimeo = osk->sk_sndtimeo; 286 nsk->sk_rcvtimeo = osk->sk_rcvtimeo; 287 nsk->sk_mark = osk->sk_mark; 288 nsk->sk_priority = osk->sk_priority; 289 nsk->sk_rcvlowat = osk->sk_rcvlowat; 290 nsk->sk_bound_dev_if = osk->sk_bound_dev_if; 291 nsk->sk_err = osk->sk_err; 292 293 nsk->sk_flags &= ~mask; 294 nsk->sk_flags |= osk->sk_flags & mask; 295 } 296 297 #define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \ 298 (1UL << SOCK_KEEPOPEN) | \ 299 (1UL << SOCK_LINGER) | \ 300 (1UL << SOCK_BROADCAST) | \ 301 (1UL << SOCK_TIMESTAMP) | \ 302 (1UL << SOCK_DBG) | \ 303 (1UL << SOCK_RCVTSTAMP) | \ 304 (1UL << SOCK_RCVTSTAMPNS) | \ 305 (1UL << SOCK_LOCALROUTE) | \ 306 (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \ 307 (1UL << SOCK_RXQ_OVFL) | \ 308 (1UL << SOCK_WIFI_STATUS) | \ 309 (1UL << SOCK_NOFCS) | \ 310 (1UL << SOCK_FILTER_LOCKED) | \ 311 (1UL << SOCK_TSTAMP_NEW)) 312 /* copy only relevant settings and flags of SOL_SOCKET level from smc to 313 * clc socket (since smc is not called for these options from net/core) 314 */ 315 static void smc_copy_sock_settings_to_clc(struct smc_sock *smc) 316 { 317 smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC); 318 } 319 320 #define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \ 321 (1UL << SOCK_KEEPOPEN) | \ 322 (1UL << SOCK_LINGER) | \ 323 (1UL << SOCK_DBG)) 324 /* copy only settings and flags relevant for smc from clc to smc socket */ 325 static void smc_copy_sock_settings_to_smc(struct smc_sock *smc) 326 { 327 smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC); 328 } 329 330 /* register a new rmb, send confirm_rkey msg to register with peer */ 331 static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc, 332 bool conf_rkey) 333 { 334 if (!rmb_desc->wr_reg) { 335 /* register memory region for new rmb */ 336 if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) { 337 rmb_desc->regerr = 1; 338 return -EFAULT; 339 } 340 rmb_desc->wr_reg = 1; 341 } 342 if (!conf_rkey) 343 return 0; 344 /* exchange confirm_rkey msg with peer */ 345 if (smc_llc_do_confirm_rkey(link, rmb_desc)) { 346 rmb_desc->regerr = 1; 347 return -EFAULT; 348 } 349 return 0; 350 } 351 352 static int smc_clnt_conf_first_link(struct smc_sock *smc) 353 { 354 struct net *net = sock_net(smc->clcsock->sk); 355 struct smc_link_group *lgr = smc->conn.lgr; 356 struct smc_link *link; 357 int rest; 358 int rc; 359 360 link = &lgr->lnk[SMC_SINGLE_LINK]; 361 /* receive CONFIRM LINK request from server over RoCE fabric */ 362 rest = wait_for_completion_interruptible_timeout( 363 &link->llc_confirm, 364 SMC_LLC_WAIT_FIRST_TIME); 365 if (rest <= 0) { 366 struct smc_clc_msg_decline dclc; 367 368 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), 369 SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT); 370 return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc; 371 } 372 373 if (link->llc_confirm_rc) 374 return SMC_CLC_DECL_RMBE_EC; 375 376 rc = smc_ib_modify_qp_rts(link); 377 if (rc) 378 return SMC_CLC_DECL_ERR_RDYLNK; 379 380 smc_wr_remember_qp_attr(link); 381 382 if (smc_reg_rmb(link, smc->conn.rmb_desc, false)) 383 return SMC_CLC_DECL_ERR_REGRMB; 384 385 /* send CONFIRM LINK response over RoCE fabric */ 386 rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP); 387 if (rc < 0) 388 return SMC_CLC_DECL_TIMEOUT_CL; 389 390 /* receive ADD LINK request from server over RoCE fabric */ 391 rest = wait_for_completion_interruptible_timeout(&link->llc_add, 392 SMC_LLC_WAIT_TIME); 393 if (rest <= 0) { 394 struct smc_clc_msg_decline dclc; 395 396 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), 397 SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT); 398 return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc; 399 } 400 401 /* send add link reject message, only one link supported for now */ 402 rc = smc_llc_send_add_link(link, 403 link->smcibdev->mac[link->ibport - 1], 404 link->gid, SMC_LLC_RESP); 405 if (rc < 0) 406 return SMC_CLC_DECL_TIMEOUT_AL; 407 408 smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time); 409 410 return 0; 411 } 412 413 static void smcr_conn_save_peer_info(struct smc_sock *smc, 414 struct smc_clc_msg_accept_confirm *clc) 415 { 416 int bufsize = smc_uncompress_bufsize(clc->rmbe_size); 417 418 smc->conn.peer_rmbe_idx = clc->rmbe_idx; 419 smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token); 420 smc->conn.peer_rmbe_size = bufsize; 421 atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size); 422 smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1); 423 } 424 425 static void smcd_conn_save_peer_info(struct smc_sock *smc, 426 struct smc_clc_msg_accept_confirm *clc) 427 { 428 int bufsize = smc_uncompress_bufsize(clc->dmbe_size); 429 430 smc->conn.peer_rmbe_idx = clc->dmbe_idx; 431 smc->conn.peer_token = clc->token; 432 /* msg header takes up space in the buffer */ 433 smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg); 434 atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size); 435 smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx; 436 } 437 438 static void smc_conn_save_peer_info(struct smc_sock *smc, 439 struct smc_clc_msg_accept_confirm *clc) 440 { 441 if (smc->conn.lgr->is_smcd) 442 smcd_conn_save_peer_info(smc, clc); 443 else 444 smcr_conn_save_peer_info(smc, clc); 445 } 446 447 static void smc_link_save_peer_info(struct smc_link *link, 448 struct smc_clc_msg_accept_confirm *clc) 449 { 450 link->peer_qpn = ntoh24(clc->qpn); 451 memcpy(link->peer_gid, clc->lcl.gid, SMC_GID_SIZE); 452 memcpy(link->peer_mac, clc->lcl.mac, sizeof(link->peer_mac)); 453 link->peer_psn = ntoh24(clc->psn); 454 link->peer_mtu = clc->qp_mtu; 455 } 456 457 static void smc_switch_to_fallback(struct smc_sock *smc) 458 { 459 smc->use_fallback = true; 460 if (smc->sk.sk_socket && smc->sk.sk_socket->file) { 461 smc->clcsock->file = smc->sk.sk_socket->file; 462 smc->clcsock->file->private_data = smc->clcsock; 463 } 464 } 465 466 /* fall back during connect */ 467 static int smc_connect_fallback(struct smc_sock *smc, int reason_code) 468 { 469 smc_switch_to_fallback(smc); 470 smc->fallback_rsn = reason_code; 471 smc_copy_sock_settings_to_clc(smc); 472 smc->connect_nonblock = 0; 473 if (smc->sk.sk_state == SMC_INIT) 474 smc->sk.sk_state = SMC_ACTIVE; 475 return 0; 476 } 477 478 /* decline and fall back during connect */ 479 static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code) 480 { 481 int rc; 482 483 if (reason_code < 0) { /* error, fallback is not possible */ 484 if (smc->sk.sk_state == SMC_INIT) 485 sock_put(&smc->sk); /* passive closing */ 486 return reason_code; 487 } 488 if (reason_code != SMC_CLC_DECL_PEERDECL) { 489 rc = smc_clc_send_decline(smc, reason_code); 490 if (rc < 0) { 491 if (smc->sk.sk_state == SMC_INIT) 492 sock_put(&smc->sk); /* passive closing */ 493 return rc; 494 } 495 } 496 return smc_connect_fallback(smc, reason_code); 497 } 498 499 /* abort connecting */ 500 static int smc_connect_abort(struct smc_sock *smc, int reason_code, 501 int local_contact) 502 { 503 if (local_contact == SMC_FIRST_CONTACT) 504 smc_lgr_forget(smc->conn.lgr); 505 if (smc->conn.lgr->is_smcd) 506 /* there is only one lgr role for SMC-D; use server lock */ 507 mutex_unlock(&smc_server_lgr_pending); 508 else 509 mutex_unlock(&smc_client_lgr_pending); 510 511 smc_conn_free(&smc->conn); 512 smc->connect_nonblock = 0; 513 return reason_code; 514 } 515 516 /* check if there is a rdma device available for this connection. */ 517 /* called for connect and listen */ 518 static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini) 519 { 520 /* PNET table look up: search active ib_device and port 521 * within same PNETID that also contains the ethernet device 522 * used for the internal TCP socket 523 */ 524 smc_pnet_find_roce_resource(smc->clcsock->sk, ini); 525 if (!ini->ib_dev) 526 return SMC_CLC_DECL_NOSMCRDEV; 527 return 0; 528 } 529 530 /* check if there is an ISM device available for this connection. */ 531 /* called for connect and listen */ 532 static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini) 533 { 534 /* Find ISM device with same PNETID as connecting interface */ 535 smc_pnet_find_ism_resource(smc->clcsock->sk, ini); 536 if (!ini->ism_dev) 537 return SMC_CLC_DECL_NOSMCDDEV; 538 return 0; 539 } 540 541 /* Check for VLAN ID and register it on ISM device just for CLC handshake */ 542 static int smc_connect_ism_vlan_setup(struct smc_sock *smc, 543 struct smc_init_info *ini) 544 { 545 if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev, ini->vlan_id)) 546 return SMC_CLC_DECL_ISMVLANERR; 547 return 0; 548 } 549 550 /* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is 551 * used, the VLAN ID will be registered again during the connection setup. 552 */ 553 static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd, 554 struct smc_init_info *ini) 555 { 556 if (!is_smcd) 557 return 0; 558 if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev, ini->vlan_id)) 559 return SMC_CLC_DECL_CNFERR; 560 return 0; 561 } 562 563 /* CLC handshake during connect */ 564 static int smc_connect_clc(struct smc_sock *smc, int smc_type, 565 struct smc_clc_msg_accept_confirm *aclc, 566 struct smc_init_info *ini) 567 { 568 int rc = 0; 569 570 /* do inband token exchange */ 571 rc = smc_clc_send_proposal(smc, smc_type, ini); 572 if (rc) 573 return rc; 574 /* receive SMC Accept CLC message */ 575 return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT, 576 CLC_WAIT_TIME); 577 } 578 579 /* setup for RDMA connection of client */ 580 static int smc_connect_rdma(struct smc_sock *smc, 581 struct smc_clc_msg_accept_confirm *aclc, 582 struct smc_init_info *ini) 583 { 584 struct smc_link *link; 585 int reason_code = 0; 586 587 ini->is_smcd = false; 588 ini->ib_lcl = &aclc->lcl; 589 ini->ib_clcqpn = ntoh24(aclc->qpn); 590 ini->srv_first_contact = aclc->hdr.flag; 591 592 mutex_lock(&smc_client_lgr_pending); 593 reason_code = smc_conn_create(smc, ini); 594 if (reason_code) { 595 mutex_unlock(&smc_client_lgr_pending); 596 return reason_code; 597 } 598 link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK]; 599 600 smc_conn_save_peer_info(smc, aclc); 601 602 /* create send buffer and rmb */ 603 if (smc_buf_create(smc, false)) 604 return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 605 ini->cln_first_contact); 606 607 if (ini->cln_first_contact == SMC_FIRST_CONTACT) 608 smc_link_save_peer_info(link, aclc); 609 610 if (smc_rmb_rtoken_handling(&smc->conn, aclc)) 611 return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK, 612 ini->cln_first_contact); 613 614 smc_close_init(smc); 615 smc_rx_init(smc); 616 617 if (ini->cln_first_contact == SMC_FIRST_CONTACT) { 618 if (smc_ib_ready_link(link)) 619 return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK, 620 ini->cln_first_contact); 621 } else { 622 if (smc_reg_rmb(link, smc->conn.rmb_desc, true)) 623 return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB, 624 ini->cln_first_contact); 625 } 626 smc_rmb_sync_sg_for_device(&smc->conn); 627 628 reason_code = smc_clc_send_confirm(smc); 629 if (reason_code) 630 return smc_connect_abort(smc, reason_code, 631 ini->cln_first_contact); 632 633 smc_tx_init(smc); 634 635 if (ini->cln_first_contact == SMC_FIRST_CONTACT) { 636 /* QP confirmation over RoCE fabric */ 637 reason_code = smc_clnt_conf_first_link(smc); 638 if (reason_code) 639 return smc_connect_abort(smc, reason_code, 640 ini->cln_first_contact); 641 } 642 mutex_unlock(&smc_client_lgr_pending); 643 644 smc_copy_sock_settings_to_clc(smc); 645 smc->connect_nonblock = 0; 646 if (smc->sk.sk_state == SMC_INIT) 647 smc->sk.sk_state = SMC_ACTIVE; 648 649 return 0; 650 } 651 652 /* setup for ISM connection of client */ 653 static int smc_connect_ism(struct smc_sock *smc, 654 struct smc_clc_msg_accept_confirm *aclc, 655 struct smc_init_info *ini) 656 { 657 int rc = 0; 658 659 ini->is_smcd = true; 660 ini->ism_gid = aclc->gid; 661 ini->srv_first_contact = aclc->hdr.flag; 662 663 /* there is only one lgr role for SMC-D; use server lock */ 664 mutex_lock(&smc_server_lgr_pending); 665 rc = smc_conn_create(smc, ini); 666 if (rc) { 667 mutex_unlock(&smc_server_lgr_pending); 668 return rc; 669 } 670 671 /* Create send and receive buffers */ 672 if (smc_buf_create(smc, true)) 673 return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 674 ini->cln_first_contact); 675 676 smc_conn_save_peer_info(smc, aclc); 677 smc_close_init(smc); 678 smc_rx_init(smc); 679 smc_tx_init(smc); 680 681 rc = smc_clc_send_confirm(smc); 682 if (rc) 683 return smc_connect_abort(smc, rc, ini->cln_first_contact); 684 mutex_unlock(&smc_server_lgr_pending); 685 686 smc_copy_sock_settings_to_clc(smc); 687 smc->connect_nonblock = 0; 688 if (smc->sk.sk_state == SMC_INIT) 689 smc->sk.sk_state = SMC_ACTIVE; 690 691 return 0; 692 } 693 694 /* perform steps before actually connecting */ 695 static int __smc_connect(struct smc_sock *smc) 696 { 697 bool ism_supported = false, rdma_supported = false; 698 struct smc_clc_msg_accept_confirm aclc; 699 struct smc_init_info ini = {0}; 700 int smc_type; 701 int rc = 0; 702 703 sock_hold(&smc->sk); /* sock put in passive closing */ 704 705 if (smc->use_fallback) 706 return smc_connect_fallback(smc, smc->fallback_rsn); 707 708 /* if peer has not signalled SMC-capability, fall back */ 709 if (!tcp_sk(smc->clcsock->sk)->syn_smc) 710 return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC); 711 712 /* IPSec connections opt out of SMC-R optimizations */ 713 if (using_ipsec(smc)) 714 return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC); 715 716 /* get vlan id from IP device */ 717 if (smc_vlan_by_tcpsk(smc->clcsock, &ini)) 718 return smc_connect_decline_fallback(smc, 719 SMC_CLC_DECL_GETVLANERR); 720 721 /* check if there is an ism device available */ 722 if (!smc_find_ism_device(smc, &ini) && 723 !smc_connect_ism_vlan_setup(smc, &ini)) { 724 /* ISM is supported for this connection */ 725 ism_supported = true; 726 smc_type = SMC_TYPE_D; 727 } 728 729 /* check if there is a rdma device available */ 730 if (!smc_find_rdma_device(smc, &ini)) { 731 /* RDMA is supported for this connection */ 732 rdma_supported = true; 733 if (ism_supported) 734 smc_type = SMC_TYPE_B; /* both */ 735 else 736 smc_type = SMC_TYPE_R; /* only RDMA */ 737 } 738 739 /* if neither ISM nor RDMA are supported, fallback */ 740 if (!rdma_supported && !ism_supported) 741 return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV); 742 743 /* perform CLC handshake */ 744 rc = smc_connect_clc(smc, smc_type, &aclc, &ini); 745 if (rc) { 746 smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini); 747 return smc_connect_decline_fallback(smc, rc); 748 } 749 750 /* depending on previous steps, connect using rdma or ism */ 751 if (rdma_supported && aclc.hdr.path == SMC_TYPE_R) 752 rc = smc_connect_rdma(smc, &aclc, &ini); 753 else if (ism_supported && aclc.hdr.path == SMC_TYPE_D) 754 rc = smc_connect_ism(smc, &aclc, &ini); 755 else 756 rc = SMC_CLC_DECL_MODEUNSUPP; 757 if (rc) { 758 smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini); 759 return smc_connect_decline_fallback(smc, rc); 760 } 761 762 smc_connect_ism_vlan_cleanup(smc, ism_supported, &ini); 763 return 0; 764 } 765 766 static void smc_connect_work(struct work_struct *work) 767 { 768 struct smc_sock *smc = container_of(work, struct smc_sock, 769 connect_work); 770 long timeo = smc->sk.sk_sndtimeo; 771 int rc = 0; 772 773 if (!timeo) 774 timeo = MAX_SCHEDULE_TIMEOUT; 775 lock_sock(smc->clcsock->sk); 776 if (smc->clcsock->sk->sk_err) { 777 smc->sk.sk_err = smc->clcsock->sk->sk_err; 778 } else if ((1 << smc->clcsock->sk->sk_state) & 779 (TCPF_SYN_SENT | TCP_SYN_RECV)) { 780 rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo); 781 if ((rc == -EPIPE) && 782 ((1 << smc->clcsock->sk->sk_state) & 783 (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))) 784 rc = 0; 785 } 786 release_sock(smc->clcsock->sk); 787 lock_sock(&smc->sk); 788 if (rc != 0 || smc->sk.sk_err) { 789 smc->sk.sk_state = SMC_CLOSED; 790 if (rc == -EPIPE || rc == -EAGAIN) 791 smc->sk.sk_err = EPIPE; 792 else if (signal_pending(current)) 793 smc->sk.sk_err = -sock_intr_errno(timeo); 794 goto out; 795 } 796 797 rc = __smc_connect(smc); 798 if (rc < 0) 799 smc->sk.sk_err = -rc; 800 801 out: 802 if (!sock_flag(&smc->sk, SOCK_DEAD)) { 803 if (smc->sk.sk_err) { 804 smc->sk.sk_state_change(&smc->sk); 805 } else { /* allow polling before and after fallback decision */ 806 smc->clcsock->sk->sk_write_space(smc->clcsock->sk); 807 smc->sk.sk_write_space(&smc->sk); 808 } 809 } 810 release_sock(&smc->sk); 811 } 812 813 static int smc_connect(struct socket *sock, struct sockaddr *addr, 814 int alen, int flags) 815 { 816 struct sock *sk = sock->sk; 817 struct smc_sock *smc; 818 int rc = -EINVAL; 819 820 smc = smc_sk(sk); 821 822 /* separate smc parameter checking to be safe */ 823 if (alen < sizeof(addr->sa_family)) 824 goto out_err; 825 if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6) 826 goto out_err; 827 828 lock_sock(sk); 829 switch (sk->sk_state) { 830 default: 831 goto out; 832 case SMC_ACTIVE: 833 rc = -EISCONN; 834 goto out; 835 case SMC_INIT: 836 rc = 0; 837 break; 838 } 839 840 smc_copy_sock_settings_to_clc(smc); 841 tcp_sk(smc->clcsock->sk)->syn_smc = 1; 842 if (smc->connect_nonblock) { 843 rc = -EALREADY; 844 goto out; 845 } 846 rc = kernel_connect(smc->clcsock, addr, alen, flags); 847 if (rc && rc != -EINPROGRESS) 848 goto out; 849 if (flags & O_NONBLOCK) { 850 if (schedule_work(&smc->connect_work)) 851 smc->connect_nonblock = 1; 852 rc = -EINPROGRESS; 853 } else { 854 rc = __smc_connect(smc); 855 if (rc < 0) 856 goto out; 857 else 858 rc = 0; /* success cases including fallback */ 859 } 860 861 out: 862 release_sock(sk); 863 out_err: 864 return rc; 865 } 866 867 static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) 868 { 869 struct socket *new_clcsock = NULL; 870 struct sock *lsk = &lsmc->sk; 871 struct sock *new_sk; 872 int rc = -EINVAL; 873 874 release_sock(lsk); 875 new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol); 876 if (!new_sk) { 877 rc = -ENOMEM; 878 lsk->sk_err = ENOMEM; 879 *new_smc = NULL; 880 lock_sock(lsk); 881 goto out; 882 } 883 *new_smc = smc_sk(new_sk); 884 885 mutex_lock(&lsmc->clcsock_release_lock); 886 if (lsmc->clcsock) 887 rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0); 888 mutex_unlock(&lsmc->clcsock_release_lock); 889 lock_sock(lsk); 890 if (rc < 0) 891 lsk->sk_err = -rc; 892 if (rc < 0 || lsk->sk_state == SMC_CLOSED) { 893 new_sk->sk_prot->unhash(new_sk); 894 if (new_clcsock) 895 sock_release(new_clcsock); 896 new_sk->sk_state = SMC_CLOSED; 897 sock_set_flag(new_sk, SOCK_DEAD); 898 sock_put(new_sk); /* final */ 899 *new_smc = NULL; 900 goto out; 901 } 902 903 (*new_smc)->clcsock = new_clcsock; 904 out: 905 return rc; 906 } 907 908 /* add a just created sock to the accept queue of the listen sock as 909 * candidate for a following socket accept call from user space 910 */ 911 static void smc_accept_enqueue(struct sock *parent, struct sock *sk) 912 { 913 struct smc_sock *par = smc_sk(parent); 914 915 sock_hold(sk); /* sock_put in smc_accept_unlink () */ 916 spin_lock(&par->accept_q_lock); 917 list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q); 918 spin_unlock(&par->accept_q_lock); 919 sk_acceptq_added(parent); 920 } 921 922 /* remove a socket from the accept queue of its parental listening socket */ 923 static void smc_accept_unlink(struct sock *sk) 924 { 925 struct smc_sock *par = smc_sk(sk)->listen_smc; 926 927 spin_lock(&par->accept_q_lock); 928 list_del_init(&smc_sk(sk)->accept_q); 929 spin_unlock(&par->accept_q_lock); 930 sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk); 931 sock_put(sk); /* sock_hold in smc_accept_enqueue */ 932 } 933 934 /* remove a sock from the accept queue to bind it to a new socket created 935 * for a socket accept call from user space 936 */ 937 struct sock *smc_accept_dequeue(struct sock *parent, 938 struct socket *new_sock) 939 { 940 struct smc_sock *isk, *n; 941 struct sock *new_sk; 942 943 list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) { 944 new_sk = (struct sock *)isk; 945 946 smc_accept_unlink(new_sk); 947 if (new_sk->sk_state == SMC_CLOSED) { 948 new_sk->sk_prot->unhash(new_sk); 949 if (isk->clcsock) { 950 sock_release(isk->clcsock); 951 isk->clcsock = NULL; 952 } 953 sock_put(new_sk); /* final */ 954 continue; 955 } 956 if (new_sock) { 957 sock_graft(new_sk, new_sock); 958 if (isk->use_fallback) { 959 smc_sk(new_sk)->clcsock->file = new_sock->file; 960 isk->clcsock->file->private_data = isk->clcsock; 961 } 962 } 963 return new_sk; 964 } 965 return NULL; 966 } 967 968 /* clean up for a created but never accepted sock */ 969 void smc_close_non_accepted(struct sock *sk) 970 { 971 struct smc_sock *smc = smc_sk(sk); 972 973 lock_sock(sk); 974 if (!sk->sk_lingertime) 975 /* wait for peer closing */ 976 sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT; 977 __smc_release(smc); 978 release_sock(sk); 979 sock_put(sk); /* final sock_put */ 980 } 981 982 static int smc_serv_conf_first_link(struct smc_sock *smc) 983 { 984 struct net *net = sock_net(smc->clcsock->sk); 985 struct smc_link_group *lgr = smc->conn.lgr; 986 struct smc_link *link; 987 int rest; 988 int rc; 989 990 link = &lgr->lnk[SMC_SINGLE_LINK]; 991 992 if (smc_reg_rmb(link, smc->conn.rmb_desc, false)) 993 return SMC_CLC_DECL_ERR_REGRMB; 994 995 /* send CONFIRM LINK request to client over the RoCE fabric */ 996 rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ); 997 if (rc < 0) 998 return SMC_CLC_DECL_TIMEOUT_CL; 999 1000 /* receive CONFIRM LINK response from client over the RoCE fabric */ 1001 rest = wait_for_completion_interruptible_timeout( 1002 &link->llc_confirm_resp, 1003 SMC_LLC_WAIT_FIRST_TIME); 1004 if (rest <= 0) { 1005 struct smc_clc_msg_decline dclc; 1006 1007 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), 1008 SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT); 1009 return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc; 1010 } 1011 1012 if (link->llc_confirm_resp_rc) 1013 return SMC_CLC_DECL_RMBE_EC; 1014 1015 /* send ADD LINK request to client over the RoCE fabric */ 1016 rc = smc_llc_send_add_link(link, 1017 link->smcibdev->mac[link->ibport - 1], 1018 link->gid, SMC_LLC_REQ); 1019 if (rc < 0) 1020 return SMC_CLC_DECL_TIMEOUT_AL; 1021 1022 /* receive ADD LINK response from client over the RoCE fabric */ 1023 rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp, 1024 SMC_LLC_WAIT_TIME); 1025 if (rest <= 0) { 1026 struct smc_clc_msg_decline dclc; 1027 1028 rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), 1029 SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT); 1030 return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc; 1031 } 1032 1033 smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time); 1034 1035 return 0; 1036 } 1037 1038 /* listen worker: finish */ 1039 static void smc_listen_out(struct smc_sock *new_smc) 1040 { 1041 struct smc_sock *lsmc = new_smc->listen_smc; 1042 struct sock *newsmcsk = &new_smc->sk; 1043 1044 if (lsmc->sk.sk_state == SMC_LISTEN) { 1045 lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING); 1046 smc_accept_enqueue(&lsmc->sk, newsmcsk); 1047 release_sock(&lsmc->sk); 1048 } else { /* no longer listening */ 1049 smc_close_non_accepted(newsmcsk); 1050 } 1051 1052 /* Wake up accept */ 1053 lsmc->sk.sk_data_ready(&lsmc->sk); 1054 sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */ 1055 } 1056 1057 /* listen worker: finish in state connected */ 1058 static void smc_listen_out_connected(struct smc_sock *new_smc) 1059 { 1060 struct sock *newsmcsk = &new_smc->sk; 1061 1062 sk_refcnt_debug_inc(newsmcsk); 1063 if (newsmcsk->sk_state == SMC_INIT) 1064 newsmcsk->sk_state = SMC_ACTIVE; 1065 1066 smc_listen_out(new_smc); 1067 } 1068 1069 /* listen worker: finish in error state */ 1070 static void smc_listen_out_err(struct smc_sock *new_smc) 1071 { 1072 struct sock *newsmcsk = &new_smc->sk; 1073 1074 if (newsmcsk->sk_state == SMC_INIT) 1075 sock_put(&new_smc->sk); /* passive closing */ 1076 newsmcsk->sk_state = SMC_CLOSED; 1077 smc_conn_free(&new_smc->conn); 1078 1079 smc_listen_out(new_smc); 1080 } 1081 1082 /* listen worker: decline and fall back if possible */ 1083 static void smc_listen_decline(struct smc_sock *new_smc, int reason_code, 1084 int local_contact) 1085 { 1086 /* RDMA setup failed, switch back to TCP */ 1087 if (local_contact == SMC_FIRST_CONTACT) 1088 smc_lgr_forget(new_smc->conn.lgr); 1089 if (reason_code < 0) { /* error, no fallback possible */ 1090 smc_listen_out_err(new_smc); 1091 return; 1092 } 1093 smc_conn_free(&new_smc->conn); 1094 smc_switch_to_fallback(new_smc); 1095 new_smc->fallback_rsn = reason_code; 1096 if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) { 1097 if (smc_clc_send_decline(new_smc, reason_code) < 0) { 1098 smc_listen_out_err(new_smc); 1099 return; 1100 } 1101 } 1102 smc_listen_out_connected(new_smc); 1103 } 1104 1105 /* listen worker: check prefixes */ 1106 static int smc_listen_prfx_check(struct smc_sock *new_smc, 1107 struct smc_clc_msg_proposal *pclc) 1108 { 1109 struct smc_clc_msg_proposal_prefix *pclc_prfx; 1110 struct socket *newclcsock = new_smc->clcsock; 1111 1112 pclc_prfx = smc_clc_proposal_get_prefix(pclc); 1113 if (smc_clc_prfx_match(newclcsock, pclc_prfx)) 1114 return SMC_CLC_DECL_DIFFPREFIX; 1115 1116 return 0; 1117 } 1118 1119 /* listen worker: initialize connection and buffers */ 1120 static int smc_listen_rdma_init(struct smc_sock *new_smc, 1121 struct smc_init_info *ini) 1122 { 1123 int rc; 1124 1125 /* allocate connection / link group */ 1126 rc = smc_conn_create(new_smc, ini); 1127 if (rc) 1128 return rc; 1129 1130 /* create send buffer and rmb */ 1131 if (smc_buf_create(new_smc, false)) 1132 return SMC_CLC_DECL_MEM; 1133 1134 return 0; 1135 } 1136 1137 /* listen worker: initialize connection and buffers for SMC-D */ 1138 static int smc_listen_ism_init(struct smc_sock *new_smc, 1139 struct smc_clc_msg_proposal *pclc, 1140 struct smc_init_info *ini) 1141 { 1142 struct smc_clc_msg_smcd *pclc_smcd; 1143 int rc; 1144 1145 pclc_smcd = smc_get_clc_msg_smcd(pclc); 1146 ini->ism_gid = pclc_smcd->gid; 1147 rc = smc_conn_create(new_smc, ini); 1148 if (rc) 1149 return rc; 1150 1151 /* Check if peer can be reached via ISM device */ 1152 if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid, 1153 new_smc->conn.lgr->vlan_id, 1154 new_smc->conn.lgr->smcd)) { 1155 if (ini->cln_first_contact == SMC_FIRST_CONTACT) 1156 smc_lgr_forget(new_smc->conn.lgr); 1157 smc_conn_free(&new_smc->conn); 1158 return SMC_CLC_DECL_SMCDNOTALK; 1159 } 1160 1161 /* Create send and receive buffers */ 1162 if (smc_buf_create(new_smc, true)) { 1163 if (ini->cln_first_contact == SMC_FIRST_CONTACT) 1164 smc_lgr_forget(new_smc->conn.lgr); 1165 smc_conn_free(&new_smc->conn); 1166 return SMC_CLC_DECL_MEM; 1167 } 1168 1169 return 0; 1170 } 1171 1172 /* listen worker: register buffers */ 1173 static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact) 1174 { 1175 struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK]; 1176 1177 if (local_contact != SMC_FIRST_CONTACT) { 1178 if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true)) 1179 return SMC_CLC_DECL_ERR_REGRMB; 1180 } 1181 smc_rmb_sync_sg_for_device(&new_smc->conn); 1182 1183 return 0; 1184 } 1185 1186 /* listen worker: finish RDMA setup */ 1187 static int smc_listen_rdma_finish(struct smc_sock *new_smc, 1188 struct smc_clc_msg_accept_confirm *cclc, 1189 int local_contact) 1190 { 1191 struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK]; 1192 int reason_code = 0; 1193 1194 if (local_contact == SMC_FIRST_CONTACT) 1195 smc_link_save_peer_info(link, cclc); 1196 1197 if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) { 1198 reason_code = SMC_CLC_DECL_ERR_RTOK; 1199 goto decline; 1200 } 1201 1202 if (local_contact == SMC_FIRST_CONTACT) { 1203 if (smc_ib_ready_link(link)) { 1204 reason_code = SMC_CLC_DECL_ERR_RDYLNK; 1205 goto decline; 1206 } 1207 /* QP confirmation over RoCE fabric */ 1208 reason_code = smc_serv_conf_first_link(new_smc); 1209 if (reason_code) 1210 goto decline; 1211 } 1212 return 0; 1213 1214 decline: 1215 smc_listen_decline(new_smc, reason_code, local_contact); 1216 return reason_code; 1217 } 1218 1219 /* setup for RDMA connection of server */ 1220 static void smc_listen_work(struct work_struct *work) 1221 { 1222 struct smc_sock *new_smc = container_of(work, struct smc_sock, 1223 smc_listen_work); 1224 struct socket *newclcsock = new_smc->clcsock; 1225 struct smc_clc_msg_accept_confirm cclc; 1226 struct smc_clc_msg_proposal *pclc; 1227 struct smc_init_info ini = {0}; 1228 bool ism_supported = false; 1229 u8 buf[SMC_CLC_MAX_LEN]; 1230 int rc = 0; 1231 1232 if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN) 1233 return smc_listen_out_err(new_smc); 1234 1235 if (new_smc->use_fallback) { 1236 smc_listen_out_connected(new_smc); 1237 return; 1238 } 1239 1240 /* check if peer is smc capable */ 1241 if (!tcp_sk(newclcsock->sk)->syn_smc) { 1242 smc_switch_to_fallback(new_smc); 1243 new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC; 1244 smc_listen_out_connected(new_smc); 1245 return; 1246 } 1247 1248 /* do inband token exchange - 1249 * wait for and receive SMC Proposal CLC message 1250 */ 1251 pclc = (struct smc_clc_msg_proposal *)&buf; 1252 rc = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN, 1253 SMC_CLC_PROPOSAL, CLC_WAIT_TIME); 1254 if (rc) 1255 goto out_decl; 1256 1257 /* IPSec connections opt out of SMC-R optimizations */ 1258 if (using_ipsec(new_smc)) { 1259 rc = SMC_CLC_DECL_IPSEC; 1260 goto out_decl; 1261 } 1262 1263 /* check for matching IP prefix and subnet length */ 1264 rc = smc_listen_prfx_check(new_smc, pclc); 1265 if (rc) 1266 goto out_decl; 1267 1268 /* get vlan id from IP device */ 1269 if (smc_vlan_by_tcpsk(new_smc->clcsock, &ini)) { 1270 rc = SMC_CLC_DECL_GETVLANERR; 1271 goto out_decl; 1272 } 1273 1274 mutex_lock(&smc_server_lgr_pending); 1275 smc_close_init(new_smc); 1276 smc_rx_init(new_smc); 1277 smc_tx_init(new_smc); 1278 1279 /* check if ISM is available */ 1280 if (pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) { 1281 ini.is_smcd = true; /* prepare ISM check */ 1282 rc = smc_find_ism_device(new_smc, &ini); 1283 if (!rc) 1284 rc = smc_listen_ism_init(new_smc, pclc, &ini); 1285 if (!rc) 1286 ism_supported = true; 1287 else if (pclc->hdr.path == SMC_TYPE_D) 1288 goto out_unlock; /* skip RDMA and decline */ 1289 } 1290 1291 /* check if RDMA is available */ 1292 if (!ism_supported) { /* SMC_TYPE_R or SMC_TYPE_B */ 1293 /* prepare RDMA check */ 1294 memset(&ini, 0, sizeof(ini)); 1295 ini.is_smcd = false; 1296 ini.ib_lcl = &pclc->lcl; 1297 rc = smc_find_rdma_device(new_smc, &ini); 1298 if (rc) { 1299 /* no RDMA device found */ 1300 if (pclc->hdr.path == SMC_TYPE_B) 1301 /* neither ISM nor RDMA device found */ 1302 rc = SMC_CLC_DECL_NOSMCDEV; 1303 goto out_unlock; 1304 } 1305 rc = smc_listen_rdma_init(new_smc, &ini); 1306 if (rc) 1307 goto out_unlock; 1308 rc = smc_listen_rdma_reg(new_smc, ini.cln_first_contact); 1309 if (rc) 1310 goto out_unlock; 1311 } 1312 1313 /* send SMC Accept CLC message */ 1314 rc = smc_clc_send_accept(new_smc, ini.cln_first_contact); 1315 if (rc) 1316 goto out_unlock; 1317 1318 /* SMC-D does not need this lock any more */ 1319 if (ism_supported) 1320 mutex_unlock(&smc_server_lgr_pending); 1321 1322 /* receive SMC Confirm CLC message */ 1323 rc = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc), 1324 SMC_CLC_CONFIRM, CLC_WAIT_TIME); 1325 if (rc) { 1326 if (!ism_supported) 1327 goto out_unlock; 1328 goto out_decl; 1329 } 1330 1331 /* finish worker */ 1332 if (!ism_supported) { 1333 rc = smc_listen_rdma_finish(new_smc, &cclc, 1334 ini.cln_first_contact); 1335 mutex_unlock(&smc_server_lgr_pending); 1336 if (rc) 1337 return; 1338 } 1339 smc_conn_save_peer_info(new_smc, &cclc); 1340 smc_listen_out_connected(new_smc); 1341 return; 1342 1343 out_unlock: 1344 mutex_unlock(&smc_server_lgr_pending); 1345 out_decl: 1346 smc_listen_decline(new_smc, rc, ini.cln_first_contact); 1347 } 1348 1349 static void smc_tcp_listen_work(struct work_struct *work) 1350 { 1351 struct smc_sock *lsmc = container_of(work, struct smc_sock, 1352 tcp_listen_work); 1353 struct sock *lsk = &lsmc->sk; 1354 struct smc_sock *new_smc; 1355 int rc = 0; 1356 1357 lock_sock(lsk); 1358 while (lsk->sk_state == SMC_LISTEN) { 1359 rc = smc_clcsock_accept(lsmc, &new_smc); 1360 if (rc) 1361 goto out; 1362 if (!new_smc) 1363 continue; 1364 1365 new_smc->listen_smc = lsmc; 1366 new_smc->use_fallback = lsmc->use_fallback; 1367 new_smc->fallback_rsn = lsmc->fallback_rsn; 1368 sock_hold(lsk); /* sock_put in smc_listen_work */ 1369 INIT_WORK(&new_smc->smc_listen_work, smc_listen_work); 1370 smc_copy_sock_settings_to_smc(new_smc); 1371 new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf; 1372 new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf; 1373 sock_hold(&new_smc->sk); /* sock_put in passive closing */ 1374 if (!schedule_work(&new_smc->smc_listen_work)) 1375 sock_put(&new_smc->sk); 1376 } 1377 1378 out: 1379 release_sock(lsk); 1380 sock_put(&lsmc->sk); /* sock_hold in smc_listen */ 1381 } 1382 1383 static int smc_listen(struct socket *sock, int backlog) 1384 { 1385 struct sock *sk = sock->sk; 1386 struct smc_sock *smc; 1387 int rc; 1388 1389 smc = smc_sk(sk); 1390 lock_sock(sk); 1391 1392 rc = -EINVAL; 1393 if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN)) 1394 goto out; 1395 1396 rc = 0; 1397 if (sk->sk_state == SMC_LISTEN) { 1398 sk->sk_max_ack_backlog = backlog; 1399 goto out; 1400 } 1401 /* some socket options are handled in core, so we could not apply 1402 * them to the clc socket -- copy smc socket options to clc socket 1403 */ 1404 smc_copy_sock_settings_to_clc(smc); 1405 if (!smc->use_fallback) 1406 tcp_sk(smc->clcsock->sk)->syn_smc = 1; 1407 1408 rc = kernel_listen(smc->clcsock, backlog); 1409 if (rc) 1410 goto out; 1411 sk->sk_max_ack_backlog = backlog; 1412 sk->sk_ack_backlog = 0; 1413 sk->sk_state = SMC_LISTEN; 1414 sock_hold(sk); /* sock_hold in tcp_listen_worker */ 1415 if (!schedule_work(&smc->tcp_listen_work)) 1416 sock_put(sk); 1417 1418 out: 1419 release_sock(sk); 1420 return rc; 1421 } 1422 1423 static int smc_accept(struct socket *sock, struct socket *new_sock, 1424 int flags, bool kern) 1425 { 1426 struct sock *sk = sock->sk, *nsk; 1427 DECLARE_WAITQUEUE(wait, current); 1428 struct smc_sock *lsmc; 1429 long timeo; 1430 int rc = 0; 1431 1432 lsmc = smc_sk(sk); 1433 sock_hold(sk); /* sock_put below */ 1434 lock_sock(sk); 1435 1436 if (lsmc->sk.sk_state != SMC_LISTEN) { 1437 rc = -EINVAL; 1438 release_sock(sk); 1439 goto out; 1440 } 1441 1442 /* Wait for an incoming connection */ 1443 timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); 1444 add_wait_queue_exclusive(sk_sleep(sk), &wait); 1445 while (!(nsk = smc_accept_dequeue(sk, new_sock))) { 1446 set_current_state(TASK_INTERRUPTIBLE); 1447 if (!timeo) { 1448 rc = -EAGAIN; 1449 break; 1450 } 1451 release_sock(sk); 1452 timeo = schedule_timeout(timeo); 1453 /* wakeup by sk_data_ready in smc_listen_work() */ 1454 sched_annotate_sleep(); 1455 lock_sock(sk); 1456 if (signal_pending(current)) { 1457 rc = sock_intr_errno(timeo); 1458 break; 1459 } 1460 } 1461 set_current_state(TASK_RUNNING); 1462 remove_wait_queue(sk_sleep(sk), &wait); 1463 1464 if (!rc) 1465 rc = sock_error(nsk); 1466 release_sock(sk); 1467 if (rc) 1468 goto out; 1469 1470 if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) { 1471 /* wait till data arrives on the socket */ 1472 timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept * 1473 MSEC_PER_SEC); 1474 if (smc_sk(nsk)->use_fallback) { 1475 struct sock *clcsk = smc_sk(nsk)->clcsock->sk; 1476 1477 lock_sock(clcsk); 1478 if (skb_queue_empty(&clcsk->sk_receive_queue)) 1479 sk_wait_data(clcsk, &timeo, NULL); 1480 release_sock(clcsk); 1481 } else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) { 1482 lock_sock(nsk); 1483 smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available); 1484 release_sock(nsk); 1485 } 1486 } 1487 1488 out: 1489 sock_put(sk); /* sock_hold above */ 1490 return rc; 1491 } 1492 1493 static int smc_getname(struct socket *sock, struct sockaddr *addr, 1494 int peer) 1495 { 1496 struct smc_sock *smc; 1497 1498 if (peer && (sock->sk->sk_state != SMC_ACTIVE) && 1499 (sock->sk->sk_state != SMC_APPCLOSEWAIT1)) 1500 return -ENOTCONN; 1501 1502 smc = smc_sk(sock->sk); 1503 1504 return smc->clcsock->ops->getname(smc->clcsock, addr, peer); 1505 } 1506 1507 static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) 1508 { 1509 struct sock *sk = sock->sk; 1510 struct smc_sock *smc; 1511 int rc = -EPIPE; 1512 1513 smc = smc_sk(sk); 1514 lock_sock(sk); 1515 if ((sk->sk_state != SMC_ACTIVE) && 1516 (sk->sk_state != SMC_APPCLOSEWAIT1) && 1517 (sk->sk_state != SMC_INIT)) 1518 goto out; 1519 1520 if (msg->msg_flags & MSG_FASTOPEN) { 1521 if (sk->sk_state == SMC_INIT) { 1522 smc_switch_to_fallback(smc); 1523 smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP; 1524 } else { 1525 rc = -EINVAL; 1526 goto out; 1527 } 1528 } 1529 1530 if (smc->use_fallback) 1531 rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len); 1532 else 1533 rc = smc_tx_sendmsg(smc, msg, len); 1534 out: 1535 release_sock(sk); 1536 return rc; 1537 } 1538 1539 static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, 1540 int flags) 1541 { 1542 struct sock *sk = sock->sk; 1543 struct smc_sock *smc; 1544 int rc = -ENOTCONN; 1545 1546 smc = smc_sk(sk); 1547 lock_sock(sk); 1548 if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { 1549 /* socket was connected before, no more data to read */ 1550 rc = 0; 1551 goto out; 1552 } 1553 if ((sk->sk_state == SMC_INIT) || 1554 (sk->sk_state == SMC_LISTEN) || 1555 (sk->sk_state == SMC_CLOSED)) 1556 goto out; 1557 1558 if (sk->sk_state == SMC_PEERFINCLOSEWAIT) { 1559 rc = 0; 1560 goto out; 1561 } 1562 1563 if (smc->use_fallback) { 1564 rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags); 1565 } else { 1566 msg->msg_namelen = 0; 1567 rc = smc_rx_recvmsg(smc, msg, NULL, len, flags); 1568 } 1569 1570 out: 1571 release_sock(sk); 1572 return rc; 1573 } 1574 1575 static __poll_t smc_accept_poll(struct sock *parent) 1576 { 1577 struct smc_sock *isk = smc_sk(parent); 1578 __poll_t mask = 0; 1579 1580 spin_lock(&isk->accept_q_lock); 1581 if (!list_empty(&isk->accept_q)) 1582 mask = EPOLLIN | EPOLLRDNORM; 1583 spin_unlock(&isk->accept_q_lock); 1584 1585 return mask; 1586 } 1587 1588 static __poll_t smc_poll(struct file *file, struct socket *sock, 1589 poll_table *wait) 1590 { 1591 struct sock *sk = sock->sk; 1592 struct smc_sock *smc; 1593 __poll_t mask = 0; 1594 1595 if (!sk) 1596 return EPOLLNVAL; 1597 1598 smc = smc_sk(sock->sk); 1599 if (smc->use_fallback) { 1600 /* delegate to CLC child sock */ 1601 mask = smc->clcsock->ops->poll(file, smc->clcsock, wait); 1602 sk->sk_err = smc->clcsock->sk->sk_err; 1603 } else { 1604 if (sk->sk_state != SMC_CLOSED) 1605 sock_poll_wait(file, sock, wait); 1606 if (sk->sk_err) 1607 mask |= EPOLLERR; 1608 if ((sk->sk_shutdown == SHUTDOWN_MASK) || 1609 (sk->sk_state == SMC_CLOSED)) 1610 mask |= EPOLLHUP; 1611 if (sk->sk_state == SMC_LISTEN) { 1612 /* woken up by sk_data_ready in smc_listen_work() */ 1613 mask |= smc_accept_poll(sk); 1614 } else if (smc->use_fallback) { /* as result of connect_work()*/ 1615 mask |= smc->clcsock->ops->poll(file, smc->clcsock, 1616 wait); 1617 sk->sk_err = smc->clcsock->sk->sk_err; 1618 } else { 1619 if ((sk->sk_state != SMC_INIT && 1620 atomic_read(&smc->conn.sndbuf_space)) || 1621 sk->sk_shutdown & SEND_SHUTDOWN) { 1622 mask |= EPOLLOUT | EPOLLWRNORM; 1623 } else { 1624 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); 1625 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 1626 } 1627 if (atomic_read(&smc->conn.bytes_to_rcv)) 1628 mask |= EPOLLIN | EPOLLRDNORM; 1629 if (sk->sk_shutdown & RCV_SHUTDOWN) 1630 mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; 1631 if (sk->sk_state == SMC_APPCLOSEWAIT1) 1632 mask |= EPOLLIN; 1633 if (smc->conn.urg_state == SMC_URG_VALID) 1634 mask |= EPOLLPRI; 1635 } 1636 } 1637 1638 return mask; 1639 } 1640 1641 static int smc_shutdown(struct socket *sock, int how) 1642 { 1643 struct sock *sk = sock->sk; 1644 struct smc_sock *smc; 1645 int rc = -EINVAL; 1646 int rc1 = 0; 1647 1648 smc = smc_sk(sk); 1649 1650 if ((how < SHUT_RD) || (how > SHUT_RDWR)) 1651 return rc; 1652 1653 lock_sock(sk); 1654 1655 rc = -ENOTCONN; 1656 if ((sk->sk_state != SMC_ACTIVE) && 1657 (sk->sk_state != SMC_PEERCLOSEWAIT1) && 1658 (sk->sk_state != SMC_PEERCLOSEWAIT2) && 1659 (sk->sk_state != SMC_APPCLOSEWAIT1) && 1660 (sk->sk_state != SMC_APPCLOSEWAIT2) && 1661 (sk->sk_state != SMC_APPFINCLOSEWAIT)) 1662 goto out; 1663 if (smc->use_fallback) { 1664 rc = kernel_sock_shutdown(smc->clcsock, how); 1665 sk->sk_shutdown = smc->clcsock->sk->sk_shutdown; 1666 if (sk->sk_shutdown == SHUTDOWN_MASK) 1667 sk->sk_state = SMC_CLOSED; 1668 goto out; 1669 } 1670 switch (how) { 1671 case SHUT_RDWR: /* shutdown in both directions */ 1672 rc = smc_close_active(smc); 1673 break; 1674 case SHUT_WR: 1675 rc = smc_close_shutdown_write(smc); 1676 break; 1677 case SHUT_RD: 1678 rc = 0; 1679 /* nothing more to do because peer is not involved */ 1680 break; 1681 } 1682 if (smc->clcsock) 1683 rc1 = kernel_sock_shutdown(smc->clcsock, how); 1684 /* map sock_shutdown_cmd constants to sk_shutdown value range */ 1685 sk->sk_shutdown |= how + 1; 1686 1687 out: 1688 release_sock(sk); 1689 return rc ? rc : rc1; 1690 } 1691 1692 static int smc_setsockopt(struct socket *sock, int level, int optname, 1693 char __user *optval, unsigned int optlen) 1694 { 1695 struct sock *sk = sock->sk; 1696 struct smc_sock *smc; 1697 int val, rc; 1698 1699 smc = smc_sk(sk); 1700 1701 /* generic setsockopts reaching us here always apply to the 1702 * CLC socket 1703 */ 1704 rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname, 1705 optval, optlen); 1706 if (smc->clcsock->sk->sk_err) { 1707 sk->sk_err = smc->clcsock->sk->sk_err; 1708 sk->sk_error_report(sk); 1709 } 1710 if (rc) 1711 return rc; 1712 1713 if (optlen < sizeof(int)) 1714 return -EINVAL; 1715 if (get_user(val, (int __user *)optval)) 1716 return -EFAULT; 1717 1718 lock_sock(sk); 1719 switch (optname) { 1720 case TCP_ULP: 1721 case TCP_FASTOPEN: 1722 case TCP_FASTOPEN_CONNECT: 1723 case TCP_FASTOPEN_KEY: 1724 case TCP_FASTOPEN_NO_COOKIE: 1725 /* option not supported by SMC */ 1726 if (sk->sk_state == SMC_INIT) { 1727 smc_switch_to_fallback(smc); 1728 smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP; 1729 } else { 1730 if (!smc->use_fallback) 1731 rc = -EINVAL; 1732 } 1733 break; 1734 case TCP_NODELAY: 1735 if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) { 1736 if (val && !smc->use_fallback) 1737 mod_delayed_work(system_wq, &smc->conn.tx_work, 1738 0); 1739 } 1740 break; 1741 case TCP_CORK: 1742 if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) { 1743 if (!val && !smc->use_fallback) 1744 mod_delayed_work(system_wq, &smc->conn.tx_work, 1745 0); 1746 } 1747 break; 1748 case TCP_DEFER_ACCEPT: 1749 smc->sockopt_defer_accept = val; 1750 break; 1751 default: 1752 break; 1753 } 1754 release_sock(sk); 1755 1756 return rc; 1757 } 1758 1759 static int smc_getsockopt(struct socket *sock, int level, int optname, 1760 char __user *optval, int __user *optlen) 1761 { 1762 struct smc_sock *smc; 1763 1764 smc = smc_sk(sock->sk); 1765 /* socket options apply to the CLC socket */ 1766 return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname, 1767 optval, optlen); 1768 } 1769 1770 static int smc_ioctl(struct socket *sock, unsigned int cmd, 1771 unsigned long arg) 1772 { 1773 union smc_host_cursor cons, urg; 1774 struct smc_connection *conn; 1775 struct smc_sock *smc; 1776 int answ; 1777 1778 smc = smc_sk(sock->sk); 1779 conn = &smc->conn; 1780 lock_sock(&smc->sk); 1781 if (smc->use_fallback) { 1782 if (!smc->clcsock) { 1783 release_sock(&smc->sk); 1784 return -EBADF; 1785 } 1786 answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg); 1787 release_sock(&smc->sk); 1788 return answ; 1789 } 1790 switch (cmd) { 1791 case SIOCINQ: /* same as FIONREAD */ 1792 if (smc->sk.sk_state == SMC_LISTEN) { 1793 release_sock(&smc->sk); 1794 return -EINVAL; 1795 } 1796 if (smc->sk.sk_state == SMC_INIT || 1797 smc->sk.sk_state == SMC_CLOSED) 1798 answ = 0; 1799 else 1800 answ = atomic_read(&smc->conn.bytes_to_rcv); 1801 break; 1802 case SIOCOUTQ: 1803 /* output queue size (not send + not acked) */ 1804 if (smc->sk.sk_state == SMC_LISTEN) { 1805 release_sock(&smc->sk); 1806 return -EINVAL; 1807 } 1808 if (smc->sk.sk_state == SMC_INIT || 1809 smc->sk.sk_state == SMC_CLOSED) 1810 answ = 0; 1811 else 1812 answ = smc->conn.sndbuf_desc->len - 1813 atomic_read(&smc->conn.sndbuf_space); 1814 break; 1815 case SIOCOUTQNSD: 1816 /* output queue size (not send only) */ 1817 if (smc->sk.sk_state == SMC_LISTEN) { 1818 release_sock(&smc->sk); 1819 return -EINVAL; 1820 } 1821 if (smc->sk.sk_state == SMC_INIT || 1822 smc->sk.sk_state == SMC_CLOSED) 1823 answ = 0; 1824 else 1825 answ = smc_tx_prepared_sends(&smc->conn); 1826 break; 1827 case SIOCATMARK: 1828 if (smc->sk.sk_state == SMC_LISTEN) { 1829 release_sock(&smc->sk); 1830 return -EINVAL; 1831 } 1832 if (smc->sk.sk_state == SMC_INIT || 1833 smc->sk.sk_state == SMC_CLOSED) { 1834 answ = 0; 1835 } else { 1836 smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn); 1837 smc_curs_copy(&urg, &conn->urg_curs, conn); 1838 answ = smc_curs_diff(conn->rmb_desc->len, 1839 &cons, &urg) == 1; 1840 } 1841 break; 1842 default: 1843 release_sock(&smc->sk); 1844 return -ENOIOCTLCMD; 1845 } 1846 release_sock(&smc->sk); 1847 1848 return put_user(answ, (int __user *)arg); 1849 } 1850 1851 static ssize_t smc_sendpage(struct socket *sock, struct page *page, 1852 int offset, size_t size, int flags) 1853 { 1854 struct sock *sk = sock->sk; 1855 struct smc_sock *smc; 1856 int rc = -EPIPE; 1857 1858 smc = smc_sk(sk); 1859 lock_sock(sk); 1860 if (sk->sk_state != SMC_ACTIVE) { 1861 release_sock(sk); 1862 goto out; 1863 } 1864 release_sock(sk); 1865 if (smc->use_fallback) 1866 rc = kernel_sendpage(smc->clcsock, page, offset, 1867 size, flags); 1868 else 1869 rc = sock_no_sendpage(sock, page, offset, size, flags); 1870 1871 out: 1872 return rc; 1873 } 1874 1875 /* Map the affected portions of the rmbe into an spd, note the number of bytes 1876 * to splice in conn->splice_pending, and press 'go'. Delays consumer cursor 1877 * updates till whenever a respective page has been fully processed. 1878 * Note that subsequent recv() calls have to wait till all splice() processing 1879 * completed. 1880 */ 1881 static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos, 1882 struct pipe_inode_info *pipe, size_t len, 1883 unsigned int flags) 1884 { 1885 struct sock *sk = sock->sk; 1886 struct smc_sock *smc; 1887 int rc = -ENOTCONN; 1888 1889 smc = smc_sk(sk); 1890 lock_sock(sk); 1891 if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { 1892 /* socket was connected before, no more data to read */ 1893 rc = 0; 1894 goto out; 1895 } 1896 if (sk->sk_state == SMC_INIT || 1897 sk->sk_state == SMC_LISTEN || 1898 sk->sk_state == SMC_CLOSED) 1899 goto out; 1900 1901 if (sk->sk_state == SMC_PEERFINCLOSEWAIT) { 1902 rc = 0; 1903 goto out; 1904 } 1905 1906 if (smc->use_fallback) { 1907 rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos, 1908 pipe, len, flags); 1909 } else { 1910 if (*ppos) { 1911 rc = -ESPIPE; 1912 goto out; 1913 } 1914 if (flags & SPLICE_F_NONBLOCK) 1915 flags = MSG_DONTWAIT; 1916 else 1917 flags = 0; 1918 rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags); 1919 } 1920 out: 1921 release_sock(sk); 1922 1923 return rc; 1924 } 1925 1926 /* must look like tcp */ 1927 static const struct proto_ops smc_sock_ops = { 1928 .family = PF_SMC, 1929 .owner = THIS_MODULE, 1930 .release = smc_release, 1931 .bind = smc_bind, 1932 .connect = smc_connect, 1933 .socketpair = sock_no_socketpair, 1934 .accept = smc_accept, 1935 .getname = smc_getname, 1936 .poll = smc_poll, 1937 .ioctl = smc_ioctl, 1938 .listen = smc_listen, 1939 .shutdown = smc_shutdown, 1940 .setsockopt = smc_setsockopt, 1941 .getsockopt = smc_getsockopt, 1942 .sendmsg = smc_sendmsg, 1943 .recvmsg = smc_recvmsg, 1944 .mmap = sock_no_mmap, 1945 .sendpage = smc_sendpage, 1946 .splice_read = smc_splice_read, 1947 }; 1948 1949 static int smc_create(struct net *net, struct socket *sock, int protocol, 1950 int kern) 1951 { 1952 int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET; 1953 struct smc_sock *smc; 1954 struct sock *sk; 1955 int rc; 1956 1957 rc = -ESOCKTNOSUPPORT; 1958 if (sock->type != SOCK_STREAM) 1959 goto out; 1960 1961 rc = -EPROTONOSUPPORT; 1962 if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6) 1963 goto out; 1964 1965 rc = -ENOBUFS; 1966 sock->ops = &smc_sock_ops; 1967 sk = smc_sock_alloc(net, sock, protocol); 1968 if (!sk) 1969 goto out; 1970 1971 /* create internal TCP socket for CLC handshake and fallback */ 1972 smc = smc_sk(sk); 1973 smc->use_fallback = false; /* assume rdma capability first */ 1974 smc->fallback_rsn = 0; 1975 rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, 1976 &smc->clcsock); 1977 if (rc) { 1978 sk_common_release(sk); 1979 goto out; 1980 } 1981 smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE); 1982 smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE); 1983 1984 out: 1985 return rc; 1986 } 1987 1988 static const struct net_proto_family smc_sock_family_ops = { 1989 .family = PF_SMC, 1990 .owner = THIS_MODULE, 1991 .create = smc_create, 1992 }; 1993 1994 unsigned int smc_net_id; 1995 1996 static __net_init int smc_net_init(struct net *net) 1997 { 1998 return smc_pnet_net_init(net); 1999 } 2000 2001 static void __net_exit smc_net_exit(struct net *net) 2002 { 2003 smc_pnet_net_exit(net); 2004 } 2005 2006 static struct pernet_operations smc_net_ops = { 2007 .init = smc_net_init, 2008 .exit = smc_net_exit, 2009 .id = &smc_net_id, 2010 .size = sizeof(struct smc_net), 2011 }; 2012 2013 static int __init smc_init(void) 2014 { 2015 int rc; 2016 2017 rc = register_pernet_subsys(&smc_net_ops); 2018 if (rc) 2019 return rc; 2020 2021 rc = smc_pnet_init(); 2022 if (rc) 2023 goto out_pernet_subsys; 2024 2025 rc = smc_llc_init(); 2026 if (rc) { 2027 pr_err("%s: smc_llc_init fails with %d\n", __func__, rc); 2028 goto out_pnet; 2029 } 2030 2031 rc = smc_cdc_init(); 2032 if (rc) { 2033 pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc); 2034 goto out_pnet; 2035 } 2036 2037 rc = proto_register(&smc_proto, 1); 2038 if (rc) { 2039 pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc); 2040 goto out_pnet; 2041 } 2042 2043 rc = proto_register(&smc_proto6, 1); 2044 if (rc) { 2045 pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc); 2046 goto out_proto; 2047 } 2048 2049 rc = sock_register(&smc_sock_family_ops); 2050 if (rc) { 2051 pr_err("%s: sock_register fails with %d\n", __func__, rc); 2052 goto out_proto6; 2053 } 2054 INIT_HLIST_HEAD(&smc_v4_hashinfo.ht); 2055 INIT_HLIST_HEAD(&smc_v6_hashinfo.ht); 2056 2057 rc = smc_ib_register_client(); 2058 if (rc) { 2059 pr_err("%s: ib_register fails with %d\n", __func__, rc); 2060 goto out_sock; 2061 } 2062 2063 static_branch_enable(&tcp_have_smc); 2064 return 0; 2065 2066 out_sock: 2067 sock_unregister(PF_SMC); 2068 out_proto6: 2069 proto_unregister(&smc_proto6); 2070 out_proto: 2071 proto_unregister(&smc_proto); 2072 out_pnet: 2073 smc_pnet_exit(); 2074 out_pernet_subsys: 2075 unregister_pernet_subsys(&smc_net_ops); 2076 2077 return rc; 2078 } 2079 2080 static void __exit smc_exit(void) 2081 { 2082 smc_core_exit(); 2083 static_branch_disable(&tcp_have_smc); 2084 smc_ib_unregister_client(); 2085 sock_unregister(PF_SMC); 2086 proto_unregister(&smc_proto6); 2087 proto_unregister(&smc_proto); 2088 smc_pnet_exit(); 2089 unregister_pernet_subsys(&smc_net_ops); 2090 } 2091 2092 module_init(smc_init); 2093 module_exit(smc_exit); 2094 2095 MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>"); 2096 MODULE_DESCRIPTION("smc socket address family"); 2097 MODULE_LICENSE("GPL"); 2098 MODULE_ALIAS_NETPROTO(PF_SMC); 2099