/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * AF_SMC protocol family socket handler keeping the AF_INET sock address type
 * applies to SOCK_STREAM sockets only
 * offers an alternative communication option for TCP-protocol sockets
 * applicable with RoCE-cards only
 *
 * Initial restrictions:
 *   - support for alternate links postponed
 *
 * Copyright IBM Corp. 2016, 2018
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 *             based on prototype from Frank Blaschka
 */

#define KMSG_COMPONENT "smc"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/socket.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/sched/signal.h>
#include <linux/if_vlan.h>

#include <net/sock.h>
#include <net/tcp.h>
#include <net/smc.h>
#include <asm/ioctls.h>

#include "smc.h"
#include "smc_clc.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_ism.h"
#include "smc_pnet.h"
#include "smc_tx.h"
#include "smc_rx.h"
#include "smc_close.h"

static DEFINE_MUTEX(smc_create_lgr_pending);	/* serialize link group
						 * creation
						 */

static void smc_tcp_listen_work(struct work_struct *);
static void smc_connect_work(struct work_struct *);

static void smc_set_keepalive(struct sock *sk, int val)
{
	struct smc_sock *smc = smc_sk(sk);

	smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
}

static struct smc_hashinfo smc_v4_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};

static struct smc_hashinfo smc_v6_hashinfo = {
	.lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
};

int smc_hash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
	struct hlist_head *head;

	head = &h->ht;

	write_lock_bh(&h->lock);
	sk_add_node(sk, head);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	write_unlock_bh(&h->lock);

	return 0;
}
EXPORT_SYMBOL_GPL(smc_hash_sk);

void smc_unhash_sk(struct sock *sk)
{
	struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;

	write_lock_bh(&h->lock);
	if (sk_del_node_init(sk))
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	write_unlock_bh(&h->lock);
}
EXPORT_SYMBOL_GPL(smc_unhash_sk);

struct proto smc_proto = {
	.name		= "SMC",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v4_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto);

struct proto smc_proto6 = {
	.name		= "SMC6",
	.owner		= THIS_MODULE,
	.keepalive	= smc_set_keepalive,
	.hash		= smc_hash_sk,
	.unhash		= smc_unhash_sk,
	.obj_size	= sizeof(struct smc_sock),
	.h.smc_hash	= &smc_v6_hashinfo,
	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto6);
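
/* Each AF_SMC socket owns an internal kernel TCP socket ("clcsock",
 * created in smc_create()).  The clcsock carries the CLC handshake,
 * and whenever SMC cannot be used (smc->use_fallback) it also carries
 * the payload while the AF_SMC socket merely delegates to it.
 */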

static int smc_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = 0;

	if (!sk)
		goto out;

	smc = smc_sk(sk);

	/* cleanup for a dangling non-blocking connect */
	if (smc->connect_info && sk->sk_state == SMC_INIT)
		tcp_abort(smc->clcsock->sk, ECONNABORTED);
	flush_work(&smc->connect_work);
	kfree(smc->connect_info);
	smc->connect_info = NULL;

	if (sk->sk_state == SMC_LISTEN)
		/* smc_close_non_accepted() is called and acquires
		 * sock lock for child sockets again
		 */
		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
	else
		lock_sock(sk);

	if (!smc->use_fallback) {
		rc = smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	}

	sk->sk_prot->unhash(sk);

	if (smc->clcsock) {
		if (smc->use_fallback && sk->sk_state == SMC_LISTEN) {
			/* wake up clcsock accept */
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
		}
		mutex_lock(&smc->clcsock_release_lock);
		sock_release(smc->clcsock);
		smc->clcsock = NULL;
		mutex_unlock(&smc->clcsock_release_lock);
	}
	if (smc->use_fallback) {
		if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
			sock_put(sk); /* passive closing */
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk);
	}

	/* detach socket */
	sock_orphan(sk);
	sock->sk = NULL;
	if (!smc->use_fallback && sk->sk_state == SMC_CLOSED)
		smc_conn_free(&smc->conn);
	release_sock(sk);

	sock_put(sk); /* final sock_put */
out:
	return rc;
}

static void smc_destruct(struct sock *sk)
{
	if (sk->sk_state != SMC_CLOSED)
		return;
	if (!sock_flag(sk, SOCK_DEAD))
		return;

	sk_refcnt_debug_dec(sk);
}

static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
				   int protocol)
{
	struct smc_sock *smc;
	struct proto *prot;
	struct sock *sk;

	prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
	if (!sk)
		return NULL;

	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
	sk->sk_state = SMC_INIT;
	sk->sk_destruct = smc_destruct;
	sk->sk_protocol = protocol;
	smc = smc_sk(sk);
	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
	INIT_WORK(&smc->connect_work, smc_connect_work);
	INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
	INIT_LIST_HEAD(&smc->accept_q);
	spin_lock_init(&smc->accept_q_lock);
	spin_lock_init(&smc->conn.send_lock);
	sk->sk_prot->hash(sk);
	sk_refcnt_debug_inc(sk);
	mutex_init(&smc->clcsock_release_lock);

	return sk;
}

static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
		    int addr_len)
{
	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);

	/* replicate tests from inet_bind(), to be safe wrt. future changes */
	rc = -EINVAL;
	if (addr_len < sizeof(struct sockaddr_in))
		goto out;

	rc = -EAFNOSUPPORT;
	if (addr->sin_family != AF_INET &&
	    addr->sin_family != AF_INET6 &&
	    addr->sin_family != AF_UNSPEC)
		goto out;
	/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
	if (addr->sin_family == AF_UNSPEC &&
	    addr->sin_addr.s_addr != htonl(INADDR_ANY))
		goto out;

	lock_sock(sk);

	/* Check if socket is already active */
	rc = -EINVAL;
	if (sk->sk_state != SMC_INIT)
		goto out_rel;

	smc->clcsock->sk->sk_reuse = sk->sk_reuse;
	rc = kernel_bind(smc->clcsock, uaddr, addr_len);

out_rel:
	release_sock(sk);
out:
	return rc;
}

static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
				   unsigned long mask)
{
	/* options we don't get control via setsockopt for */
	nsk->sk_type = osk->sk_type;
	nsk->sk_sndbuf = osk->sk_sndbuf;
	nsk->sk_rcvbuf = osk->sk_rcvbuf;
	nsk->sk_sndtimeo = osk->sk_sndtimeo;
	nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
	nsk->sk_mark = osk->sk_mark;
	nsk->sk_priority = osk->sk_priority;
	nsk->sk_rcvlowat = osk->sk_rcvlowat;
	nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
	nsk->sk_err = osk->sk_err;

	nsk->sk_flags &= ~mask;
	nsk->sk_flags |= osk->sk_flags & mask;
}

#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_BROADCAST) | \
			     (1UL << SOCK_TIMESTAMP) | \
			     (1UL << SOCK_DBG) | \
			     (1UL << SOCK_RCVTSTAMP) | \
			     (1UL << SOCK_RCVTSTAMPNS) | \
			     (1UL << SOCK_LOCALROUTE) | \
			     (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
			     (1UL << SOCK_RXQ_OVFL) | \
			     (1UL << SOCK_WIFI_STATUS) | \
			     (1UL << SOCK_NOFCS) | \
			     (1UL << SOCK_FILTER_LOCKED))
/* copy only relevant settings and flags of SOL_SOCKET level from smc to
 * clc socket (since smc is not called for these options from net/core)
 */
static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
	smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
}

#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
			     (1UL << SOCK_KEEPOPEN) | \
			     (1UL << SOCK_LINGER) | \
			     (1UL << SOCK_DBG))
/* copy only settings and flags relevant for smc from clc to smc socket */
static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
{
	smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}

/* register a new rmb, send confirm_rkey msg to register with peer */
static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc,
		       bool conf_rkey)
{
	if (!rmb_desc->wr_reg) {
		/* register memory region for new rmb */
		if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) {
			rmb_desc->regerr = 1;
			return -EFAULT;
		}
		rmb_desc->wr_reg = 1;
	}
	if (!conf_rkey)
		return 0;
	/* exchange confirm_rkey msg with peer */
	if (smc_llc_do_confirm_rkey(link, rmb_desc)) {
		rmb_desc->regerr = 1;
		return -EFAULT;
	}
	return 0;
}
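
/* For an SMC-R first contact the peers validate the new link with an
 * LLC exchange over the RoCE fabric: the server sends CONFIRM LINK and
 * the client responds; the server then offers ADD LINK, which is
 * rejected as long as only a single link per link group is supported.
 * smc_clnt_conf_first_link() below implements the client side,
 * smc_serv_conf_first_link() further down the server side.
 */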

static int smc_clnt_conf_first_link(struct smc_sock *smc)
{
	struct net *net = sock_net(smc->clcsock->sk);
	struct smc_link_group *lgr = smc->conn.lgr;
	struct smc_link *link;
	int rest;
	int rc;

	link = &lgr->lnk[SMC_SINGLE_LINK];
	/* receive CONFIRM LINK request from server over RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(
		&link->llc_confirm,
		SMC_LLC_WAIT_FIRST_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
	}

	if (link->llc_confirm_rc)
		return SMC_CLC_DECL_RMBE_EC;

	rc = smc_ib_modify_qp_rts(link);
	if (rc)
		return SMC_CLC_DECL_ERR_RDYLNK;

	smc_wr_remember_qp_attr(link);

	if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
		return SMC_CLC_DECL_ERR_REGRMB;

	/* send CONFIRM LINK response over RoCE fabric */
	rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_CL;

	/* receive ADD LINK request from server over RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(&link->llc_add,
							 SMC_LLC_WAIT_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
	}

	/* send add link reject message, only one link supported for now */
	rc = smc_llc_send_add_link(link,
				   link->smcibdev->mac[link->ibport - 1],
				   link->gid, SMC_LLC_RESP);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_AL;

	smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);

	return 0;
}

static void smcr_conn_save_peer_info(struct smc_sock *smc,
				     struct smc_clc_msg_accept_confirm *clc)
{
	int bufsize = smc_uncompress_bufsize(clc->rmbe_size);

	smc->conn.peer_rmbe_idx = clc->rmbe_idx;
	smc->conn.local_tx_ctrl.token = ntohl(clc->rmbe_alert_token);
	smc->conn.peer_rmbe_size = bufsize;
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
	smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
}

static void smcd_conn_save_peer_info(struct smc_sock *smc,
				     struct smc_clc_msg_accept_confirm *clc)
{
	int bufsize = smc_uncompress_bufsize(clc->dmbe_size);

	smc->conn.peer_rmbe_idx = clc->dmbe_idx;
	smc->conn.peer_token = clc->token;
	/* msg header takes up space in the buffer */
	smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
	atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
	smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
}
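
/* Worked example for the tx_off computations above (illustrative
 * values, not taken from a trace): with an uncompressed peer buffer
 * size of 64 KiB, the SMC-R formula with peer_rmbe_idx == 2 gives
 * tx_off = 65536 * (2 - 1) = 65536, i.e. writes start at the second
 * buffer element of the peer; the SMC-D formula uses the index
 * unshifted and additionally subtracts sizeof(struct smcd_cdc_msg)
 * from the usable buffer size, because the CDC message header lives
 * inside the buffer itself.
 */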

static void smc_conn_save_peer_info(struct smc_sock *smc,
				    struct smc_clc_msg_accept_confirm *clc)
{
	if (smc->conn.lgr->is_smcd)
		smcd_conn_save_peer_info(smc, clc);
	else
		smcr_conn_save_peer_info(smc, clc);
}

static void smc_link_save_peer_info(struct smc_link *link,
				    struct smc_clc_msg_accept_confirm *clc)
{
	link->peer_qpn = ntoh24(clc->qpn);
	memcpy(link->peer_gid, clc->lcl.gid, SMC_GID_SIZE);
	memcpy(link->peer_mac, clc->lcl.mac, sizeof(link->peer_mac));
	link->peer_psn = ntoh24(clc->psn);
	link->peer_mtu = clc->qp_mtu;
}

/* fall back during connect */
static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
{
	smc->use_fallback = true;
	smc->fallback_rsn = reason_code;
	smc_copy_sock_settings_to_clc(smc);
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;
	return 0;
}

/* decline and fall back during connect */
static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
{
	int rc;

	if (reason_code < 0) { /* error, fallback is not possible */
		if (smc->sk.sk_state == SMC_INIT)
			sock_put(&smc->sk); /* passive closing */
		return reason_code;
	}
	if (reason_code != SMC_CLC_DECL_PEERDECL) {
		rc = smc_clc_send_decline(smc, reason_code);
		if (rc < 0) {
			if (smc->sk.sk_state == SMC_INIT)
				sock_put(&smc->sk); /* passive closing */
			return rc;
		}
	}
	return smc_connect_fallback(smc, reason_code);
}

/* abort connecting */
static int smc_connect_abort(struct smc_sock *smc, int reason_code,
			     int local_contact)
{
	if (local_contact == SMC_FIRST_CONTACT)
		smc_lgr_forget(smc->conn.lgr);
	mutex_unlock(&smc_create_lgr_pending);
	smc_conn_free(&smc->conn);
	return reason_code;
}

/* check if there is a rdma device available for this connection. */
/* called for connect and listen */
static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev,
			  u8 *ibport, unsigned short vlan_id, u8 gid[])
{
	int reason_code = 0;

	/* PNET table look up: search active ib_device and port
	 * within same PNETID that also contains the ethernet device
	 * used for the internal TCP socket
	 */
	smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport, vlan_id,
				    gid);
	if (!(*ibdev))
		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */

	return reason_code;
}

/* check if there is an ISM device available for this connection. */
/* called for connect and listen */
static int smc_check_ism(struct smc_sock *smc, struct smcd_dev **ismdev)
{
	/* Find ISM device with same PNETID as connecting interface */
	smc_pnet_find_ism_resource(smc->clcsock->sk, ismdev);
	if (!(*ismdev))
		return SMC_CLC_DECL_CNFERR; /* configuration error */
	return 0;
}

/* Check for VLAN ID and register it on ISM device just for CLC handshake */
static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
				      struct smcd_dev *ismdev,
				      unsigned short vlan_id)
{
	if (vlan_id && smc_ism_get_vlan(ismdev, vlan_id))
		return SMC_CLC_DECL_CNFERR;
	return 0;
}

/* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
 * used, the VLAN ID will be registered again during the connection setup.
 */
static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
					struct smcd_dev *ismdev,
					unsigned short vlan_id)
{
	if (!is_smcd)
		return 0;
	if (vlan_id && smc_ism_put_vlan(ismdev, vlan_id))
		return SMC_CLC_DECL_CNFERR;
	return 0;
}

/* CLC handshake during connect */
static int smc_connect_clc(struct smc_sock *smc, int smc_type,
			   struct smc_clc_msg_accept_confirm *aclc,
			   struct smc_ib_device *ibdev, u8 ibport,
			   u8 gid[], struct smcd_dev *ismdev)
{
	int rc = 0;

	/* do inband token exchange */
	rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, gid, ismdev);
	if (rc)
		return rc;
	/* receive SMC Accept CLC message */
	return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT,
				CLC_WAIT_TIME);
}
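
/* smc_conn_create() either attaches the connection to an existing
 * link group for this peer or builds a new one; SMC_FIRST_CONTACT
 * indicates the latter and triggers the extra first-contact steps
 * (link confirmation for SMC-R, smc_lgr_forget() on abort).  Link
 * group creation is serialized by the smc_create_lgr_pending mutex,
 * held across the setup functions below.
 */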

/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc,
			    struct smc_clc_msg_accept_confirm *aclc,
			    struct smc_ib_device *ibdev, u8 ibport)
{
	int local_contact = SMC_FIRST_CONTACT;
	struct smc_link *link;
	int reason_code = 0;

	mutex_lock(&smc_create_lgr_pending);
	local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev,
					ibport, ntoh24(aclc->qpn), &aclc->lcl,
					NULL, 0);
	if (local_contact < 0) {
		if (local_contact == -ENOMEM)
			reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
		else if (local_contact == -ENOLINK)
			reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */
		else
			reason_code = SMC_CLC_DECL_INTERR; /* other error */
		return smc_connect_abort(smc, reason_code, 0);
	}
	link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];

	smc_conn_save_peer_info(smc, aclc);

	/* create send buffer and rmb */
	if (smc_buf_create(smc, false))
		return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);

	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, aclc);

	if (smc_rmb_rtoken_handling(&smc->conn, aclc))
		return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
					 local_contact);

	smc_close_init(smc);
	smc_rx_init(smc);

	if (local_contact == SMC_FIRST_CONTACT) {
		if (smc_ib_ready_link(link))
			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
						 local_contact);
	} else {
		if (smc_reg_rmb(link, smc->conn.rmb_desc, true))
			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
						 local_contact);
	}
	smc_rmb_sync_sg_for_device(&smc->conn);

	reason_code = smc_clc_send_confirm(smc);
	if (reason_code)
		return smc_connect_abort(smc, reason_code, local_contact);

	smc_tx_init(smc);

	if (local_contact == SMC_FIRST_CONTACT) {
		/* QP confirmation over RoCE fabric */
		reason_code = smc_clnt_conf_first_link(smc);
		if (reason_code)
			return smc_connect_abort(smc, reason_code,
						 local_contact);
	}
	mutex_unlock(&smc_create_lgr_pending);

	smc_copy_sock_settings_to_clc(smc);
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return 0;
}

/* setup for ISM connection of client */
static int smc_connect_ism(struct smc_sock *smc,
			   struct smc_clc_msg_accept_confirm *aclc,
			   struct smcd_dev *ismdev)
{
	int local_contact = SMC_FIRST_CONTACT;
	int rc = 0;

	mutex_lock(&smc_create_lgr_pending);
	local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0, 0,
					NULL, ismdev, aclc->gid);
	if (local_contact < 0)
		return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0);

	/* Create send and receive buffers */
	if (smc_buf_create(smc, true))
		return smc_connect_abort(smc, SMC_CLC_DECL_MEM, local_contact);

	smc_conn_save_peer_info(smc, aclc);
	smc_close_init(smc);
	smc_rx_init(smc);
	smc_tx_init(smc);

	rc = smc_clc_send_confirm(smc);
	if (rc)
		return smc_connect_abort(smc, rc, local_contact);
	mutex_unlock(&smc_create_lgr_pending);

	smc_copy_sock_settings_to_clc(smc);
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return 0;
}

/* perform steps before actually connecting */
static int __smc_connect(struct smc_sock *smc)
{
	bool ism_supported = false, rdma_supported = false;
	struct smc_clc_msg_accept_confirm aclc;
	struct smc_ib_device *ibdev;
	struct smcd_dev *ismdev;
	u8 gid[SMC_GID_SIZE];
	unsigned short vlan;
	int smc_type;
	int rc = 0;
	u8 ibport;

	sock_hold(&smc->sk); /* sock put in passive closing */

	if (smc->use_fallback)
		return smc_connect_fallback(smc, smc->fallback_rsn);

	/* if peer has not signalled SMC-capability, fall back */
	if (!tcp_sk(smc->clcsock->sk)->syn_smc)
		return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(smc))
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC);

	/* check for VLAN ID */
	if (smc_vlan_by_tcpsk(smc->clcsock, &vlan))
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR);

	/* check if there is an ism device available */
	if (!smc_check_ism(smc, &ismdev) &&
	    !smc_connect_ism_vlan_setup(smc, ismdev, vlan)) {
		/* ISM is supported for this connection */
		ism_supported = true;
		smc_type = SMC_TYPE_D;
	}

	/* check if there is a rdma device available */
	if (!smc_check_rdma(smc, &ibdev, &ibport, vlan, gid)) {
		/* RDMA is supported for this connection */
		rdma_supported = true;
		if (ism_supported)
			smc_type = SMC_TYPE_B; /* both */
		else
			smc_type = SMC_TYPE_R; /* only RDMA */
	}

	/* if neither ISM nor RDMA are supported, fallback */
	if (!rdma_supported && !ism_supported)
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV);

	/* perform CLC handshake */
	rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, gid, ismdev);
	if (rc) {
		smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
		return smc_connect_decline_fallback(smc, rc);
	}

	/* depending on previous steps, connect using rdma or ism */
	if (rdma_supported && aclc.hdr.path == SMC_TYPE_R)
		rc = smc_connect_rdma(smc, &aclc, ibdev, ibport);
	else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
		rc = smc_connect_ism(smc, &aclc, ismdev);
	else
		rc = SMC_CLC_DECL_MODEUNSUPP;
	if (rc) {
		smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
		return smc_connect_decline_fallback(smc, rc);
	}

	smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
	return 0;
}
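
/* Non-blocking connect is emulated: smc_connect() stores the target
 * address in smc->connect_info with O_NONBLOCK cleared from the saved
 * flags and returns -EINPROGRESS, and the worker below performs the
 * blocking TCP connect plus the SMC handshake.  smc_release() aborts a
 * still dangling non-blocking connect via tcp_abort() and flushes this
 * work.
 */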

static void smc_connect_work(struct work_struct *work)
{
	struct smc_sock *smc = container_of(work, struct smc_sock,
					    connect_work);
	int rc;

	lock_sock(&smc->sk);
	rc = kernel_connect(smc->clcsock, &smc->connect_info->addr,
			    smc->connect_info->alen, smc->connect_info->flags);
	if (smc->clcsock->sk->sk_err) {
		smc->sk.sk_err = smc->clcsock->sk->sk_err;
		goto out;
	}
	if (rc < 0) {
		smc->sk.sk_err = -rc;
		goto out;
	}

	rc = __smc_connect(smc);
	if (rc < 0)
		smc->sk.sk_err = -rc;

out:
	if (smc->sk.sk_err)
		smc->sk.sk_state_change(&smc->sk);
	else
		smc->sk.sk_write_space(&smc->sk);
	kfree(smc->connect_info);
	smc->connect_info = NULL;
	release_sock(&smc->sk);
}

static int smc_connect(struct socket *sock, struct sockaddr *addr,
		       int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;

	smc = smc_sk(sk);

	/* separate smc parameter checking to be safe */
	if (alen < sizeof(addr->sa_family))
		goto out_err;
	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
		goto out_err;

	lock_sock(sk);
	switch (sk->sk_state) {
	default:
		goto out;
	case SMC_ACTIVE:
		rc = -EISCONN;
		goto out;
	case SMC_INIT:
		rc = 0;
		break;
	}

	smc_copy_sock_settings_to_clc(smc);
	tcp_sk(smc->clcsock->sk)->syn_smc = 1;
	if (flags & O_NONBLOCK) {
		if (smc->connect_info) {
			rc = -EALREADY;
			goto out;
		}
		smc->connect_info = kzalloc(alen + 2 * sizeof(int), GFP_KERNEL);
		if (!smc->connect_info) {
			rc = -ENOMEM;
			goto out;
		}
		smc->connect_info->alen = alen;
		smc->connect_info->flags = flags ^ O_NONBLOCK;
		memcpy(&smc->connect_info->addr, addr, alen);
		schedule_work(&smc->connect_work);
		rc = -EINPROGRESS;
	} else {
		rc = kernel_connect(smc->clcsock, addr, alen, flags);
		if (rc)
			goto out;

		rc = __smc_connect(smc);
		if (rc < 0)
			goto out;
		else
			rc = 0; /* success cases including fallback */
	}

out:
	release_sock(sk);
out_err:
	return rc;
}

static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
{
	struct socket *new_clcsock = NULL;
	struct sock *lsk = &lsmc->sk;
	struct sock *new_sk;
	int rc = -EINVAL;

	release_sock(lsk);
	new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
	if (!new_sk) {
		rc = -ENOMEM;
		lsk->sk_err = ENOMEM;
		*new_smc = NULL;
		lock_sock(lsk);
		goto out;
	}
	*new_smc = smc_sk(new_sk);

	mutex_lock(&lsmc->clcsock_release_lock);
	if (lsmc->clcsock)
		rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
	mutex_unlock(&lsmc->clcsock_release_lock);
	lock_sock(lsk);
	if (rc < 0)
		lsk->sk_err = -rc;
	if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
		if (new_clcsock)
			sock_release(new_clcsock);
		new_sk->sk_state = SMC_CLOSED;
		sock_set_flag(new_sk, SOCK_DEAD);
		new_sk->sk_prot->unhash(new_sk);
		sock_put(new_sk); /* final */
		*new_smc = NULL;
		goto out;
	}

	(*new_smc)->clcsock = new_clcsock;
out:
	return rc;
}
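
/* Accepted connections are staged on a per-listen-socket queue: the
 * tcp listen worker allocates a child smc sock per incoming clcsock
 * connection, the per-connection listen worker enqueues it once the
 * handshake is done, and a user space accept() merely dequeues it.
 */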

/* add a just created sock to the accept queue of the listen sock as
 * candidate for a following socket accept call from user space
 */
static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
{
	struct smc_sock *par = smc_sk(parent);

	sock_hold(sk); /* sock_put in smc_accept_unlink() */
	spin_lock(&par->accept_q_lock);
	list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_added(parent);
}

/* remove a socket from the accept queue of its parental listening socket */
static void smc_accept_unlink(struct sock *sk)
{
	struct smc_sock *par = smc_sk(sk)->listen_smc;

	spin_lock(&par->accept_q_lock);
	list_del_init(&smc_sk(sk)->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
	sock_put(sk); /* sock_hold in smc_accept_enqueue */
}

/* remove a sock from the accept queue to bind it to a new socket created
 * for a socket accept call from user space
 */
struct sock *smc_accept_dequeue(struct sock *parent,
				struct socket *new_sock)
{
	struct smc_sock *isk, *n;
	struct sock *new_sk;

	list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
		new_sk = (struct sock *)isk;

		smc_accept_unlink(new_sk);
		if (new_sk->sk_state == SMC_CLOSED) {
			if (isk->clcsock) {
				sock_release(isk->clcsock);
				isk->clcsock = NULL;
			}
			new_sk->sk_prot->unhash(new_sk);
			sock_put(new_sk); /* final */
			continue;
		}
		if (new_sock)
			sock_graft(new_sk, new_sock);
		return new_sk;
	}
	return NULL;
}

/* clean up for a created but never accepted sock */
void smc_close_non_accepted(struct sock *sk)
{
	struct smc_sock *smc = smc_sk(sk);

	lock_sock(sk);
	if (!sk->sk_lingertime)
		/* wait for peer closing */
		sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
	if (!smc->use_fallback) {
		smc_close_active(smc);
		sock_set_flag(sk, SOCK_DEAD);
		sk->sk_shutdown |= SHUTDOWN_MASK;
	}
	if (smc->clcsock) {
		struct socket *tcp;

		tcp = smc->clcsock;
		smc->clcsock = NULL;
		sock_release(tcp);
	}
	if (smc->use_fallback) {
		sock_put(sk); /* passive closing */
		sk->sk_state = SMC_CLOSED;
	} else {
		if (sk->sk_state == SMC_CLOSED)
			smc_conn_free(&smc->conn);
	}
	release_sock(sk);
	sk->sk_prot->unhash(sk);
	sock_put(sk); /* final sock_put */
}
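
/* Server side counterpart of smc_clnt_conf_first_link(); the roles in
 * the CONFIRM LINK / ADD LINK exchange are reversed.
 */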

static int smc_serv_conf_first_link(struct smc_sock *smc)
{
	struct net *net = sock_net(smc->clcsock->sk);
	struct smc_link_group *lgr = smc->conn.lgr;
	struct smc_link *link;
	int rest;
	int rc;

	link = &lgr->lnk[SMC_SINGLE_LINK];

	if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
		return SMC_CLC_DECL_ERR_REGRMB;

	/* send CONFIRM LINK request to client over the RoCE fabric */
	rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_CL;

	/* receive CONFIRM LINK response from client over the RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(
		&link->llc_confirm_resp,
		SMC_LLC_WAIT_FIRST_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
	}

	if (link->llc_confirm_resp_rc)
		return SMC_CLC_DECL_RMBE_EC;

	/* send ADD LINK request to client over the RoCE fabric */
	rc = smc_llc_send_add_link(link,
				   link->smcibdev->mac[link->ibport - 1],
				   link->gid, SMC_LLC_REQ);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_AL;

	/* receive ADD LINK response from client over the RoCE fabric */
	rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
							 SMC_LLC_WAIT_TIME);
	if (rest <= 0) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
	}

	smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);

	return 0;
}

/* listen worker: finish */
static void smc_listen_out(struct smc_sock *new_smc)
{
	struct smc_sock *lsmc = new_smc->listen_smc;
	struct sock *newsmcsk = &new_smc->sk;

	lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
	if (lsmc->sk.sk_state == SMC_LISTEN) {
		smc_accept_enqueue(&lsmc->sk, newsmcsk);
	} else { /* no longer listening */
		smc_close_non_accepted(newsmcsk);
	}
	release_sock(&lsmc->sk);

	/* Wake up accept */
	lsmc->sk.sk_data_ready(&lsmc->sk);
	sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
}

/* listen worker: finish in state connected */
static void smc_listen_out_connected(struct smc_sock *new_smc)
{
	struct sock *newsmcsk = &new_smc->sk;

	sk_refcnt_debug_inc(newsmcsk);
	if (newsmcsk->sk_state == SMC_INIT)
		newsmcsk->sk_state = SMC_ACTIVE;

	smc_listen_out(new_smc);
}

/* listen worker: finish in error state */
static void smc_listen_out_err(struct smc_sock *new_smc)
{
	struct sock *newsmcsk = &new_smc->sk;

	if (newsmcsk->sk_state == SMC_INIT)
		sock_put(&new_smc->sk); /* passive closing */
	newsmcsk->sk_state = SMC_CLOSED;
	smc_conn_free(&new_smc->conn);

	smc_listen_out(new_smc);
}

/* listen worker: decline and fall back if possible */
static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
			       int local_contact)
{
	/* RDMA setup failed, switch back to TCP */
	if (local_contact == SMC_FIRST_CONTACT)
		smc_lgr_forget(new_smc->conn.lgr);
	if (reason_code < 0) { /* error, no fallback possible */
		smc_listen_out_err(new_smc);
		return;
	}
	smc_conn_free(&new_smc->conn);
	new_smc->use_fallback = true;
	new_smc->fallback_rsn = reason_code;
	if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
		if (smc_clc_send_decline(new_smc, reason_code) < 0) {
			smc_listen_out_err(new_smc);
			return;
		}
	}
	smc_listen_out_connected(new_smc);
}

/* listen worker: check prefixes */
static int smc_listen_rdma_check(struct smc_sock *new_smc,
				 struct smc_clc_msg_proposal *pclc)
{
	struct smc_clc_msg_proposal_prefix *pclc_prfx;
	struct socket *newclcsock = new_smc->clcsock;

	pclc_prfx = smc_clc_proposal_get_prefix(pclc);
	if (smc_clc_prfx_match(newclcsock, pclc_prfx))
		return SMC_CLC_DECL_CNFERR;

	return 0;
}

/* listen worker: initialize connection and buffers */
static int smc_listen_rdma_init(struct smc_sock *new_smc,
				struct smc_clc_msg_proposal *pclc,
				struct smc_ib_device *ibdev, u8 ibport,
				int *local_contact)
{
	/* allocate connection / link group */
	*local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport, 0,
					 &pclc->lcl, NULL, 0);
	if (*local_contact < 0) {
		if (*local_contact == -ENOMEM)
			return SMC_CLC_DECL_MEM;/* insufficient memory*/
		return SMC_CLC_DECL_INTERR; /* other error */
	}

	/* create send buffer and rmb */
	if (smc_buf_create(new_smc, false))
		return SMC_CLC_DECL_MEM;

	return 0;
}

/* listen worker: initialize connection and buffers for SMC-D */
static int smc_listen_ism_init(struct smc_sock *new_smc,
			       struct smc_clc_msg_proposal *pclc,
			       struct smcd_dev *ismdev,
			       int *local_contact)
{
	struct smc_clc_msg_smcd *pclc_smcd;

	pclc_smcd = smc_get_clc_msg_smcd(pclc);
	*local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, 0, NULL,
					 ismdev, pclc_smcd->gid);
	if (*local_contact < 0) {
		if (*local_contact == -ENOMEM)
			return SMC_CLC_DECL_MEM;/* insufficient memory*/
		return SMC_CLC_DECL_INTERR; /* other error */
	}

	/* Check if peer can be reached via ISM device */
	if (smc_ism_cantalk(new_smc->conn.lgr->peer_gid,
			    new_smc->conn.lgr->vlan_id,
			    new_smc->conn.lgr->smcd)) {
		if (*local_contact == SMC_FIRST_CONTACT)
			smc_lgr_forget(new_smc->conn.lgr);
		smc_conn_free(&new_smc->conn);
		return SMC_CLC_DECL_CNFERR;
	}

	/* Create send and receive buffers */
	if (smc_buf_create(new_smc, true)) {
		if (*local_contact == SMC_FIRST_CONTACT)
			smc_lgr_forget(new_smc->conn.lgr);
		smc_conn_free(&new_smc->conn);
		return SMC_CLC_DECL_MEM;
	}

	return 0;
}

/* listen worker: register buffers */
static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
{
	struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];

	if (local_contact != SMC_FIRST_CONTACT) {
		if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
			return SMC_CLC_DECL_ERR_REGRMB;
	}
	smc_rmb_sync_sg_for_device(&new_smc->conn);

	return 0;
}

/* listen worker: finish RDMA setup */
static int smc_listen_rdma_finish(struct smc_sock *new_smc,
				  struct smc_clc_msg_accept_confirm *cclc,
				  int local_contact)
{
	struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
	int reason_code = 0;

	if (local_contact == SMC_FIRST_CONTACT)
		smc_link_save_peer_info(link, cclc);

	if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) {
		reason_code = SMC_CLC_DECL_ERR_RTOK;
		goto decline;
	}

	if (local_contact == SMC_FIRST_CONTACT) {
		if (smc_ib_ready_link(link)) {
			reason_code = SMC_CLC_DECL_ERR_RDYLNK;
			goto decline;
		}
		/* QP confirmation over RoCE fabric */
		reason_code = smc_serv_conf_first_link(new_smc);
		if (reason_code)
			goto decline;
	}
	return 0;

decline:
	smc_listen_decline(new_smc, reason_code, local_contact);
	return reason_code;
}
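
/* Per-connection listen worker: runs the server side of the CLC
 * handshake on the freshly accepted clcsock.  Sequence: wait for the
 * Proposal message, pick ISM (SMC-D) or RDMA (SMC-R) according to the
 * proposed path and the local device configuration, send Accept, wait
 * for Confirm, then finish the link setup.  Any failure declines the
 * connection and falls back to TCP where possible.
 */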

/* setup for RDMA connection of server */
static void smc_listen_work(struct work_struct *work)
{
	struct smc_sock *new_smc = container_of(work, struct smc_sock,
						smc_listen_work);
	struct socket *newclcsock = new_smc->clcsock;
	struct smc_clc_msg_accept_confirm cclc;
	struct smc_clc_msg_proposal *pclc;
	struct smc_ib_device *ibdev;
	bool ism_supported = false;
	struct smcd_dev *ismdev;
	u8 buf[SMC_CLC_MAX_LEN];
	int local_contact = 0;
	unsigned short vlan;
	int reason_code = 0;
	int rc = 0;
	u8 ibport;

	if (new_smc->use_fallback) {
		smc_listen_out_connected(new_smc);
		return;
	}

	/* check if peer is smc capable */
	if (!tcp_sk(newclcsock->sk)->syn_smc) {
		new_smc->use_fallback = true;
		new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC;
		smc_listen_out_connected(new_smc);
		return;
	}

	/* do inband token exchange -
	 * wait for and receive SMC Proposal CLC message
	 */
	pclc = (struct smc_clc_msg_proposal *)&buf;
	reason_code = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN,
				       SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
	if (reason_code) {
		smc_listen_decline(new_smc, reason_code, 0);
		return;
	}

	/* IPSec connections opt out of SMC-R optimizations */
	if (using_ipsec(new_smc)) {
		smc_listen_decline(new_smc, SMC_CLC_DECL_IPSEC, 0);
		return;
	}

	mutex_lock(&smc_create_lgr_pending);
	smc_close_init(new_smc);
	smc_rx_init(new_smc);
	smc_tx_init(new_smc);

	/* check if ISM is available */
	if ((pclc->hdr.path == SMC_TYPE_D || pclc->hdr.path == SMC_TYPE_B) &&
	    !smc_check_ism(new_smc, &ismdev) &&
	    !smc_listen_ism_init(new_smc, pclc, ismdev, &local_contact)) {
		ism_supported = true;
	}

	/* check if RDMA is available */
	if (!ism_supported &&
	    ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) ||
	     smc_vlan_by_tcpsk(new_smc->clcsock, &vlan) ||
	     smc_check_rdma(new_smc, &ibdev, &ibport, vlan, NULL) ||
	     smc_listen_rdma_check(new_smc, pclc) ||
	     smc_listen_rdma_init(new_smc, pclc, ibdev, ibport,
				  &local_contact) ||
	     smc_listen_rdma_reg(new_smc, local_contact))) {
		/* SMC not supported, decline */
		mutex_unlock(&smc_create_lgr_pending);
		smc_listen_decline(new_smc, SMC_CLC_DECL_MODEUNSUPP,
				   local_contact);
		return;
	}

	/* send SMC Accept CLC message */
	rc = smc_clc_send_accept(new_smc, local_contact);
	if (rc) {
		mutex_unlock(&smc_create_lgr_pending);
		smc_listen_decline(new_smc, rc, local_contact);
		return;
	}

	/* receive SMC Confirm CLC message */
	reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
				       SMC_CLC_CONFIRM, CLC_WAIT_TIME);
	if (reason_code) {
		mutex_unlock(&smc_create_lgr_pending);
		smc_listen_decline(new_smc, reason_code, local_contact);
		return;
	}

	/* finish worker */
	if (!ism_supported) {
		if (smc_listen_rdma_finish(new_smc, &cclc, local_contact)) {
			mutex_unlock(&smc_create_lgr_pending);
			return;
		}
	}
	smc_conn_save_peer_info(new_smc, &cclc);
	mutex_unlock(&smc_create_lgr_pending);
	smc_listen_out_connected(new_smc);
}

static void smc_tcp_listen_work(struct work_struct *work)
{
	struct smc_sock *lsmc = container_of(work, struct smc_sock,
					     tcp_listen_work);
	struct sock *lsk = &lsmc->sk;
	struct smc_sock *new_smc;
	int rc = 0;

	lock_sock(lsk);
	while (lsk->sk_state == SMC_LISTEN) {
		rc = smc_clcsock_accept(lsmc, &new_smc);
		if (rc)
			goto out;
		if (!new_smc)
			continue;

		new_smc->listen_smc = lsmc;
		new_smc->use_fallback = lsmc->use_fallback;
		new_smc->fallback_rsn = lsmc->fallback_rsn;
		sock_hold(lsk); /* sock_put in smc_listen_work */
		INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
		smc_copy_sock_settings_to_smc(new_smc);
		new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
		new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
		sock_hold(&new_smc->sk); /* sock_put in passive closing */
		if (!schedule_work(&new_smc->smc_listen_work))
			sock_put(&new_smc->sk);
	}

out:
	release_sock(lsk);
	sock_put(&lsmc->sk); /* sock_hold in smc_listen */
}

static int smc_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);
	lock_sock(sk);

	rc = -EINVAL;
	if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN))
		goto out;

	rc = 0;
	if (sk->sk_state == SMC_LISTEN) {
		sk->sk_max_ack_backlog = backlog;
		goto out;
	}
	/* some socket options are handled in core, so we could not apply
	 * them to the clc socket -- copy smc socket options to clc socket
	 */
	smc_copy_sock_settings_to_clc(smc);
	if (!smc->use_fallback)
		tcp_sk(smc->clcsock->sk)->syn_smc = 1;

	rc = kernel_listen(smc->clcsock, backlog);
	if (rc)
		goto out;
	sk->sk_max_ack_backlog = backlog;
	sk->sk_ack_backlog = 0;
	sk->sk_state = SMC_LISTEN;
	sock_hold(sk); /* sock_hold in tcp_listen_worker */
	if (!schedule_work(&smc->tcp_listen_work))
		sock_put(sk);

out:
	release_sock(sk);
	return rc;
}

static int smc_accept(struct socket *sock, struct socket *new_sock,
		      int flags, bool kern)
{
	struct sock *sk = sock->sk, *nsk;
	DECLARE_WAITQUEUE(wait, current);
	struct smc_sock *lsmc;
	long timeo;
	int rc = 0;

	lsmc = smc_sk(sk);
	sock_hold(sk); /* sock_put below */
	lock_sock(sk);

	if (lsmc->sk.sk_state != SMC_LISTEN) {
		rc = -EINVAL;
		release_sock(sk);
		goto out;
	}

	/* Wait for an incoming connection */
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	add_wait_queue_exclusive(sk_sleep(sk), &wait);
	while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (!timeo) {
			rc = -EAGAIN;
			break;
		}
		release_sock(sk);
		timeo = schedule_timeout(timeo);
		/* wakeup by sk_data_ready in smc_listen_work() */
		sched_annotate_sleep();
		lock_sock(sk);
		if (signal_pending(current)) {
			rc = sock_intr_errno(timeo);
			break;
		}
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(sk_sleep(sk), &wait);

	if (!rc)
		rc = sock_error(nsk);
	release_sock(sk);
	if (rc)
		goto out;

	if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) {
		/* wait till data arrives on the socket */
		timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
					 MSEC_PER_SEC);
		if (smc_sk(nsk)->use_fallback) {
			struct sock *clcsk = smc_sk(nsk)->clcsock->sk;

			lock_sock(clcsk);
			if (skb_queue_empty(&clcsk->sk_receive_queue))
				sk_wait_data(clcsk, &timeo, NULL);
			release_sock(clcsk);
		} else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) {
			lock_sock(nsk);
			smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available);
			release_sock(nsk);
		}
	}

out:
	sock_put(sk); /* sock_hold above */
	return rc;
}

static int smc_getname(struct socket *sock, struct sockaddr *addr,
		       int peer)
{
	struct smc_sock *smc;

	if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
	    (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
		return -ENOTCONN;

	smc = smc_sk(sock->sk);

	return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
}
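
/* A note on MSG_FASTOPEN below: TCP Fast Open carries data in the SYN
 * and would bypass the CLC handshake, so a fastopen sendmsg on a not
 * yet connected socket forces the TCP fallback path instead.
 */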

static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_INIT))
		goto out;

	if (msg->msg_flags & MSG_FASTOPEN) {
		if (sk->sk_state == SMC_INIT) {
			smc->use_fallback = true;
			smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
		} else {
			rc = -EINVAL;
			goto out;
		}
	}

	if (smc->use_fallback)
		rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
	else
		rc = smc_tx_sendmsg(smc, msg, len);
out:
	release_sock(sk);
	return rc;
}

static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
		       int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state == SMC_INIT) ||
	    (sk->sk_state == SMC_LISTEN) ||
	    (sk->sk_state == SMC_CLOSED))
		goto out;

	if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
		rc = 0;
		goto out;
	}

	if (smc->use_fallback) {
		rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
	} else {
		msg->msg_namelen = 0;
		rc = smc_rx_recvmsg(smc, msg, NULL, len, flags);
	}

out:
	release_sock(sk);
	return rc;
}

static __poll_t smc_accept_poll(struct sock *parent)
{
	struct smc_sock *isk = smc_sk(parent);
	__poll_t mask = 0;

	spin_lock(&isk->accept_q_lock);
	if (!list_empty(&isk->accept_q))
		mask = EPOLLIN | EPOLLRDNORM;
	spin_unlock(&isk->accept_q_lock);

	return mask;
}

static __poll_t smc_poll(struct file *file, struct socket *sock,
			 poll_table *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask = 0;
	struct smc_sock *smc;

	if (!sk)
		return EPOLLNVAL;

	smc = smc_sk(sock->sk);
	if (smc->use_fallback) {
		/* delegate to CLC child sock */
		mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
		sk->sk_err = smc->clcsock->sk->sk_err;
		if (sk->sk_err)
			mask |= EPOLLERR;
	} else {
		if (sk->sk_state != SMC_CLOSED)
			sock_poll_wait(file, sock, wait);
		if (sk->sk_err)
			mask |= EPOLLERR;
		if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
		    (sk->sk_state == SMC_CLOSED))
			mask |= EPOLLHUP;
		if (sk->sk_state == SMC_LISTEN) {
			/* woken up by sk_data_ready in smc_listen_work() */
			mask = smc_accept_poll(sk);
		} else {
			if (atomic_read(&smc->conn.sndbuf_space) ||
			    sk->sk_shutdown & SEND_SHUTDOWN) {
				mask |= EPOLLOUT | EPOLLWRNORM;
			} else {
				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
			}
			if (atomic_read(&smc->conn.bytes_to_rcv))
				mask |= EPOLLIN | EPOLLRDNORM;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
			if (sk->sk_state == SMC_APPCLOSEWAIT1)
				mask |= EPOLLIN;
			if (smc->conn.urg_state == SMC_URG_VALID)
				mask |= EPOLLPRI;
		}
	}

	return mask;
}

static int smc_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;
	int rc1 = 0;

	smc = smc_sk(sk);

	if ((how < SHUT_RD) || (how > SHUT_RDWR))
		return rc;

	lock_sock(sk);

	rc = -ENOTCONN;
	if ((sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPFINCLOSEWAIT))
		goto out;
	if (smc->use_fallback) {
		rc = kernel_sock_shutdown(smc->clcsock, how);
		sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
		if (sk->sk_shutdown == SHUTDOWN_MASK)
			sk->sk_state = SMC_CLOSED;
		goto out;
	}
	switch (how) {
	case SHUT_RDWR:		/* shutdown in both directions */
		rc = smc_close_active(smc);
		break;
	case SHUT_WR:
		rc = smc_close_shutdown_write(smc);
		break;
	case SHUT_RD:
		rc = 0;
		/* nothing more to do because peer is not involved */
		break;
	}
	if (smc->clcsock)
		rc1 = kernel_sock_shutdown(smc->clcsock, how);
	/* map sock_shutdown_cmd constants to sk_shutdown value range */
	sk->sk_shutdown |= how + 1;

out:
	release_sock(sk);
	return rc ? rc : rc1;
}

static int smc_setsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int val, rc;

	smc = smc_sk(sk);

	/* generic setsockopts reaching us here always apply to the
	 * CLC socket
	 */
	rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
					   optval, optlen);
	if (smc->clcsock->sk->sk_err) {
		sk->sk_err = smc->clcsock->sk->sk_err;
		sk->sk_error_report(sk);
	}
	if (rc)
		return rc;

	if (optlen < sizeof(int))
		return -EINVAL;
	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	lock_sock(sk);
	switch (optname) {
	case TCP_ULP:
	case TCP_FASTOPEN:
	case TCP_FASTOPEN_CONNECT:
	case TCP_FASTOPEN_KEY:
	case TCP_FASTOPEN_NO_COOKIE:
		/* option not supported by SMC */
		if (sk->sk_state == SMC_INIT) {
			smc->use_fallback = true;
			smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
		} else {
			if (!smc->use_fallback)
				rc = -EINVAL;
		}
		break;
	case TCP_NODELAY:
		if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
			if (val && !smc->use_fallback)
				mod_delayed_work(system_wq, &smc->conn.tx_work,
						 0);
		}
		break;
	case TCP_CORK:
		if (sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) {
			if (!val && !smc->use_fallback)
				mod_delayed_work(system_wq, &smc->conn.tx_work,
						 0);
		}
		break;
	case TCP_DEFER_ACCEPT:
		smc->sockopt_defer_accept = val;
		break;
	default:
		break;
	}
	release_sock(sk);

	return rc;
}

static int smc_getsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	struct smc_sock *smc;

	smc = smc_sk(sock->sk);
	/* socket options apply to the CLC socket */
	return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
					     optval, optlen);
}
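
/* Without TCP queues to inspect, smc_ioctl() emulates the queue ioctls
 * from the connection's cursor accounting: bytes_to_rcv for SIOCINQ,
 * send buffer length minus free sndbuf_space for SIOCOUTQ, and the
 * distance between consumer cursor and urgent cursor for SIOCATMARK.
 */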

static int smc_ioctl(struct socket *sock, unsigned int cmd,
		     unsigned long arg)
{
	union smc_host_cursor cons, urg;
	struct smc_connection *conn;
	struct smc_sock *smc;
	int answ;

	smc = smc_sk(sock->sk);
	conn = &smc->conn;
	lock_sock(&smc->sk);
	if (smc->use_fallback) {
		if (!smc->clcsock) {
			release_sock(&smc->sk);
			return -EBADF;
		}
		answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
		release_sock(&smc->sk);
		return answ;
	}
	switch (cmd) {
	case SIOCINQ: /* same as FIONREAD */
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED)
			answ = 0;
		else
			answ = atomic_read(&smc->conn.bytes_to_rcv);
		break;
	case SIOCOUTQ:
		/* output queue size (not sent + not acked) */
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED)
			answ = 0;
		else
			answ = smc->conn.sndbuf_desc->len -
					atomic_read(&smc->conn.sndbuf_space);
		break;
	case SIOCOUTQNSD:
		/* output queue size (not sent only) */
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED)
			answ = 0;
		else
			answ = smc_tx_prepared_sends(&smc->conn);
		break;
	case SIOCATMARK:
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED) {
			answ = 0;
		} else {
			smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
			smc_curs_copy(&urg, &conn->urg_curs, conn);
			answ = smc_curs_diff(conn->rmb_desc->len,
					     &cons, &urg) == 1;
		}
		break;
	default:
		release_sock(&smc->sk);
		return -ENOIOCTLCMD;
	}
	release_sock(&smc->sk);

	return put_user(answ, (int __user *)arg);
}

static ssize_t smc_sendpage(struct socket *sock, struct page *page,
			    int offset, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state != SMC_ACTIVE) {
		release_sock(sk);
		goto out;
	}
	release_sock(sk);
	if (smc->use_fallback)
		rc = kernel_sendpage(smc->clcsock, page, offset,
				     size, flags);
	else
		rc = sock_no_sendpage(sock, page, offset, size, flags);

out:
	return rc;
}

/* Map the affected portions of the rmbe into an spd, note the number of bytes
 * to splice in conn->splice_pending, and press 'go'. Delays consumer cursor
 * updates till whenever a respective page has been fully processed.
 * Note that subsequent recv() calls have to wait till all splice() processing
 * completed.
 */
static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
			       struct pipe_inode_info *pipe, size_t len,
			       unsigned int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);

	if (sk->sk_state == SMC_INIT ||
	    sk->sk_state == SMC_LISTEN ||
	    sk->sk_state == SMC_CLOSED)
		goto out;

	if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
		rc = 0;
		goto out;
	}

	if (smc->use_fallback) {
		rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
						    pipe, len, flags);
	} else {
		if (*ppos) {
			rc = -ESPIPE;
			goto out;
		}
		if (flags & SPLICE_F_NONBLOCK)
			flags = MSG_DONTWAIT;
		else
			flags = 0;
		rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags);
	}
out:
	release_sock(sk);

	return rc;
}

/* must look like tcp */
static const struct proto_ops smc_sock_ops = {
	.family		= PF_SMC,
	.owner		= THIS_MODULE,
	.release	= smc_release,
	.bind		= smc_bind,
	.connect	= smc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= smc_accept,
	.getname	= smc_getname,
	.poll		= smc_poll,
	.ioctl		= smc_ioctl,
	.listen		= smc_listen,
	.shutdown	= smc_shutdown,
	.setsockopt	= smc_setsockopt,
	.getsockopt	= smc_getsockopt,
	.sendmsg	= smc_sendmsg,
	.recvmsg	= smc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= smc_sendpage,
	.splice_read	= smc_splice_read,
};
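
/* From user space an SMC socket is created like a TCP socket; only the
 * address family and protocol differ.  A minimal sketch (user-space
 * code, not part of this module):
 *
 *	int sd4 = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC);	// IPv4
 *	int sd6 = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC6);	// IPv6
 *
 * bind()/listen()/connect()/accept() then behave as for AF_INET
 * SOCK_STREAM sockets; if SMC cannot be used, the connection falls
 * back transparently to the internal TCP clcsock.
 */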

static int smc_create(struct net *net, struct socket *sock, int protocol,
		      int kern)
{
	int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
	struct smc_sock *smc;
	struct sock *sk;
	int rc;

	rc = -ESOCKTNOSUPPORT;
	if (sock->type != SOCK_STREAM)
		goto out;

	rc = -EPROTONOSUPPORT;
	if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
		goto out;

	rc = -ENOBUFS;
	sock->ops = &smc_sock_ops;
	sk = smc_sock_alloc(net, sock, protocol);
	if (!sk)
		goto out;

	/* create internal TCP socket for CLC handshake and fallback */
	smc = smc_sk(sk);
	smc->use_fallback = false; /* assume rdma capability first */
	smc->fallback_rsn = 0;
	rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
			      &smc->clcsock);
	if (rc) {
		sk_common_release(sk);
		goto out;
	}
	smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE);
	smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE);

out:
	return rc;
}

static const struct net_proto_family smc_sock_family_ops = {
	.family	= PF_SMC,
	.owner	= THIS_MODULE,
	.create	= smc_create,
};

static int __init smc_init(void)
{
	int rc;

	rc = smc_pnet_init();
	if (rc)
		return rc;

	rc = smc_llc_init();
	if (rc) {
		pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = smc_cdc_init();
	if (rc) {
		pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = proto_register(&smc_proto, 1);
	if (rc) {
		pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = proto_register(&smc_proto6, 1);
	if (rc) {
		pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
		goto out_proto;
	}

	rc = sock_register(&smc_sock_family_ops);
	if (rc) {
		pr_err("%s: sock_register fails with %d\n", __func__, rc);
		goto out_proto6;
	}
	INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
	INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);

	rc = smc_ib_register_client();
	if (rc) {
		pr_err("%s: ib_register fails with %d\n", __func__, rc);
		goto out_sock;
	}

	static_branch_enable(&tcp_have_smc);
	return 0;

out_sock:
	sock_unregister(PF_SMC);
out_proto6:
	proto_unregister(&smc_proto6);
out_proto:
	proto_unregister(&smc_proto);
out_pnet:
	smc_pnet_exit();
	return rc;
}

static void __exit smc_exit(void)
{
	smc_core_exit();
	static_branch_disable(&tcp_have_smc);
	smc_ib_unregister_client();
	sock_unregister(PF_SMC);
	proto_unregister(&smc_proto6);
	proto_unregister(&smc_proto);
	smc_pnet_exit();
}

module_init(smc_init);
module_exit(smc_exit);

MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("smc socket address family");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_SMC);