1ac713874SUrsula Braun /* 2ac713874SUrsula Braun * Shared Memory Communications over RDMA (SMC-R) and RoCE 3ac713874SUrsula Braun * 4ac713874SUrsula Braun * AF_SMC protocol family socket handler keeping the AF_INET sock address type 5ac713874SUrsula Braun * applies to SOCK_STREAM sockets only 6ac713874SUrsula Braun * offers an alternative communication option for TCP-protocol sockets 7ac713874SUrsula Braun * applicable with RoCE-cards only 8ac713874SUrsula Braun * 9a046d57dSUrsula Braun * Initial restrictions: 10a046d57dSUrsula Braun * - non-blocking connect postponed 11a046d57dSUrsula Braun * - IPv6 support postponed 12a046d57dSUrsula Braun * - support for alternate links postponed 13a046d57dSUrsula Braun * - partial support for non-blocking sockets only 14a046d57dSUrsula Braun * - support for urgent data postponed 15a046d57dSUrsula Braun * 16ac713874SUrsula Braun * Copyright IBM Corp. 2016 17ac713874SUrsula Braun * 18ac713874SUrsula Braun * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> 19ac713874SUrsula Braun * based on prototype from Frank Blaschka 20ac713874SUrsula Braun */ 21ac713874SUrsula Braun 22ac713874SUrsula Braun #define KMSG_COMPONENT "smc" 23ac713874SUrsula Braun #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 24ac713874SUrsula Braun 25ac713874SUrsula Braun #include <linux/module.h> 26ac713874SUrsula Braun #include <linux/socket.h> 27a046d57dSUrsula Braun #include <linux/inetdevice.h> 28a046d57dSUrsula Braun #include <linux/workqueue.h> 29ac713874SUrsula Braun #include <net/sock.h> 30a046d57dSUrsula Braun #include <net/tcp.h> 31ac713874SUrsula Braun 32ac713874SUrsula Braun #include "smc.h" 33a046d57dSUrsula Braun #include "smc_clc.h" 34*0cfdd8f9SUrsula Braun #include "smc_core.h" 35a4cf0443SUrsula Braun #include "smc_ib.h" 366812baabSThomas Richter #include "smc_pnet.h" 37ac713874SUrsula Braun 38*0cfdd8f9SUrsula Braun static DEFINE_MUTEX(smc_create_lgr_pending); /* serialize link group 39*0cfdd8f9SUrsula Braun * creation 40*0cfdd8f9SUrsula Braun */ 41*0cfdd8f9SUrsula Braun 42*0cfdd8f9SUrsula Braun struct smc_lgr_list smc_lgr_list = { /* established link groups */ 43*0cfdd8f9SUrsula Braun .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock), 44*0cfdd8f9SUrsula Braun .list = LIST_HEAD_INIT(smc_lgr_list.list), 45*0cfdd8f9SUrsula Braun }; 46*0cfdd8f9SUrsula Braun 47a046d57dSUrsula Braun static void smc_tcp_listen_work(struct work_struct *); 48a046d57dSUrsula Braun 49ac713874SUrsula Braun static void smc_set_keepalive(struct sock *sk, int val) 50ac713874SUrsula Braun { 51ac713874SUrsula Braun struct smc_sock *smc = smc_sk(sk); 52ac713874SUrsula Braun 53ac713874SUrsula Braun smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val); 54ac713874SUrsula Braun } 55ac713874SUrsula Braun 56ac713874SUrsula Braun static struct proto smc_proto = { 57ac713874SUrsula Braun .name = "SMC", 58ac713874SUrsula Braun .owner = THIS_MODULE, 59ac713874SUrsula Braun .keepalive = smc_set_keepalive, 60ac713874SUrsula Braun .obj_size = sizeof(struct smc_sock), 61ac713874SUrsula Braun .slab_flags = SLAB_DESTROY_BY_RCU, 62ac713874SUrsula Braun }; 63ac713874SUrsula Braun 64ac713874SUrsula Braun static int smc_release(struct socket *sock) 65ac713874SUrsula Braun { 66ac713874SUrsula Braun struct sock *sk = sock->sk; 67ac713874SUrsula Braun struct smc_sock *smc; 68ac713874SUrsula Braun 69ac713874SUrsula Braun if (!sk) 70ac713874SUrsula Braun goto out; 71ac713874SUrsula Braun 72ac713874SUrsula Braun smc = smc_sk(sk); 73ac713874SUrsula Braun lock_sock(sk); 74ac713874SUrsula Braun 75ac713874SUrsula Braun sk->sk_state = SMC_CLOSED; 76ac713874SUrsula Braun if (smc->clcsock) { 77ac713874SUrsula Braun sock_release(smc->clcsock); 78ac713874SUrsula Braun smc->clcsock = NULL; 79ac713874SUrsula Braun } 80ac713874SUrsula Braun 81ac713874SUrsula Braun /* detach socket */ 82ac713874SUrsula Braun sock_orphan(sk); 83ac713874SUrsula Braun sock->sk = NULL; 84ac713874SUrsula Braun release_sock(sk); 85ac713874SUrsula Braun 86ac713874SUrsula Braun sock_put(sk); 87ac713874SUrsula Braun out: 88ac713874SUrsula Braun return 0; 89ac713874SUrsula Braun } 90ac713874SUrsula Braun 91ac713874SUrsula Braun static void smc_destruct(struct sock *sk) 92ac713874SUrsula Braun { 93ac713874SUrsula Braun if (sk->sk_state != SMC_CLOSED) 94ac713874SUrsula Braun return; 95ac713874SUrsula Braun if (!sock_flag(sk, SOCK_DEAD)) 96ac713874SUrsula Braun return; 97ac713874SUrsula Braun 98ac713874SUrsula Braun sk_refcnt_debug_dec(sk); 99ac713874SUrsula Braun } 100ac713874SUrsula Braun 101ac713874SUrsula Braun static struct sock *smc_sock_alloc(struct net *net, struct socket *sock) 102ac713874SUrsula Braun { 103ac713874SUrsula Braun struct smc_sock *smc; 104ac713874SUrsula Braun struct sock *sk; 105ac713874SUrsula Braun 106ac713874SUrsula Braun sk = sk_alloc(net, PF_SMC, GFP_KERNEL, &smc_proto, 0); 107ac713874SUrsula Braun if (!sk) 108ac713874SUrsula Braun return NULL; 109ac713874SUrsula Braun 110ac713874SUrsula Braun sock_init_data(sock, sk); /* sets sk_refcnt to 1 */ 111ac713874SUrsula Braun sk->sk_state = SMC_INIT; 112ac713874SUrsula Braun sk->sk_destruct = smc_destruct; 113ac713874SUrsula Braun sk->sk_protocol = SMCPROTO_SMC; 114ac713874SUrsula Braun smc = smc_sk(sk); 115a046d57dSUrsula Braun INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); 116a046d57dSUrsula Braun INIT_LIST_HEAD(&smc->accept_q); 117a046d57dSUrsula Braun spin_lock_init(&smc->accept_q_lock); 118a046d57dSUrsula Braun sk_refcnt_debug_inc(sk); 119ac713874SUrsula Braun 120ac713874SUrsula Braun return sk; 121ac713874SUrsula Braun } 122ac713874SUrsula Braun 123ac713874SUrsula Braun static int smc_bind(struct socket *sock, struct sockaddr *uaddr, 124ac713874SUrsula Braun int addr_len) 125ac713874SUrsula Braun { 126ac713874SUrsula Braun struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; 127ac713874SUrsula Braun struct sock *sk = sock->sk; 128ac713874SUrsula Braun struct smc_sock *smc; 129ac713874SUrsula Braun int rc; 130ac713874SUrsula Braun 131ac713874SUrsula Braun smc = smc_sk(sk); 132ac713874SUrsula Braun 133ac713874SUrsula Braun /* replicate tests from inet_bind(), to be safe wrt. future changes */ 134ac713874SUrsula Braun rc = -EINVAL; 135ac713874SUrsula Braun if (addr_len < sizeof(struct sockaddr_in)) 136ac713874SUrsula Braun goto out; 137ac713874SUrsula Braun 138ac713874SUrsula Braun rc = -EAFNOSUPPORT; 139ac713874SUrsula Braun /* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */ 140ac713874SUrsula Braun if ((addr->sin_family != AF_INET) && 141ac713874SUrsula Braun ((addr->sin_family != AF_UNSPEC) || 142ac713874SUrsula Braun (addr->sin_addr.s_addr != htonl(INADDR_ANY)))) 143ac713874SUrsula Braun goto out; 144ac713874SUrsula Braun 145ac713874SUrsula Braun lock_sock(sk); 146ac713874SUrsula Braun 147ac713874SUrsula Braun /* Check if socket is already active */ 148ac713874SUrsula Braun rc = -EINVAL; 149ac713874SUrsula Braun if (sk->sk_state != SMC_INIT) 150ac713874SUrsula Braun goto out_rel; 151ac713874SUrsula Braun 152ac713874SUrsula Braun smc->clcsock->sk->sk_reuse = sk->sk_reuse; 153ac713874SUrsula Braun rc = kernel_bind(smc->clcsock, uaddr, addr_len); 154ac713874SUrsula Braun 155ac713874SUrsula Braun out_rel: 156ac713874SUrsula Braun release_sock(sk); 157ac713874SUrsula Braun out: 158ac713874SUrsula Braun return rc; 159ac713874SUrsula Braun } 160ac713874SUrsula Braun 161ac713874SUrsula Braun static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk, 162ac713874SUrsula Braun unsigned long mask) 163ac713874SUrsula Braun { 164ac713874SUrsula Braun /* options we don't get control via setsockopt for */ 165ac713874SUrsula Braun nsk->sk_type = osk->sk_type; 166ac713874SUrsula Braun nsk->sk_sndbuf = osk->sk_sndbuf; 167ac713874SUrsula Braun nsk->sk_rcvbuf = osk->sk_rcvbuf; 168ac713874SUrsula Braun nsk->sk_sndtimeo = osk->sk_sndtimeo; 169ac713874SUrsula Braun nsk->sk_rcvtimeo = osk->sk_rcvtimeo; 170ac713874SUrsula Braun nsk->sk_mark = osk->sk_mark; 171ac713874SUrsula Braun nsk->sk_priority = osk->sk_priority; 172ac713874SUrsula Braun nsk->sk_rcvlowat = osk->sk_rcvlowat; 173ac713874SUrsula Braun nsk->sk_bound_dev_if = osk->sk_bound_dev_if; 174ac713874SUrsula Braun nsk->sk_err = osk->sk_err; 175ac713874SUrsula Braun 176ac713874SUrsula Braun nsk->sk_flags &= ~mask; 177ac713874SUrsula Braun nsk->sk_flags |= osk->sk_flags & mask; 178ac713874SUrsula Braun } 179ac713874SUrsula Braun 180ac713874SUrsula Braun #define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \ 181ac713874SUrsula Braun (1UL << SOCK_KEEPOPEN) | \ 182ac713874SUrsula Braun (1UL << SOCK_LINGER) | \ 183ac713874SUrsula Braun (1UL << SOCK_BROADCAST) | \ 184ac713874SUrsula Braun (1UL << SOCK_TIMESTAMP) | \ 185ac713874SUrsula Braun (1UL << SOCK_DBG) | \ 186ac713874SUrsula Braun (1UL << SOCK_RCVTSTAMP) | \ 187ac713874SUrsula Braun (1UL << SOCK_RCVTSTAMPNS) | \ 188ac713874SUrsula Braun (1UL << SOCK_LOCALROUTE) | \ 189ac713874SUrsula Braun (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \ 190ac713874SUrsula Braun (1UL << SOCK_RXQ_OVFL) | \ 191ac713874SUrsula Braun (1UL << SOCK_WIFI_STATUS) | \ 192ac713874SUrsula Braun (1UL << SOCK_NOFCS) | \ 193ac713874SUrsula Braun (1UL << SOCK_FILTER_LOCKED)) 194ac713874SUrsula Braun /* copy only relevant settings and flags of SOL_SOCKET level from smc to 195ac713874SUrsula Braun * clc socket (since smc is not called for these options from net/core) 196ac713874SUrsula Braun */ 197ac713874SUrsula Braun static void smc_copy_sock_settings_to_clc(struct smc_sock *smc) 198ac713874SUrsula Braun { 199ac713874SUrsula Braun smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC); 200ac713874SUrsula Braun } 201ac713874SUrsula Braun 202ac713874SUrsula Braun #define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \ 203ac713874SUrsula Braun (1UL << SOCK_KEEPOPEN) | \ 204ac713874SUrsula Braun (1UL << SOCK_LINGER) | \ 205ac713874SUrsula Braun (1UL << SOCK_DBG)) 206ac713874SUrsula Braun /* copy only settings and flags relevant for smc from clc to smc socket */ 207ac713874SUrsula Braun static void smc_copy_sock_settings_to_smc(struct smc_sock *smc) 208ac713874SUrsula Braun { 209ac713874SUrsula Braun smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC); 210ac713874SUrsula Braun } 211ac713874SUrsula Braun 212a046d57dSUrsula Braun /* determine subnet and mask of internal TCP socket */ 213a046d57dSUrsula Braun int smc_netinfo_by_tcpsk(struct socket *clcsock, 214a046d57dSUrsula Braun __be32 *subnet, u8 *prefix_len) 215a046d57dSUrsula Braun { 216a046d57dSUrsula Braun struct dst_entry *dst = sk_dst_get(clcsock->sk); 217a046d57dSUrsula Braun struct sockaddr_in addr; 218a046d57dSUrsula Braun int rc = -ENOENT; 219a046d57dSUrsula Braun int len; 220a046d57dSUrsula Braun 221a046d57dSUrsula Braun if (!dst) { 222a046d57dSUrsula Braun rc = -ENOTCONN; 223a046d57dSUrsula Braun goto out; 224a046d57dSUrsula Braun } 225a046d57dSUrsula Braun if (!dst->dev) { 226a046d57dSUrsula Braun rc = -ENODEV; 227a046d57dSUrsula Braun goto out_rel; 228a046d57dSUrsula Braun } 229a046d57dSUrsula Braun 230a046d57dSUrsula Braun /* get address to which the internal TCP socket is bound */ 231a046d57dSUrsula Braun kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len); 232a046d57dSUrsula Braun /* analyze IPv4 specific data of net_device belonging to TCP socket */ 233a046d57dSUrsula Braun for_ifa(dst->dev->ip_ptr) { 234a046d57dSUrsula Braun if (ifa->ifa_address != addr.sin_addr.s_addr) 235a046d57dSUrsula Braun continue; 236a046d57dSUrsula Braun *prefix_len = inet_mask_len(ifa->ifa_mask); 237a046d57dSUrsula Braun *subnet = ifa->ifa_address & ifa->ifa_mask; 238a046d57dSUrsula Braun rc = 0; 239a046d57dSUrsula Braun break; 240a046d57dSUrsula Braun } endfor_ifa(dst->dev->ip_ptr); 241a046d57dSUrsula Braun 242a046d57dSUrsula Braun out_rel: 243a046d57dSUrsula Braun dst_release(dst); 244a046d57dSUrsula Braun out: 245a046d57dSUrsula Braun return rc; 246a046d57dSUrsula Braun } 247a046d57dSUrsula Braun 248*0cfdd8f9SUrsula Braun static void smc_conn_save_peer_info(struct smc_sock *smc, 249*0cfdd8f9SUrsula Braun struct smc_clc_msg_accept_confirm *clc) 250*0cfdd8f9SUrsula Braun { 251*0cfdd8f9SUrsula Braun smc->conn.peer_conn_idx = clc->conn_idx; 252*0cfdd8f9SUrsula Braun } 253*0cfdd8f9SUrsula Braun 254*0cfdd8f9SUrsula Braun static void smc_link_save_peer_info(struct smc_link *link, 255*0cfdd8f9SUrsula Braun struct smc_clc_msg_accept_confirm *clc) 256*0cfdd8f9SUrsula Braun { 257*0cfdd8f9SUrsula Braun link->peer_qpn = ntoh24(clc->qpn); 258*0cfdd8f9SUrsula Braun memcpy(link->peer_gid, clc->lcl.gid, SMC_GID_SIZE); 259*0cfdd8f9SUrsula Braun memcpy(link->peer_mac, clc->lcl.mac, sizeof(link->peer_mac)); 260*0cfdd8f9SUrsula Braun link->peer_psn = ntoh24(clc->psn); 261*0cfdd8f9SUrsula Braun link->peer_mtu = clc->qp_mtu; 262*0cfdd8f9SUrsula Braun } 263*0cfdd8f9SUrsula Braun 264a046d57dSUrsula Braun /* setup for RDMA connection of client */ 265a046d57dSUrsula Braun static int smc_connect_rdma(struct smc_sock *smc) 266a046d57dSUrsula Braun { 267*0cfdd8f9SUrsula Braun struct sockaddr_in *inaddr = (struct sockaddr_in *)smc->addr; 268a046d57dSUrsula Braun struct smc_clc_msg_accept_confirm aclc; 269*0cfdd8f9SUrsula Braun int local_contact = SMC_FIRST_CONTACT; 270a046d57dSUrsula Braun struct smc_ib_device *smcibdev; 271*0cfdd8f9SUrsula Braun struct smc_link *link; 272*0cfdd8f9SUrsula Braun u8 srv_first_contact; 273a046d57dSUrsula Braun int reason_code = 0; 274a046d57dSUrsula Braun int rc = 0; 275a046d57dSUrsula Braun u8 ibport; 276a046d57dSUrsula Braun 277a046d57dSUrsula Braun /* IPSec connections opt out of SMC-R optimizations */ 278a046d57dSUrsula Braun if (using_ipsec(smc)) { 279a046d57dSUrsula Braun reason_code = SMC_CLC_DECL_IPSEC; 280a046d57dSUrsula Braun goto decline_rdma; 281a046d57dSUrsula Braun } 282a046d57dSUrsula Braun 283a046d57dSUrsula Braun /* PNET table look up: search active ib_device and port 284a046d57dSUrsula Braun * within same PNETID that also contains the ethernet device 285a046d57dSUrsula Braun * used for the internal TCP socket 286a046d57dSUrsula Braun */ 287a046d57dSUrsula Braun smc_pnet_find_roce_resource(smc->clcsock->sk, &smcibdev, &ibport); 288a046d57dSUrsula Braun if (!smcibdev) { 289a046d57dSUrsula Braun reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ 290a046d57dSUrsula Braun goto decline_rdma; 291a046d57dSUrsula Braun } 292a046d57dSUrsula Braun 293a046d57dSUrsula Braun /* do inband token exchange */ 294a046d57dSUrsula Braun reason_code = smc_clc_send_proposal(smc, smcibdev, ibport); 295a046d57dSUrsula Braun if (reason_code < 0) { 296a046d57dSUrsula Braun rc = reason_code; 297a046d57dSUrsula Braun goto out_err; 298a046d57dSUrsula Braun } 299a046d57dSUrsula Braun if (reason_code > 0) /* configuration error */ 300a046d57dSUrsula Braun goto decline_rdma; 301a046d57dSUrsula Braun /* receive SMC Accept CLC message */ 302a046d57dSUrsula Braun reason_code = smc_clc_wait_msg(smc, &aclc, sizeof(aclc), 303a046d57dSUrsula Braun SMC_CLC_ACCEPT); 304a046d57dSUrsula Braun if (reason_code < 0) { 305a046d57dSUrsula Braun rc = reason_code; 306a046d57dSUrsula Braun goto out_err; 307a046d57dSUrsula Braun } 308a046d57dSUrsula Braun if (reason_code > 0) 309a046d57dSUrsula Braun goto decline_rdma; 310a046d57dSUrsula Braun 311*0cfdd8f9SUrsula Braun srv_first_contact = aclc.hdr.flag; 312*0cfdd8f9SUrsula Braun mutex_lock(&smc_create_lgr_pending); 313*0cfdd8f9SUrsula Braun local_contact = smc_conn_create(smc, inaddr->sin_addr.s_addr, smcibdev, 314*0cfdd8f9SUrsula Braun ibport, &aclc.lcl, srv_first_contact); 315*0cfdd8f9SUrsula Braun if (local_contact < 0) { 316*0cfdd8f9SUrsula Braun rc = local_contact; 317*0cfdd8f9SUrsula Braun if (rc == -ENOMEM) 318*0cfdd8f9SUrsula Braun reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/ 319*0cfdd8f9SUrsula Braun else if (rc == -ENOLINK) 320*0cfdd8f9SUrsula Braun reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */ 321*0cfdd8f9SUrsula Braun goto decline_rdma_unlock; 322*0cfdd8f9SUrsula Braun } 323*0cfdd8f9SUrsula Braun link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK]; 324a046d57dSUrsula Braun 325*0cfdd8f9SUrsula Braun smc_conn_save_peer_info(smc, &aclc); 326*0cfdd8f9SUrsula Braun if (local_contact == SMC_FIRST_CONTACT) 327*0cfdd8f9SUrsula Braun smc_link_save_peer_info(link, &aclc); 328a046d57dSUrsula Braun /* tbd in follow-on patch: more steps to setup RDMA communcication, 329a046d57dSUrsula Braun * create rmbs, map rmbs, rtoken_handling, modify_qp 330a046d57dSUrsula Braun */ 331a046d57dSUrsula Braun 332a046d57dSUrsula Braun rc = smc_clc_send_confirm(smc); 333a046d57dSUrsula Braun if (rc) 334*0cfdd8f9SUrsula Braun goto out_err_unlock; 335a046d57dSUrsula Braun 336a046d57dSUrsula Braun /* tbd in follow-on patch: llc_confirm */ 337a046d57dSUrsula Braun 338*0cfdd8f9SUrsula Braun mutex_unlock(&smc_create_lgr_pending); 339a046d57dSUrsula Braun out_connected: 340a046d57dSUrsula Braun smc_copy_sock_settings_to_clc(smc); 341a046d57dSUrsula Braun smc->sk.sk_state = SMC_ACTIVE; 342a046d57dSUrsula Braun 343*0cfdd8f9SUrsula Braun return rc ? rc : local_contact; 344a046d57dSUrsula Braun 345*0cfdd8f9SUrsula Braun decline_rdma_unlock: 346*0cfdd8f9SUrsula Braun mutex_unlock(&smc_create_lgr_pending); 347*0cfdd8f9SUrsula Braun smc_conn_free(&smc->conn); 348a046d57dSUrsula Braun decline_rdma: 349a046d57dSUrsula Braun /* RDMA setup failed, switch back to TCP */ 350a046d57dSUrsula Braun smc->use_fallback = true; 351a046d57dSUrsula Braun if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) { 352a046d57dSUrsula Braun rc = smc_clc_send_decline(smc, reason_code, 0); 353a046d57dSUrsula Braun if (rc < sizeof(struct smc_clc_msg_decline)) 354a046d57dSUrsula Braun goto out_err; 355a046d57dSUrsula Braun } 356a046d57dSUrsula Braun goto out_connected; 357a046d57dSUrsula Braun 358*0cfdd8f9SUrsula Braun out_err_unlock: 359*0cfdd8f9SUrsula Braun mutex_unlock(&smc_create_lgr_pending); 360*0cfdd8f9SUrsula Braun smc_conn_free(&smc->conn); 361a046d57dSUrsula Braun out_err: 362a046d57dSUrsula Braun return rc; 363a046d57dSUrsula Braun } 364a046d57dSUrsula Braun 365ac713874SUrsula Braun static int smc_connect(struct socket *sock, struct sockaddr *addr, 366ac713874SUrsula Braun int alen, int flags) 367ac713874SUrsula Braun { 368ac713874SUrsula Braun struct sock *sk = sock->sk; 369ac713874SUrsula Braun struct smc_sock *smc; 370ac713874SUrsula Braun int rc = -EINVAL; 371ac713874SUrsula Braun 372ac713874SUrsula Braun smc = smc_sk(sk); 373ac713874SUrsula Braun 374ac713874SUrsula Braun /* separate smc parameter checking to be safe */ 375ac713874SUrsula Braun if (alen < sizeof(addr->sa_family)) 376ac713874SUrsula Braun goto out_err; 377ac713874SUrsula Braun if (addr->sa_family != AF_INET) 378ac713874SUrsula Braun goto out_err; 379a046d57dSUrsula Braun smc->addr = addr; /* needed for nonblocking connect */ 380ac713874SUrsula Braun 381ac713874SUrsula Braun lock_sock(sk); 382ac713874SUrsula Braun switch (sk->sk_state) { 383ac713874SUrsula Braun default: 384ac713874SUrsula Braun goto out; 385ac713874SUrsula Braun case SMC_ACTIVE: 386ac713874SUrsula Braun rc = -EISCONN; 387ac713874SUrsula Braun goto out; 388ac713874SUrsula Braun case SMC_INIT: 389ac713874SUrsula Braun rc = 0; 390ac713874SUrsula Braun break; 391ac713874SUrsula Braun } 392ac713874SUrsula Braun 393ac713874SUrsula Braun smc_copy_sock_settings_to_clc(smc); 394ac713874SUrsula Braun rc = kernel_connect(smc->clcsock, addr, alen, flags); 395ac713874SUrsula Braun if (rc) 396ac713874SUrsula Braun goto out; 397ac713874SUrsula Braun 398a046d57dSUrsula Braun /* setup RDMA connection */ 399a046d57dSUrsula Braun rc = smc_connect_rdma(smc); 400a046d57dSUrsula Braun if (rc < 0) 401a046d57dSUrsula Braun goto out; 402a046d57dSUrsula Braun else 403a046d57dSUrsula Braun rc = 0; /* success cases including fallback */ 404ac713874SUrsula Braun 405ac713874SUrsula Braun out: 406ac713874SUrsula Braun release_sock(sk); 407ac713874SUrsula Braun out_err: 408ac713874SUrsula Braun return rc; 409ac713874SUrsula Braun } 410ac713874SUrsula Braun 411ac713874SUrsula Braun static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) 412ac713874SUrsula Braun { 413ac713874SUrsula Braun struct sock *sk = &lsmc->sk; 414ac713874SUrsula Braun struct socket *new_clcsock; 415ac713874SUrsula Braun struct sock *new_sk; 416ac713874SUrsula Braun int rc; 417ac713874SUrsula Braun 418a046d57dSUrsula Braun release_sock(&lsmc->sk); 419ac713874SUrsula Braun new_sk = smc_sock_alloc(sock_net(sk), NULL); 420ac713874SUrsula Braun if (!new_sk) { 421ac713874SUrsula Braun rc = -ENOMEM; 422ac713874SUrsula Braun lsmc->sk.sk_err = ENOMEM; 423ac713874SUrsula Braun *new_smc = NULL; 424a046d57dSUrsula Braun lock_sock(&lsmc->sk); 425ac713874SUrsula Braun goto out; 426ac713874SUrsula Braun } 427ac713874SUrsula Braun *new_smc = smc_sk(new_sk); 428ac713874SUrsula Braun 429ac713874SUrsula Braun rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0); 430a046d57dSUrsula Braun lock_sock(&lsmc->sk); 431a046d57dSUrsula Braun if (rc < 0) { 432a046d57dSUrsula Braun lsmc->sk.sk_err = -rc; 433a046d57dSUrsula Braun new_sk->sk_state = SMC_CLOSED; 434a046d57dSUrsula Braun sock_set_flag(new_sk, SOCK_DEAD); 435a046d57dSUrsula Braun sock_put(new_sk); 436a046d57dSUrsula Braun *new_smc = NULL; 437a046d57dSUrsula Braun goto out; 438a046d57dSUrsula Braun } 439a046d57dSUrsula Braun if (lsmc->sk.sk_state == SMC_CLOSED) { 440a046d57dSUrsula Braun if (new_clcsock) 441a046d57dSUrsula Braun sock_release(new_clcsock); 442a046d57dSUrsula Braun new_sk->sk_state = SMC_CLOSED; 443a046d57dSUrsula Braun sock_set_flag(new_sk, SOCK_DEAD); 444ac713874SUrsula Braun sock_put(new_sk); 445ac713874SUrsula Braun *new_smc = NULL; 446ac713874SUrsula Braun goto out; 447ac713874SUrsula Braun } 448ac713874SUrsula Braun 449ac713874SUrsula Braun (*new_smc)->clcsock = new_clcsock; 450ac713874SUrsula Braun out: 451ac713874SUrsula Braun return rc; 452ac713874SUrsula Braun } 453ac713874SUrsula Braun 454a046d57dSUrsula Braun /* add a just created sock to the accept queue of the listen sock as 455a046d57dSUrsula Braun * candidate for a following socket accept call from user space 456a046d57dSUrsula Braun */ 457a046d57dSUrsula Braun static void smc_accept_enqueue(struct sock *parent, struct sock *sk) 458a046d57dSUrsula Braun { 459a046d57dSUrsula Braun struct smc_sock *par = smc_sk(parent); 460a046d57dSUrsula Braun 461a046d57dSUrsula Braun sock_hold(sk); 462a046d57dSUrsula Braun spin_lock(&par->accept_q_lock); 463a046d57dSUrsula Braun list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q); 464a046d57dSUrsula Braun spin_unlock(&par->accept_q_lock); 465a046d57dSUrsula Braun sk_acceptq_added(parent); 466a046d57dSUrsula Braun } 467a046d57dSUrsula Braun 468a046d57dSUrsula Braun /* remove a socket from the accept queue of its parental listening socket */ 469a046d57dSUrsula Braun static void smc_accept_unlink(struct sock *sk) 470a046d57dSUrsula Braun { 471a046d57dSUrsula Braun struct smc_sock *par = smc_sk(sk)->listen_smc; 472a046d57dSUrsula Braun 473a046d57dSUrsula Braun spin_lock(&par->accept_q_lock); 474a046d57dSUrsula Braun list_del_init(&smc_sk(sk)->accept_q); 475a046d57dSUrsula Braun spin_unlock(&par->accept_q_lock); 476a046d57dSUrsula Braun sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk); 477a046d57dSUrsula Braun sock_put(sk); 478a046d57dSUrsula Braun } 479a046d57dSUrsula Braun 480a046d57dSUrsula Braun /* remove a sock from the accept queue to bind it to a new socket created 481a046d57dSUrsula Braun * for a socket accept call from user space 482a046d57dSUrsula Braun */ 483a046d57dSUrsula Braun static struct sock *smc_accept_dequeue(struct sock *parent, 484a046d57dSUrsula Braun struct socket *new_sock) 485a046d57dSUrsula Braun { 486a046d57dSUrsula Braun struct smc_sock *isk, *n; 487a046d57dSUrsula Braun struct sock *new_sk; 488a046d57dSUrsula Braun 489a046d57dSUrsula Braun list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) { 490a046d57dSUrsula Braun new_sk = (struct sock *)isk; 491a046d57dSUrsula Braun 492a046d57dSUrsula Braun smc_accept_unlink(new_sk); 493a046d57dSUrsula Braun if (new_sk->sk_state == SMC_CLOSED) { 494a046d57dSUrsula Braun /* tbd in follow-on patch: close this sock */ 495a046d57dSUrsula Braun continue; 496a046d57dSUrsula Braun } 497a046d57dSUrsula Braun if (new_sock) 498a046d57dSUrsula Braun sock_graft(new_sk, new_sock); 499a046d57dSUrsula Braun return new_sk; 500a046d57dSUrsula Braun } 501a046d57dSUrsula Braun return NULL; 502a046d57dSUrsula Braun } 503a046d57dSUrsula Braun 504a046d57dSUrsula Braun /* clean up for a created but never accepted sock */ 505a046d57dSUrsula Braun static void smc_close_non_accepted(struct sock *sk) 506a046d57dSUrsula Braun { 507a046d57dSUrsula Braun struct smc_sock *smc = smc_sk(sk); 508a046d57dSUrsula Braun 509a046d57dSUrsula Braun sock_hold(sk); 510a046d57dSUrsula Braun if (smc->clcsock) { 511a046d57dSUrsula Braun struct socket *tcp; 512a046d57dSUrsula Braun 513a046d57dSUrsula Braun tcp = smc->clcsock; 514a046d57dSUrsula Braun smc->clcsock = NULL; 515a046d57dSUrsula Braun sock_release(tcp); 516a046d57dSUrsula Braun } 517a046d57dSUrsula Braun /* more closing stuff to be added with socket closing patch */ 518a046d57dSUrsula Braun sock_put(sk); 519a046d57dSUrsula Braun } 520a046d57dSUrsula Braun 521a046d57dSUrsula Braun /* setup for RDMA connection of server */ 522a046d57dSUrsula Braun static void smc_listen_work(struct work_struct *work) 523a046d57dSUrsula Braun { 524a046d57dSUrsula Braun struct smc_sock *new_smc = container_of(work, struct smc_sock, 525a046d57dSUrsula Braun smc_listen_work); 526a046d57dSUrsula Braun struct socket *newclcsock = new_smc->clcsock; 527a046d57dSUrsula Braun struct smc_sock *lsmc = new_smc->listen_smc; 528a046d57dSUrsula Braun struct smc_clc_msg_accept_confirm cclc; 529*0cfdd8f9SUrsula Braun int local_contact = SMC_REUSE_CONTACT; 530a046d57dSUrsula Braun struct sock *newsmcsk = &new_smc->sk; 531a046d57dSUrsula Braun struct smc_clc_msg_proposal pclc; 532a046d57dSUrsula Braun struct smc_ib_device *smcibdev; 533a046d57dSUrsula Braun struct sockaddr_in peeraddr; 534*0cfdd8f9SUrsula Braun struct smc_link *link; 535a046d57dSUrsula Braun int reason_code = 0; 536a046d57dSUrsula Braun int rc = 0, len; 537a046d57dSUrsula Braun __be32 subnet; 538a046d57dSUrsula Braun u8 prefix_len; 539a046d57dSUrsula Braun u8 ibport; 540a046d57dSUrsula Braun 541a046d57dSUrsula Braun /* do inband token exchange - 542a046d57dSUrsula Braun *wait for and receive SMC Proposal CLC message 543a046d57dSUrsula Braun */ 544a046d57dSUrsula Braun reason_code = smc_clc_wait_msg(new_smc, &pclc, sizeof(pclc), 545a046d57dSUrsula Braun SMC_CLC_PROPOSAL); 546a046d57dSUrsula Braun if (reason_code < 0) 547a046d57dSUrsula Braun goto out_err; 548a046d57dSUrsula Braun if (reason_code > 0) 549a046d57dSUrsula Braun goto decline_rdma; 550a046d57dSUrsula Braun 551a046d57dSUrsula Braun /* IPSec connections opt out of SMC-R optimizations */ 552a046d57dSUrsula Braun if (using_ipsec(new_smc)) { 553a046d57dSUrsula Braun reason_code = SMC_CLC_DECL_IPSEC; 554a046d57dSUrsula Braun goto decline_rdma; 555a046d57dSUrsula Braun } 556a046d57dSUrsula Braun 557a046d57dSUrsula Braun /* PNET table look up: search active ib_device and port 558a046d57dSUrsula Braun * within same PNETID that also contains the ethernet device 559a046d57dSUrsula Braun * used for the internal TCP socket 560a046d57dSUrsula Braun */ 561a046d57dSUrsula Braun smc_pnet_find_roce_resource(newclcsock->sk, &smcibdev, &ibport); 562a046d57dSUrsula Braun if (!smcibdev) { 563a046d57dSUrsula Braun reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ 564a046d57dSUrsula Braun goto decline_rdma; 565a046d57dSUrsula Braun } 566a046d57dSUrsula Braun 567a046d57dSUrsula Braun /* determine subnet and mask from internal TCP socket */ 568a046d57dSUrsula Braun rc = smc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len); 569a046d57dSUrsula Braun if (rc) { 570a046d57dSUrsula Braun reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ 571a046d57dSUrsula Braun goto decline_rdma; 572a046d57dSUrsula Braun } 573a046d57dSUrsula Braun if ((pclc.outgoing_subnet != subnet) || 574a046d57dSUrsula Braun (pclc.prefix_len != prefix_len)) { 575a046d57dSUrsula Braun reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ 576a046d57dSUrsula Braun goto decline_rdma; 577a046d57dSUrsula Braun } 578a046d57dSUrsula Braun 579a046d57dSUrsula Braun /* get address of the peer connected to the internal TCP socket */ 580a046d57dSUrsula Braun kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr, &len); 581a046d57dSUrsula Braun 582*0cfdd8f9SUrsula Braun /* allocate connection / link group */ 583*0cfdd8f9SUrsula Braun mutex_lock(&smc_create_lgr_pending); 584*0cfdd8f9SUrsula Braun local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr, 585*0cfdd8f9SUrsula Braun smcibdev, ibport, &pclc.lcl, 0); 586*0cfdd8f9SUrsula Braun if (local_contact == SMC_REUSE_CONTACT) 587*0cfdd8f9SUrsula Braun /* lock no longer needed, free it due to following 588*0cfdd8f9SUrsula Braun * smc_clc_wait_msg() call 589a046d57dSUrsula Braun */ 590*0cfdd8f9SUrsula Braun mutex_unlock(&smc_create_lgr_pending); 591*0cfdd8f9SUrsula Braun if (local_contact < 0) { 592*0cfdd8f9SUrsula Braun rc = local_contact; 593*0cfdd8f9SUrsula Braun if (rc == -ENOMEM) 594*0cfdd8f9SUrsula Braun reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/ 595*0cfdd8f9SUrsula Braun else if (rc == -ENOLINK) 596*0cfdd8f9SUrsula Braun reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */ 597*0cfdd8f9SUrsula Braun goto decline_rdma; 598*0cfdd8f9SUrsula Braun } 599*0cfdd8f9SUrsula Braun link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK]; 600a046d57dSUrsula Braun 601a046d57dSUrsula Braun /* tbd in follow-on patch: more steps to setup RDMA communcication, 602a046d57dSUrsula Braun * create rmbs, map rmbs 603a046d57dSUrsula Braun */ 604a046d57dSUrsula Braun 605*0cfdd8f9SUrsula Braun rc = smc_clc_send_accept(new_smc, local_contact); 606a046d57dSUrsula Braun if (rc) 607a046d57dSUrsula Braun goto out_err; 608a046d57dSUrsula Braun 609a046d57dSUrsula Braun /* receive SMC Confirm CLC message */ 610a046d57dSUrsula Braun reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc), 611a046d57dSUrsula Braun SMC_CLC_CONFIRM); 612a046d57dSUrsula Braun if (reason_code < 0) 613a046d57dSUrsula Braun goto out_err; 614a046d57dSUrsula Braun if (reason_code > 0) 615a046d57dSUrsula Braun goto decline_rdma; 616*0cfdd8f9SUrsula Braun smc_conn_save_peer_info(new_smc, &cclc); 617*0cfdd8f9SUrsula Braun if (local_contact == SMC_FIRST_CONTACT) 618*0cfdd8f9SUrsula Braun smc_link_save_peer_info(link, &cclc); 619a046d57dSUrsula Braun 620a046d57dSUrsula Braun /* tbd in follow-on patch: more steps to setup RDMA communcication, 621a046d57dSUrsula Braun * rtoken_handling, modify_qp 622a046d57dSUrsula Braun */ 623a046d57dSUrsula Braun 624a046d57dSUrsula Braun out_connected: 625a046d57dSUrsula Braun sk_refcnt_debug_inc(newsmcsk); 626a046d57dSUrsula Braun newsmcsk->sk_state = SMC_ACTIVE; 627a046d57dSUrsula Braun enqueue: 628*0cfdd8f9SUrsula Braun if (local_contact == SMC_FIRST_CONTACT) 629*0cfdd8f9SUrsula Braun mutex_unlock(&smc_create_lgr_pending); 630a046d57dSUrsula Braun lock_sock(&lsmc->sk); 631a046d57dSUrsula Braun if (lsmc->sk.sk_state == SMC_LISTEN) { 632a046d57dSUrsula Braun smc_accept_enqueue(&lsmc->sk, newsmcsk); 633a046d57dSUrsula Braun } else { /* no longer listening */ 634a046d57dSUrsula Braun smc_close_non_accepted(newsmcsk); 635a046d57dSUrsula Braun } 636a046d57dSUrsula Braun release_sock(&lsmc->sk); 637a046d57dSUrsula Braun 638a046d57dSUrsula Braun /* Wake up accept */ 639a046d57dSUrsula Braun lsmc->sk.sk_data_ready(&lsmc->sk); 640a046d57dSUrsula Braun sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */ 641a046d57dSUrsula Braun return; 642a046d57dSUrsula Braun 643a046d57dSUrsula Braun decline_rdma: 644a046d57dSUrsula Braun /* RDMA setup failed, switch back to TCP */ 645*0cfdd8f9SUrsula Braun smc_conn_free(&new_smc->conn); 646a046d57dSUrsula Braun new_smc->use_fallback = true; 647a046d57dSUrsula Braun if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) { 648a046d57dSUrsula Braun rc = smc_clc_send_decline(new_smc, reason_code, 0); 649a046d57dSUrsula Braun if (rc < sizeof(struct smc_clc_msg_decline)) 650a046d57dSUrsula Braun goto out_err; 651a046d57dSUrsula Braun } 652a046d57dSUrsula Braun goto out_connected; 653a046d57dSUrsula Braun 654a046d57dSUrsula Braun out_err: 655a046d57dSUrsula Braun newsmcsk->sk_state = SMC_CLOSED; 656a046d57dSUrsula Braun goto enqueue; /* queue new sock with sk_err set */ 657a046d57dSUrsula Braun } 658a046d57dSUrsula Braun 659a046d57dSUrsula Braun static void smc_tcp_listen_work(struct work_struct *work) 660a046d57dSUrsula Braun { 661a046d57dSUrsula Braun struct smc_sock *lsmc = container_of(work, struct smc_sock, 662a046d57dSUrsula Braun tcp_listen_work); 663a046d57dSUrsula Braun struct smc_sock *new_smc; 664a046d57dSUrsula Braun int rc = 0; 665a046d57dSUrsula Braun 666a046d57dSUrsula Braun lock_sock(&lsmc->sk); 667a046d57dSUrsula Braun while (lsmc->sk.sk_state == SMC_LISTEN) { 668a046d57dSUrsula Braun rc = smc_clcsock_accept(lsmc, &new_smc); 669a046d57dSUrsula Braun if (rc) 670a046d57dSUrsula Braun goto out; 671a046d57dSUrsula Braun if (!new_smc) 672a046d57dSUrsula Braun continue; 673a046d57dSUrsula Braun 674a046d57dSUrsula Braun new_smc->listen_smc = lsmc; 675a046d57dSUrsula Braun new_smc->use_fallback = false; /* assume rdma capability first*/ 676a046d57dSUrsula Braun sock_hold(&lsmc->sk); /* sock_put in smc_listen_work */ 677a046d57dSUrsula Braun INIT_WORK(&new_smc->smc_listen_work, smc_listen_work); 678a046d57dSUrsula Braun smc_copy_sock_settings_to_smc(new_smc); 679a046d57dSUrsula Braun schedule_work(&new_smc->smc_listen_work); 680a046d57dSUrsula Braun } 681a046d57dSUrsula Braun 682a046d57dSUrsula Braun out: 683a046d57dSUrsula Braun release_sock(&lsmc->sk); 684a046d57dSUrsula Braun lsmc->sk.sk_data_ready(&lsmc->sk); /* no more listening, wake accept */ 685a046d57dSUrsula Braun } 686a046d57dSUrsula Braun 687ac713874SUrsula Braun static int smc_listen(struct socket *sock, int backlog) 688ac713874SUrsula Braun { 689ac713874SUrsula Braun struct sock *sk = sock->sk; 690ac713874SUrsula Braun struct smc_sock *smc; 691ac713874SUrsula Braun int rc; 692ac713874SUrsula Braun 693ac713874SUrsula Braun smc = smc_sk(sk); 694ac713874SUrsula Braun lock_sock(sk); 695ac713874SUrsula Braun 696ac713874SUrsula Braun rc = -EINVAL; 697ac713874SUrsula Braun if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN)) 698ac713874SUrsula Braun goto out; 699ac713874SUrsula Braun 700ac713874SUrsula Braun rc = 0; 701ac713874SUrsula Braun if (sk->sk_state == SMC_LISTEN) { 702ac713874SUrsula Braun sk->sk_max_ack_backlog = backlog; 703ac713874SUrsula Braun goto out; 704ac713874SUrsula Braun } 705ac713874SUrsula Braun /* some socket options are handled in core, so we could not apply 706ac713874SUrsula Braun * them to the clc socket -- copy smc socket options to clc socket 707ac713874SUrsula Braun */ 708ac713874SUrsula Braun smc_copy_sock_settings_to_clc(smc); 709ac713874SUrsula Braun 710ac713874SUrsula Braun rc = kernel_listen(smc->clcsock, backlog); 711ac713874SUrsula Braun if (rc) 712ac713874SUrsula Braun goto out; 713ac713874SUrsula Braun sk->sk_max_ack_backlog = backlog; 714ac713874SUrsula Braun sk->sk_ack_backlog = 0; 715ac713874SUrsula Braun sk->sk_state = SMC_LISTEN; 716a046d57dSUrsula Braun INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); 717a046d57dSUrsula Braun schedule_work(&smc->tcp_listen_work); 718ac713874SUrsula Braun 719ac713874SUrsula Braun out: 720ac713874SUrsula Braun release_sock(sk); 721ac713874SUrsula Braun return rc; 722ac713874SUrsula Braun } 723ac713874SUrsula Braun 724ac713874SUrsula Braun static int smc_accept(struct socket *sock, struct socket *new_sock, 725ac713874SUrsula Braun int flags) 726ac713874SUrsula Braun { 727a046d57dSUrsula Braun struct sock *sk = sock->sk, *nsk; 728a046d57dSUrsula Braun DECLARE_WAITQUEUE(wait, current); 729ac713874SUrsula Braun struct smc_sock *lsmc; 730a046d57dSUrsula Braun long timeo; 731a046d57dSUrsula Braun int rc = 0; 732ac713874SUrsula Braun 733ac713874SUrsula Braun lsmc = smc_sk(sk); 734ac713874SUrsula Braun lock_sock(sk); 735ac713874SUrsula Braun 736ac713874SUrsula Braun if (lsmc->sk.sk_state != SMC_LISTEN) { 737ac713874SUrsula Braun rc = -EINVAL; 738ac713874SUrsula Braun goto out; 739ac713874SUrsula Braun } 740ac713874SUrsula Braun 741a046d57dSUrsula Braun /* Wait for an incoming connection */ 742a046d57dSUrsula Braun timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); 743a046d57dSUrsula Braun add_wait_queue_exclusive(sk_sleep(sk), &wait); 744a046d57dSUrsula Braun while (!(nsk = smc_accept_dequeue(sk, new_sock))) { 745a046d57dSUrsula Braun set_current_state(TASK_INTERRUPTIBLE); 746a046d57dSUrsula Braun if (!timeo) { 747a046d57dSUrsula Braun rc = -EAGAIN; 748a046d57dSUrsula Braun break; 749a046d57dSUrsula Braun } 750a046d57dSUrsula Braun release_sock(sk); 751a046d57dSUrsula Braun timeo = schedule_timeout(timeo); 752a046d57dSUrsula Braun /* wakeup by sk_data_ready in smc_listen_work() */ 753a046d57dSUrsula Braun sched_annotate_sleep(); 754a046d57dSUrsula Braun lock_sock(sk); 755a046d57dSUrsula Braun if (signal_pending(current)) { 756a046d57dSUrsula Braun rc = sock_intr_errno(timeo); 757a046d57dSUrsula Braun break; 758a046d57dSUrsula Braun } 759a046d57dSUrsula Braun } 760a046d57dSUrsula Braun set_current_state(TASK_RUNNING); 761a046d57dSUrsula Braun remove_wait_queue(sk_sleep(sk), &wait); 762ac713874SUrsula Braun 763a046d57dSUrsula Braun if (!rc) 764a046d57dSUrsula Braun rc = sock_error(nsk); 765ac713874SUrsula Braun 766ac713874SUrsula Braun out: 767ac713874SUrsula Braun release_sock(sk); 768ac713874SUrsula Braun return rc; 769ac713874SUrsula Braun } 770ac713874SUrsula Braun 771ac713874SUrsula Braun static int smc_getname(struct socket *sock, struct sockaddr *addr, 772ac713874SUrsula Braun int *len, int peer) 773ac713874SUrsula Braun { 774ac713874SUrsula Braun struct smc_sock *smc; 775ac713874SUrsula Braun 776ac713874SUrsula Braun if (peer && (sock->sk->sk_state != SMC_ACTIVE)) 777ac713874SUrsula Braun return -ENOTCONN; 778ac713874SUrsula Braun 779ac713874SUrsula Braun smc = smc_sk(sock->sk); 780ac713874SUrsula Braun 781ac713874SUrsula Braun return smc->clcsock->ops->getname(smc->clcsock, addr, len, peer); 782ac713874SUrsula Braun } 783ac713874SUrsula Braun 784ac713874SUrsula Braun static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) 785ac713874SUrsula Braun { 786ac713874SUrsula Braun struct sock *sk = sock->sk; 787ac713874SUrsula Braun struct smc_sock *smc; 788ac713874SUrsula Braun int rc = -EPIPE; 789ac713874SUrsula Braun 790ac713874SUrsula Braun smc = smc_sk(sk); 791ac713874SUrsula Braun lock_sock(sk); 792ac713874SUrsula Braun if (sk->sk_state != SMC_ACTIVE) 793ac713874SUrsula Braun goto out; 794ac713874SUrsula Braun if (smc->use_fallback) 795ac713874SUrsula Braun rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len); 796ac713874SUrsula Braun else 797ac713874SUrsula Braun rc = sock_no_sendmsg(sock, msg, len); 798ac713874SUrsula Braun out: 799ac713874SUrsula Braun release_sock(sk); 800ac713874SUrsula Braun return rc; 801ac713874SUrsula Braun } 802ac713874SUrsula Braun 803ac713874SUrsula Braun static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, 804ac713874SUrsula Braun int flags) 805ac713874SUrsula Braun { 806ac713874SUrsula Braun struct sock *sk = sock->sk; 807ac713874SUrsula Braun struct smc_sock *smc; 808ac713874SUrsula Braun int rc = -ENOTCONN; 809ac713874SUrsula Braun 810ac713874SUrsula Braun smc = smc_sk(sk); 811ac713874SUrsula Braun lock_sock(sk); 812ac713874SUrsula Braun if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_CLOSED)) 813ac713874SUrsula Braun goto out; 814ac713874SUrsula Braun 815ac713874SUrsula Braun if (smc->use_fallback) 816ac713874SUrsula Braun rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags); 817ac713874SUrsula Braun else 818ac713874SUrsula Braun rc = sock_no_recvmsg(sock, msg, len, flags); 819ac713874SUrsula Braun out: 820ac713874SUrsula Braun release_sock(sk); 821ac713874SUrsula Braun return rc; 822ac713874SUrsula Braun } 823ac713874SUrsula Braun 824a046d57dSUrsula Braun static unsigned int smc_accept_poll(struct sock *parent) 825a046d57dSUrsula Braun { 826a046d57dSUrsula Braun struct smc_sock *isk; 827a046d57dSUrsula Braun struct sock *sk; 828a046d57dSUrsula Braun 829a046d57dSUrsula Braun lock_sock(parent); 830a046d57dSUrsula Braun list_for_each_entry(isk, &smc_sk(parent)->accept_q, accept_q) { 831a046d57dSUrsula Braun sk = (struct sock *)isk; 832a046d57dSUrsula Braun 833a046d57dSUrsula Braun if (sk->sk_state == SMC_ACTIVE) { 834a046d57dSUrsula Braun release_sock(parent); 835a046d57dSUrsula Braun return POLLIN | POLLRDNORM; 836a046d57dSUrsula Braun } 837a046d57dSUrsula Braun } 838a046d57dSUrsula Braun release_sock(parent); 839a046d57dSUrsula Braun 840a046d57dSUrsula Braun return 0; 841a046d57dSUrsula Braun } 842a046d57dSUrsula Braun 843ac713874SUrsula Braun static unsigned int smc_poll(struct file *file, struct socket *sock, 844ac713874SUrsula Braun poll_table *wait) 845ac713874SUrsula Braun { 846ac713874SUrsula Braun struct sock *sk = sock->sk; 847ac713874SUrsula Braun unsigned int mask = 0; 848ac713874SUrsula Braun struct smc_sock *smc; 849a046d57dSUrsula Braun int rc; 850ac713874SUrsula Braun 851ac713874SUrsula Braun smc = smc_sk(sock->sk); 852a046d57dSUrsula Braun if ((sk->sk_state == SMC_INIT) || smc->use_fallback) { 853a046d57dSUrsula Braun /* delegate to CLC child sock */ 854ac713874SUrsula Braun mask = smc->clcsock->ops->poll(file, smc->clcsock, wait); 855ac713874SUrsula Braun /* if non-blocking connect finished ... */ 856ac713874SUrsula Braun lock_sock(sk); 857ac713874SUrsula Braun if ((sk->sk_state == SMC_INIT) && (mask & POLLOUT)) { 858a046d57dSUrsula Braun sk->sk_err = smc->clcsock->sk->sk_err; 859a046d57dSUrsula Braun if (sk->sk_err) { 860a046d57dSUrsula Braun mask |= POLLERR; 861a046d57dSUrsula Braun } else { 862a046d57dSUrsula Braun rc = smc_connect_rdma(smc); 863a046d57dSUrsula Braun if (rc < 0) 864a046d57dSUrsula Braun mask |= POLLERR; 865a046d57dSUrsula Braun else 866a046d57dSUrsula Braun /* success cases including fallback */ 867a046d57dSUrsula Braun mask |= POLLOUT | POLLWRNORM; 868a046d57dSUrsula Braun } 869ac713874SUrsula Braun } 870ac713874SUrsula Braun release_sock(sk); 871ac713874SUrsula Braun } else { 872a046d57dSUrsula Braun sock_poll_wait(file, sk_sleep(sk), wait); 873a046d57dSUrsula Braun if (sk->sk_state == SMC_LISTEN) 874a046d57dSUrsula Braun /* woken up by sk_data_ready in smc_listen_work() */ 875a046d57dSUrsula Braun mask |= smc_accept_poll(sk); 876a046d57dSUrsula Braun if (sk->sk_err) 877a046d57dSUrsula Braun mask |= POLLERR; 878a046d57dSUrsula Braun /* for now - to be enhanced in follow-on patch */ 879ac713874SUrsula Braun } 880ac713874SUrsula Braun 881ac713874SUrsula Braun return mask; 882ac713874SUrsula Braun } 883ac713874SUrsula Braun 884ac713874SUrsula Braun static int smc_shutdown(struct socket *sock, int how) 885ac713874SUrsula Braun { 886ac713874SUrsula Braun struct sock *sk = sock->sk; 887ac713874SUrsula Braun struct smc_sock *smc; 888ac713874SUrsula Braun int rc = -EINVAL; 889ac713874SUrsula Braun 890ac713874SUrsula Braun smc = smc_sk(sk); 891ac713874SUrsula Braun 892ac713874SUrsula Braun if ((how < SHUT_RD) || (how > SHUT_RDWR)) 893ac713874SUrsula Braun goto out_err; 894ac713874SUrsula Braun 895ac713874SUrsula Braun lock_sock(sk); 896ac713874SUrsula Braun 897ac713874SUrsula Braun rc = -ENOTCONN; 898ac713874SUrsula Braun if (sk->sk_state == SMC_CLOSED) 899ac713874SUrsula Braun goto out; 900ac713874SUrsula Braun if (smc->use_fallback) { 901ac713874SUrsula Braun rc = kernel_sock_shutdown(smc->clcsock, how); 902ac713874SUrsula Braun sk->sk_shutdown = smc->clcsock->sk->sk_shutdown; 903ac713874SUrsula Braun if (sk->sk_shutdown == SHUTDOWN_MASK) 904ac713874SUrsula Braun sk->sk_state = SMC_CLOSED; 905ac713874SUrsula Braun } else { 906ac713874SUrsula Braun rc = sock_no_shutdown(sock, how); 907ac713874SUrsula Braun } 908ac713874SUrsula Braun 909ac713874SUrsula Braun out: 910ac713874SUrsula Braun release_sock(sk); 911ac713874SUrsula Braun 912ac713874SUrsula Braun out_err: 913ac713874SUrsula Braun return rc; 914ac713874SUrsula Braun } 915ac713874SUrsula Braun 916ac713874SUrsula Braun static int smc_setsockopt(struct socket *sock, int level, int optname, 917ac713874SUrsula Braun char __user *optval, unsigned int optlen) 918ac713874SUrsula Braun { 919ac713874SUrsula Braun struct sock *sk = sock->sk; 920ac713874SUrsula Braun struct smc_sock *smc; 921ac713874SUrsula Braun 922ac713874SUrsula Braun smc = smc_sk(sk); 923ac713874SUrsula Braun 924ac713874SUrsula Braun /* generic setsockopts reaching us here always apply to the 925ac713874SUrsula Braun * CLC socket 926ac713874SUrsula Braun */ 927ac713874SUrsula Braun return smc->clcsock->ops->setsockopt(smc->clcsock, level, optname, 928ac713874SUrsula Braun optval, optlen); 929ac713874SUrsula Braun } 930ac713874SUrsula Braun 931ac713874SUrsula Braun static int smc_getsockopt(struct socket *sock, int level, int optname, 932ac713874SUrsula Braun char __user *optval, int __user *optlen) 933ac713874SUrsula Braun { 934ac713874SUrsula Braun struct smc_sock *smc; 935ac713874SUrsula Braun 936ac713874SUrsula Braun smc = smc_sk(sock->sk); 937ac713874SUrsula Braun /* socket options apply to the CLC socket */ 938ac713874SUrsula Braun return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname, 939ac713874SUrsula Braun optval, optlen); 940ac713874SUrsula Braun } 941ac713874SUrsula Braun 942ac713874SUrsula Braun static int smc_ioctl(struct socket *sock, unsigned int cmd, 943ac713874SUrsula Braun unsigned long arg) 944ac713874SUrsula Braun { 945ac713874SUrsula Braun struct smc_sock *smc; 946ac713874SUrsula Braun 947ac713874SUrsula Braun smc = smc_sk(sock->sk); 948ac713874SUrsula Braun if (smc->use_fallback) 949ac713874SUrsula Braun return smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg); 950ac713874SUrsula Braun else 951ac713874SUrsula Braun return sock_no_ioctl(sock, cmd, arg); 952ac713874SUrsula Braun } 953ac713874SUrsula Braun 954ac713874SUrsula Braun static ssize_t smc_sendpage(struct socket *sock, struct page *page, 955ac713874SUrsula Braun int offset, size_t size, int flags) 956ac713874SUrsula Braun { 957ac713874SUrsula Braun struct sock *sk = sock->sk; 958ac713874SUrsula Braun struct smc_sock *smc; 959ac713874SUrsula Braun int rc = -EPIPE; 960ac713874SUrsula Braun 961ac713874SUrsula Braun smc = smc_sk(sk); 962ac713874SUrsula Braun lock_sock(sk); 963ac713874SUrsula Braun if (sk->sk_state != SMC_ACTIVE) 964ac713874SUrsula Braun goto out; 965ac713874SUrsula Braun if (smc->use_fallback) 966ac713874SUrsula Braun rc = kernel_sendpage(smc->clcsock, page, offset, 967ac713874SUrsula Braun size, flags); 968ac713874SUrsula Braun else 969ac713874SUrsula Braun rc = sock_no_sendpage(sock, page, offset, size, flags); 970ac713874SUrsula Braun 971ac713874SUrsula Braun out: 972ac713874SUrsula Braun release_sock(sk); 973ac713874SUrsula Braun return rc; 974ac713874SUrsula Braun } 975ac713874SUrsula Braun 976ac713874SUrsula Braun static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos, 977ac713874SUrsula Braun struct pipe_inode_info *pipe, size_t len, 978ac713874SUrsula Braun unsigned int flags) 979ac713874SUrsula Braun { 980ac713874SUrsula Braun struct sock *sk = sock->sk; 981ac713874SUrsula Braun struct smc_sock *smc; 982ac713874SUrsula Braun int rc = -ENOTCONN; 983ac713874SUrsula Braun 984ac713874SUrsula Braun smc = smc_sk(sk); 985ac713874SUrsula Braun lock_sock(sk); 986ac713874SUrsula Braun if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_CLOSED)) 987ac713874SUrsula Braun goto out; 988ac713874SUrsula Braun if (smc->use_fallback) { 989ac713874SUrsula Braun rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos, 990ac713874SUrsula Braun pipe, len, flags); 991ac713874SUrsula Braun } else { 992ac713874SUrsula Braun rc = -EOPNOTSUPP; 993ac713874SUrsula Braun } 994ac713874SUrsula Braun out: 995ac713874SUrsula Braun release_sock(sk); 996ac713874SUrsula Braun return rc; 997ac713874SUrsula Braun } 998ac713874SUrsula Braun 999ac713874SUrsula Braun /* must look like tcp */ 1000ac713874SUrsula Braun static const struct proto_ops smc_sock_ops = { 1001ac713874SUrsula Braun .family = PF_SMC, 1002ac713874SUrsula Braun .owner = THIS_MODULE, 1003ac713874SUrsula Braun .release = smc_release, 1004ac713874SUrsula Braun .bind = smc_bind, 1005ac713874SUrsula Braun .connect = smc_connect, 1006ac713874SUrsula Braun .socketpair = sock_no_socketpair, 1007ac713874SUrsula Braun .accept = smc_accept, 1008ac713874SUrsula Braun .getname = smc_getname, 1009ac713874SUrsula Braun .poll = smc_poll, 1010ac713874SUrsula Braun .ioctl = smc_ioctl, 1011ac713874SUrsula Braun .listen = smc_listen, 1012ac713874SUrsula Braun .shutdown = smc_shutdown, 1013ac713874SUrsula Braun .setsockopt = smc_setsockopt, 1014ac713874SUrsula Braun .getsockopt = smc_getsockopt, 1015ac713874SUrsula Braun .sendmsg = smc_sendmsg, 1016ac713874SUrsula Braun .recvmsg = smc_recvmsg, 1017ac713874SUrsula Braun .mmap = sock_no_mmap, 1018ac713874SUrsula Braun .sendpage = smc_sendpage, 1019ac713874SUrsula Braun .splice_read = smc_splice_read, 1020ac713874SUrsula Braun }; 1021ac713874SUrsula Braun 1022ac713874SUrsula Braun static int smc_create(struct net *net, struct socket *sock, int protocol, 1023ac713874SUrsula Braun int kern) 1024ac713874SUrsula Braun { 1025ac713874SUrsula Braun struct smc_sock *smc; 1026ac713874SUrsula Braun struct sock *sk; 1027ac713874SUrsula Braun int rc; 1028ac713874SUrsula Braun 1029ac713874SUrsula Braun rc = -ESOCKTNOSUPPORT; 1030ac713874SUrsula Braun if (sock->type != SOCK_STREAM) 1031ac713874SUrsula Braun goto out; 1032ac713874SUrsula Braun 1033ac713874SUrsula Braun rc = -EPROTONOSUPPORT; 1034ac713874SUrsula Braun if ((protocol != IPPROTO_IP) && (protocol != IPPROTO_TCP)) 1035ac713874SUrsula Braun goto out; 1036ac713874SUrsula Braun 1037ac713874SUrsula Braun rc = -ENOBUFS; 1038ac713874SUrsula Braun sock->ops = &smc_sock_ops; 1039ac713874SUrsula Braun sk = smc_sock_alloc(net, sock); 1040ac713874SUrsula Braun if (!sk) 1041ac713874SUrsula Braun goto out; 1042ac713874SUrsula Braun 1043ac713874SUrsula Braun /* create internal TCP socket for CLC handshake and fallback */ 1044ac713874SUrsula Braun smc = smc_sk(sk); 1045a046d57dSUrsula Braun smc->use_fallback = false; /* assume rdma capability first */ 1046ac713874SUrsula Braun rc = sock_create_kern(net, PF_INET, SOCK_STREAM, 1047ac713874SUrsula Braun IPPROTO_TCP, &smc->clcsock); 1048ac713874SUrsula Braun if (rc) 1049ac713874SUrsula Braun sk_common_release(sk); 1050ac713874SUrsula Braun 1051ac713874SUrsula Braun out: 1052ac713874SUrsula Braun return rc; 1053ac713874SUrsula Braun } 1054ac713874SUrsula Braun 1055ac713874SUrsula Braun static const struct net_proto_family smc_sock_family_ops = { 1056ac713874SUrsula Braun .family = PF_SMC, 1057ac713874SUrsula Braun .owner = THIS_MODULE, 1058ac713874SUrsula Braun .create = smc_create, 1059ac713874SUrsula Braun }; 1060ac713874SUrsula Braun 1061ac713874SUrsula Braun static int __init smc_init(void) 1062ac713874SUrsula Braun { 1063ac713874SUrsula Braun int rc; 1064ac713874SUrsula Braun 10656812baabSThomas Richter rc = smc_pnet_init(); 10666812baabSThomas Richter if (rc) 10676812baabSThomas Richter return rc; 10686812baabSThomas Richter 1069ac713874SUrsula Braun rc = proto_register(&smc_proto, 1); 1070ac713874SUrsula Braun if (rc) { 1071ac713874SUrsula Braun pr_err("%s: proto_register fails with %d\n", __func__, rc); 10726812baabSThomas Richter goto out_pnet; 1073ac713874SUrsula Braun } 1074ac713874SUrsula Braun 1075ac713874SUrsula Braun rc = sock_register(&smc_sock_family_ops); 1076ac713874SUrsula Braun if (rc) { 1077ac713874SUrsula Braun pr_err("%s: sock_register fails with %d\n", __func__, rc); 1078ac713874SUrsula Braun goto out_proto; 1079ac713874SUrsula Braun } 1080ac713874SUrsula Braun 1081a4cf0443SUrsula Braun rc = smc_ib_register_client(); 1082a4cf0443SUrsula Braun if (rc) { 1083a4cf0443SUrsula Braun pr_err("%s: ib_register fails with %d\n", __func__, rc); 1084a4cf0443SUrsula Braun goto out_sock; 1085a4cf0443SUrsula Braun } 1086a4cf0443SUrsula Braun 1087ac713874SUrsula Braun return 0; 1088ac713874SUrsula Braun 1089a4cf0443SUrsula Braun out_sock: 1090a4cf0443SUrsula Braun sock_unregister(PF_SMC); 1091ac713874SUrsula Braun out_proto: 1092ac713874SUrsula Braun proto_unregister(&smc_proto); 10936812baabSThomas Richter out_pnet: 10946812baabSThomas Richter smc_pnet_exit(); 1095ac713874SUrsula Braun return rc; 1096ac713874SUrsula Braun } 1097ac713874SUrsula Braun 1098ac713874SUrsula Braun static void __exit smc_exit(void) 1099ac713874SUrsula Braun { 1100*0cfdd8f9SUrsula Braun struct smc_link_group *lgr, *lg; 1101*0cfdd8f9SUrsula Braun LIST_HEAD(lgr_freeing_list); 1102*0cfdd8f9SUrsula Braun 1103*0cfdd8f9SUrsula Braun spin_lock_bh(&smc_lgr_list.lock); 1104*0cfdd8f9SUrsula Braun if (!list_empty(&smc_lgr_list.list)) 1105*0cfdd8f9SUrsula Braun list_splice_init(&smc_lgr_list.list, &lgr_freeing_list); 1106*0cfdd8f9SUrsula Braun spin_unlock_bh(&smc_lgr_list.lock); 1107*0cfdd8f9SUrsula Braun list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) { 1108*0cfdd8f9SUrsula Braun list_del_init(&lgr->list); 1109*0cfdd8f9SUrsula Braun smc_lgr_free(lgr); /* free link group */ 1110*0cfdd8f9SUrsula Braun } 1111a4cf0443SUrsula Braun smc_ib_unregister_client(); 1112ac713874SUrsula Braun sock_unregister(PF_SMC); 1113ac713874SUrsula Braun proto_unregister(&smc_proto); 11146812baabSThomas Richter smc_pnet_exit(); 1115ac713874SUrsula Braun } 1116ac713874SUrsula Braun 1117ac713874SUrsula Braun module_init(smc_init); 1118ac713874SUrsula Braun module_exit(smc_exit); 1119ac713874SUrsula Braun 1120ac713874SUrsula Braun MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>"); 1121ac713874SUrsula Braun MODULE_DESCRIPTION("smc socket address family"); 1122ac713874SUrsula Braun MODULE_LICENSE("GPL"); 1123ac713874SUrsula Braun MODULE_ALIAS_NETPROTO(PF_SMC); 1124