1ac713874SUrsula Braun /* 2ac713874SUrsula Braun * Shared Memory Communications over RDMA (SMC-R) and RoCE 3ac713874SUrsula Braun * 4ac713874SUrsula Braun * AF_SMC protocol family socket handler keeping the AF_INET sock address type 5ac713874SUrsula Braun * applies to SOCK_STREAM sockets only 6ac713874SUrsula Braun * offers an alternative communication option for TCP-protocol sockets 7ac713874SUrsula Braun * applicable with RoCE-cards only 8ac713874SUrsula Braun * 9*a046d57dSUrsula Braun * Initial restrictions: 10*a046d57dSUrsula Braun * - non-blocking connect postponed 11*a046d57dSUrsula Braun * - IPv6 support postponed 12*a046d57dSUrsula Braun * - support for alternate links postponed 13*a046d57dSUrsula Braun * - partial support for non-blocking sockets only 14*a046d57dSUrsula Braun * - support for urgent data postponed 15*a046d57dSUrsula Braun * 16ac713874SUrsula Braun * Copyright IBM Corp. 2016 17ac713874SUrsula Braun * 18ac713874SUrsula Braun * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> 19ac713874SUrsula Braun * based on prototype from Frank Blaschka 20ac713874SUrsula Braun */ 21ac713874SUrsula Braun 22ac713874SUrsula Braun #define KMSG_COMPONENT "smc" 23ac713874SUrsula Braun #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 24ac713874SUrsula Braun 25ac713874SUrsula Braun #include <linux/module.h> 26ac713874SUrsula Braun #include <linux/socket.h> 27*a046d57dSUrsula Braun #include <linux/inetdevice.h> 28*a046d57dSUrsula Braun #include <linux/workqueue.h> 29ac713874SUrsula Braun #include <net/sock.h> 30*a046d57dSUrsula Braun #include <net/tcp.h> 31ac713874SUrsula Braun 32ac713874SUrsula Braun #include "smc.h" 33*a046d57dSUrsula Braun #include "smc_clc.h" 34a4cf0443SUrsula Braun #include "smc_ib.h" 356812baabSThomas Richter #include "smc_pnet.h" 36ac713874SUrsula Braun 37*a046d57dSUrsula Braun static void smc_tcp_listen_work(struct work_struct *); 38*a046d57dSUrsula Braun 39ac713874SUrsula Braun static void smc_set_keepalive(struct sock *sk, int val) 
40ac713874SUrsula Braun { 41ac713874SUrsula Braun struct smc_sock *smc = smc_sk(sk); 42ac713874SUrsula Braun 43ac713874SUrsula Braun smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val); 44ac713874SUrsula Braun } 45ac713874SUrsula Braun 46ac713874SUrsula Braun static struct proto smc_proto = { 47ac713874SUrsula Braun .name = "SMC", 48ac713874SUrsula Braun .owner = THIS_MODULE, 49ac713874SUrsula Braun .keepalive = smc_set_keepalive, 50ac713874SUrsula Braun .obj_size = sizeof(struct smc_sock), 51ac713874SUrsula Braun .slab_flags = SLAB_DESTROY_BY_RCU, 52ac713874SUrsula Braun }; 53ac713874SUrsula Braun 54ac713874SUrsula Braun static int smc_release(struct socket *sock) 55ac713874SUrsula Braun { 56ac713874SUrsula Braun struct sock *sk = sock->sk; 57ac713874SUrsula Braun struct smc_sock *smc; 58ac713874SUrsula Braun 59ac713874SUrsula Braun if (!sk) 60ac713874SUrsula Braun goto out; 61ac713874SUrsula Braun 62ac713874SUrsula Braun smc = smc_sk(sk); 63ac713874SUrsula Braun lock_sock(sk); 64ac713874SUrsula Braun 65ac713874SUrsula Braun sk->sk_state = SMC_CLOSED; 66ac713874SUrsula Braun if (smc->clcsock) { 67ac713874SUrsula Braun sock_release(smc->clcsock); 68ac713874SUrsula Braun smc->clcsock = NULL; 69ac713874SUrsula Braun } 70ac713874SUrsula Braun 71ac713874SUrsula Braun /* detach socket */ 72ac713874SUrsula Braun sock_orphan(sk); 73ac713874SUrsula Braun sock->sk = NULL; 74ac713874SUrsula Braun release_sock(sk); 75ac713874SUrsula Braun 76ac713874SUrsula Braun sock_put(sk); 77ac713874SUrsula Braun out: 78ac713874SUrsula Braun return 0; 79ac713874SUrsula Braun } 80ac713874SUrsula Braun 81ac713874SUrsula Braun static void smc_destruct(struct sock *sk) 82ac713874SUrsula Braun { 83ac713874SUrsula Braun if (sk->sk_state != SMC_CLOSED) 84ac713874SUrsula Braun return; 85ac713874SUrsula Braun if (!sock_flag(sk, SOCK_DEAD)) 86ac713874SUrsula Braun return; 87ac713874SUrsula Braun 88ac713874SUrsula Braun sk_refcnt_debug_dec(sk); 89ac713874SUrsula Braun } 90ac713874SUrsula 
Braun 91ac713874SUrsula Braun static struct sock *smc_sock_alloc(struct net *net, struct socket *sock) 92ac713874SUrsula Braun { 93ac713874SUrsula Braun struct smc_sock *smc; 94ac713874SUrsula Braun struct sock *sk; 95ac713874SUrsula Braun 96ac713874SUrsula Braun sk = sk_alloc(net, PF_SMC, GFP_KERNEL, &smc_proto, 0); 97ac713874SUrsula Braun if (!sk) 98ac713874SUrsula Braun return NULL; 99ac713874SUrsula Braun 100ac713874SUrsula Braun sock_init_data(sock, sk); /* sets sk_refcnt to 1 */ 101ac713874SUrsula Braun sk->sk_state = SMC_INIT; 102ac713874SUrsula Braun sk->sk_destruct = smc_destruct; 103ac713874SUrsula Braun sk->sk_protocol = SMCPROTO_SMC; 104ac713874SUrsula Braun smc = smc_sk(sk); 105*a046d57dSUrsula Braun INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); 106*a046d57dSUrsula Braun INIT_LIST_HEAD(&smc->accept_q); 107*a046d57dSUrsula Braun spin_lock_init(&smc->accept_q_lock); 108*a046d57dSUrsula Braun sk_refcnt_debug_inc(sk); 109ac713874SUrsula Braun 110ac713874SUrsula Braun return sk; 111ac713874SUrsula Braun } 112ac713874SUrsula Braun 113ac713874SUrsula Braun static int smc_bind(struct socket *sock, struct sockaddr *uaddr, 114ac713874SUrsula Braun int addr_len) 115ac713874SUrsula Braun { 116ac713874SUrsula Braun struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; 117ac713874SUrsula Braun struct sock *sk = sock->sk; 118ac713874SUrsula Braun struct smc_sock *smc; 119ac713874SUrsula Braun int rc; 120ac713874SUrsula Braun 121ac713874SUrsula Braun smc = smc_sk(sk); 122ac713874SUrsula Braun 123ac713874SUrsula Braun /* replicate tests from inet_bind(), to be safe wrt. 
future changes */ 124ac713874SUrsula Braun rc = -EINVAL; 125ac713874SUrsula Braun if (addr_len < sizeof(struct sockaddr_in)) 126ac713874SUrsula Braun goto out; 127ac713874SUrsula Braun 128ac713874SUrsula Braun rc = -EAFNOSUPPORT; 129ac713874SUrsula Braun /* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */ 130ac713874SUrsula Braun if ((addr->sin_family != AF_INET) && 131ac713874SUrsula Braun ((addr->sin_family != AF_UNSPEC) || 132ac713874SUrsula Braun (addr->sin_addr.s_addr != htonl(INADDR_ANY)))) 133ac713874SUrsula Braun goto out; 134ac713874SUrsula Braun 135ac713874SUrsula Braun lock_sock(sk); 136ac713874SUrsula Braun 137ac713874SUrsula Braun /* Check if socket is already active */ 138ac713874SUrsula Braun rc = -EINVAL; 139ac713874SUrsula Braun if (sk->sk_state != SMC_INIT) 140ac713874SUrsula Braun goto out_rel; 141ac713874SUrsula Braun 142ac713874SUrsula Braun smc->clcsock->sk->sk_reuse = sk->sk_reuse; 143ac713874SUrsula Braun rc = kernel_bind(smc->clcsock, uaddr, addr_len); 144ac713874SUrsula Braun 145ac713874SUrsula Braun out_rel: 146ac713874SUrsula Braun release_sock(sk); 147ac713874SUrsula Braun out: 148ac713874SUrsula Braun return rc; 149ac713874SUrsula Braun } 150ac713874SUrsula Braun 151ac713874SUrsula Braun static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk, 152ac713874SUrsula Braun unsigned long mask) 153ac713874SUrsula Braun { 154ac713874SUrsula Braun /* options we don't get control via setsockopt for */ 155ac713874SUrsula Braun nsk->sk_type = osk->sk_type; 156ac713874SUrsula Braun nsk->sk_sndbuf = osk->sk_sndbuf; 157ac713874SUrsula Braun nsk->sk_rcvbuf = osk->sk_rcvbuf; 158ac713874SUrsula Braun nsk->sk_sndtimeo = osk->sk_sndtimeo; 159ac713874SUrsula Braun nsk->sk_rcvtimeo = osk->sk_rcvtimeo; 160ac713874SUrsula Braun nsk->sk_mark = osk->sk_mark; 161ac713874SUrsula Braun nsk->sk_priority = osk->sk_priority; 162ac713874SUrsula Braun nsk->sk_rcvlowat = osk->sk_rcvlowat; 163ac713874SUrsula Braun nsk->sk_bound_dev_if = 
osk->sk_bound_dev_if; 164ac713874SUrsula Braun nsk->sk_err = osk->sk_err; 165ac713874SUrsula Braun 166ac713874SUrsula Braun nsk->sk_flags &= ~mask; 167ac713874SUrsula Braun nsk->sk_flags |= osk->sk_flags & mask; 168ac713874SUrsula Braun } 169ac713874SUrsula Braun 170ac713874SUrsula Braun #define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \ 171ac713874SUrsula Braun (1UL << SOCK_KEEPOPEN) | \ 172ac713874SUrsula Braun (1UL << SOCK_LINGER) | \ 173ac713874SUrsula Braun (1UL << SOCK_BROADCAST) | \ 174ac713874SUrsula Braun (1UL << SOCK_TIMESTAMP) | \ 175ac713874SUrsula Braun (1UL << SOCK_DBG) | \ 176ac713874SUrsula Braun (1UL << SOCK_RCVTSTAMP) | \ 177ac713874SUrsula Braun (1UL << SOCK_RCVTSTAMPNS) | \ 178ac713874SUrsula Braun (1UL << SOCK_LOCALROUTE) | \ 179ac713874SUrsula Braun (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \ 180ac713874SUrsula Braun (1UL << SOCK_RXQ_OVFL) | \ 181ac713874SUrsula Braun (1UL << SOCK_WIFI_STATUS) | \ 182ac713874SUrsula Braun (1UL << SOCK_NOFCS) | \ 183ac713874SUrsula Braun (1UL << SOCK_FILTER_LOCKED)) 184ac713874SUrsula Braun /* copy only relevant settings and flags of SOL_SOCKET level from smc to 185ac713874SUrsula Braun * clc socket (since smc is not called for these options from net/core) 186ac713874SUrsula Braun */ 187ac713874SUrsula Braun static void smc_copy_sock_settings_to_clc(struct smc_sock *smc) 188ac713874SUrsula Braun { 189ac713874SUrsula Braun smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC); 190ac713874SUrsula Braun } 191ac713874SUrsula Braun 192ac713874SUrsula Braun #define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \ 193ac713874SUrsula Braun (1UL << SOCK_KEEPOPEN) | \ 194ac713874SUrsula Braun (1UL << SOCK_LINGER) | \ 195ac713874SUrsula Braun (1UL << SOCK_DBG)) 196ac713874SUrsula Braun /* copy only settings and flags relevant for smc from clc to smc socket */ 197ac713874SUrsula Braun static void smc_copy_sock_settings_to_smc(struct smc_sock *smc) 198ac713874SUrsula Braun { 199ac713874SUrsula Braun 
smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC); 200ac713874SUrsula Braun } 201ac713874SUrsula Braun 202*a046d57dSUrsula Braun /* determine subnet and mask of internal TCP socket */ 203*a046d57dSUrsula Braun int smc_netinfo_by_tcpsk(struct socket *clcsock, 204*a046d57dSUrsula Braun __be32 *subnet, u8 *prefix_len) 205*a046d57dSUrsula Braun { 206*a046d57dSUrsula Braun struct dst_entry *dst = sk_dst_get(clcsock->sk); 207*a046d57dSUrsula Braun struct sockaddr_in addr; 208*a046d57dSUrsula Braun int rc = -ENOENT; 209*a046d57dSUrsula Braun int len; 210*a046d57dSUrsula Braun 211*a046d57dSUrsula Braun if (!dst) { 212*a046d57dSUrsula Braun rc = -ENOTCONN; 213*a046d57dSUrsula Braun goto out; 214*a046d57dSUrsula Braun } 215*a046d57dSUrsula Braun if (!dst->dev) { 216*a046d57dSUrsula Braun rc = -ENODEV; 217*a046d57dSUrsula Braun goto out_rel; 218*a046d57dSUrsula Braun } 219*a046d57dSUrsula Braun 220*a046d57dSUrsula Braun /* get address to which the internal TCP socket is bound */ 221*a046d57dSUrsula Braun kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len); 222*a046d57dSUrsula Braun /* analyze IPv4 specific data of net_device belonging to TCP socket */ 223*a046d57dSUrsula Braun for_ifa(dst->dev->ip_ptr) { 224*a046d57dSUrsula Braun if (ifa->ifa_address != addr.sin_addr.s_addr) 225*a046d57dSUrsula Braun continue; 226*a046d57dSUrsula Braun *prefix_len = inet_mask_len(ifa->ifa_mask); 227*a046d57dSUrsula Braun *subnet = ifa->ifa_address & ifa->ifa_mask; 228*a046d57dSUrsula Braun rc = 0; 229*a046d57dSUrsula Braun break; 230*a046d57dSUrsula Braun } endfor_ifa(dst->dev->ip_ptr); 231*a046d57dSUrsula Braun 232*a046d57dSUrsula Braun out_rel: 233*a046d57dSUrsula Braun dst_release(dst); 234*a046d57dSUrsula Braun out: 235*a046d57dSUrsula Braun return rc; 236*a046d57dSUrsula Braun } 237*a046d57dSUrsula Braun 238*a046d57dSUrsula Braun /* setup for RDMA connection of client */ 239*a046d57dSUrsula Braun static int smc_connect_rdma(struct smc_sock *smc) 
240*a046d57dSUrsula Braun { 241*a046d57dSUrsula Braun struct smc_clc_msg_accept_confirm aclc; 242*a046d57dSUrsula Braun struct smc_ib_device *smcibdev; 243*a046d57dSUrsula Braun int reason_code = 0; 244*a046d57dSUrsula Braun int rc = 0; 245*a046d57dSUrsula Braun u8 ibport; 246*a046d57dSUrsula Braun 247*a046d57dSUrsula Braun /* IPSec connections opt out of SMC-R optimizations */ 248*a046d57dSUrsula Braun if (using_ipsec(smc)) { 249*a046d57dSUrsula Braun reason_code = SMC_CLC_DECL_IPSEC; 250*a046d57dSUrsula Braun goto decline_rdma; 251*a046d57dSUrsula Braun } 252*a046d57dSUrsula Braun 253*a046d57dSUrsula Braun /* PNET table look up: search active ib_device and port 254*a046d57dSUrsula Braun * within same PNETID that also contains the ethernet device 255*a046d57dSUrsula Braun * used for the internal TCP socket 256*a046d57dSUrsula Braun */ 257*a046d57dSUrsula Braun smc_pnet_find_roce_resource(smc->clcsock->sk, &smcibdev, &ibport); 258*a046d57dSUrsula Braun if (!smcibdev) { 259*a046d57dSUrsula Braun reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ 260*a046d57dSUrsula Braun goto decline_rdma; 261*a046d57dSUrsula Braun } 262*a046d57dSUrsula Braun 263*a046d57dSUrsula Braun /* do inband token exchange */ 264*a046d57dSUrsula Braun reason_code = smc_clc_send_proposal(smc, smcibdev, ibport); 265*a046d57dSUrsula Braun if (reason_code < 0) { 266*a046d57dSUrsula Braun rc = reason_code; 267*a046d57dSUrsula Braun goto out_err; 268*a046d57dSUrsula Braun } 269*a046d57dSUrsula Braun if (reason_code > 0) /* configuration error */ 270*a046d57dSUrsula Braun goto decline_rdma; 271*a046d57dSUrsula Braun /* receive SMC Accept CLC message */ 272*a046d57dSUrsula Braun reason_code = smc_clc_wait_msg(smc, &aclc, sizeof(aclc), 273*a046d57dSUrsula Braun SMC_CLC_ACCEPT); 274*a046d57dSUrsula Braun if (reason_code < 0) { 275*a046d57dSUrsula Braun rc = reason_code; 276*a046d57dSUrsula Braun goto out_err; 277*a046d57dSUrsula Braun } 278*a046d57dSUrsula Braun if (reason_code > 0) 
279*a046d57dSUrsula Braun goto decline_rdma; 280*a046d57dSUrsula Braun 281*a046d57dSUrsula Braun /* tbd in follow-on patch: more steps to setup RDMA communcication, 282*a046d57dSUrsula Braun * create connection, link group, link 283*a046d57dSUrsula Braun */ 284*a046d57dSUrsula Braun 285*a046d57dSUrsula Braun /* tbd in follow-on patch: more steps to setup RDMA communcication, 286*a046d57dSUrsula Braun * create rmbs, map rmbs, rtoken_handling, modify_qp 287*a046d57dSUrsula Braun */ 288*a046d57dSUrsula Braun 289*a046d57dSUrsula Braun rc = smc_clc_send_confirm(smc); 290*a046d57dSUrsula Braun if (rc) 291*a046d57dSUrsula Braun goto out_err; 292*a046d57dSUrsula Braun 293*a046d57dSUrsula Braun /* tbd in follow-on patch: llc_confirm */ 294*a046d57dSUrsula Braun 295*a046d57dSUrsula Braun out_connected: 296*a046d57dSUrsula Braun smc_copy_sock_settings_to_clc(smc); 297*a046d57dSUrsula Braun smc->sk.sk_state = SMC_ACTIVE; 298*a046d57dSUrsula Braun 299*a046d57dSUrsula Braun return rc; 300*a046d57dSUrsula Braun 301*a046d57dSUrsula Braun decline_rdma: 302*a046d57dSUrsula Braun /* RDMA setup failed, switch back to TCP */ 303*a046d57dSUrsula Braun smc->use_fallback = true; 304*a046d57dSUrsula Braun if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) { 305*a046d57dSUrsula Braun rc = smc_clc_send_decline(smc, reason_code, 0); 306*a046d57dSUrsula Braun if (rc < sizeof(struct smc_clc_msg_decline)) 307*a046d57dSUrsula Braun goto out_err; 308*a046d57dSUrsula Braun } 309*a046d57dSUrsula Braun goto out_connected; 310*a046d57dSUrsula Braun 311*a046d57dSUrsula Braun out_err: 312*a046d57dSUrsula Braun return rc; 313*a046d57dSUrsula Braun } 314*a046d57dSUrsula Braun 315ac713874SUrsula Braun static int smc_connect(struct socket *sock, struct sockaddr *addr, 316ac713874SUrsula Braun int alen, int flags) 317ac713874SUrsula Braun { 318ac713874SUrsula Braun struct sock *sk = sock->sk; 319ac713874SUrsula Braun struct smc_sock *smc; 320ac713874SUrsula Braun int rc = -EINVAL; 321ac713874SUrsula 
Braun 322ac713874SUrsula Braun smc = smc_sk(sk); 323ac713874SUrsula Braun 324ac713874SUrsula Braun /* separate smc parameter checking to be safe */ 325ac713874SUrsula Braun if (alen < sizeof(addr->sa_family)) 326ac713874SUrsula Braun goto out_err; 327ac713874SUrsula Braun if (addr->sa_family != AF_INET) 328ac713874SUrsula Braun goto out_err; 329*a046d57dSUrsula Braun smc->addr = addr; /* needed for nonblocking connect */ 330ac713874SUrsula Braun 331ac713874SUrsula Braun lock_sock(sk); 332ac713874SUrsula Braun switch (sk->sk_state) { 333ac713874SUrsula Braun default: 334ac713874SUrsula Braun goto out; 335ac713874SUrsula Braun case SMC_ACTIVE: 336ac713874SUrsula Braun rc = -EISCONN; 337ac713874SUrsula Braun goto out; 338ac713874SUrsula Braun case SMC_INIT: 339ac713874SUrsula Braun rc = 0; 340ac713874SUrsula Braun break; 341ac713874SUrsula Braun } 342ac713874SUrsula Braun 343ac713874SUrsula Braun smc_copy_sock_settings_to_clc(smc); 344ac713874SUrsula Braun rc = kernel_connect(smc->clcsock, addr, alen, flags); 345ac713874SUrsula Braun if (rc) 346ac713874SUrsula Braun goto out; 347ac713874SUrsula Braun 348*a046d57dSUrsula Braun /* setup RDMA connection */ 349*a046d57dSUrsula Braun rc = smc_connect_rdma(smc); 350*a046d57dSUrsula Braun if (rc < 0) 351*a046d57dSUrsula Braun goto out; 352*a046d57dSUrsula Braun else 353*a046d57dSUrsula Braun rc = 0; /* success cases including fallback */ 354ac713874SUrsula Braun 355ac713874SUrsula Braun out: 356ac713874SUrsula Braun release_sock(sk); 357ac713874SUrsula Braun out_err: 358ac713874SUrsula Braun return rc; 359ac713874SUrsula Braun } 360ac713874SUrsula Braun 361ac713874SUrsula Braun static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc) 362ac713874SUrsula Braun { 363ac713874SUrsula Braun struct sock *sk = &lsmc->sk; 364ac713874SUrsula Braun struct socket *new_clcsock; 365ac713874SUrsula Braun struct sock *new_sk; 366ac713874SUrsula Braun int rc; 367ac713874SUrsula Braun 368*a046d57dSUrsula Braun 
release_sock(&lsmc->sk); 369ac713874SUrsula Braun new_sk = smc_sock_alloc(sock_net(sk), NULL); 370ac713874SUrsula Braun if (!new_sk) { 371ac713874SUrsula Braun rc = -ENOMEM; 372ac713874SUrsula Braun lsmc->sk.sk_err = ENOMEM; 373ac713874SUrsula Braun *new_smc = NULL; 374*a046d57dSUrsula Braun lock_sock(&lsmc->sk); 375ac713874SUrsula Braun goto out; 376ac713874SUrsula Braun } 377ac713874SUrsula Braun *new_smc = smc_sk(new_sk); 378ac713874SUrsula Braun 379ac713874SUrsula Braun rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0); 380*a046d57dSUrsula Braun lock_sock(&lsmc->sk); 381*a046d57dSUrsula Braun if (rc < 0) { 382*a046d57dSUrsula Braun lsmc->sk.sk_err = -rc; 383*a046d57dSUrsula Braun new_sk->sk_state = SMC_CLOSED; 384*a046d57dSUrsula Braun sock_set_flag(new_sk, SOCK_DEAD); 385*a046d57dSUrsula Braun sock_put(new_sk); 386*a046d57dSUrsula Braun *new_smc = NULL; 387*a046d57dSUrsula Braun goto out; 388*a046d57dSUrsula Braun } 389*a046d57dSUrsula Braun if (lsmc->sk.sk_state == SMC_CLOSED) { 390*a046d57dSUrsula Braun if (new_clcsock) 391*a046d57dSUrsula Braun sock_release(new_clcsock); 392*a046d57dSUrsula Braun new_sk->sk_state = SMC_CLOSED; 393*a046d57dSUrsula Braun sock_set_flag(new_sk, SOCK_DEAD); 394ac713874SUrsula Braun sock_put(new_sk); 395ac713874SUrsula Braun *new_smc = NULL; 396ac713874SUrsula Braun goto out; 397ac713874SUrsula Braun } 398ac713874SUrsula Braun 399ac713874SUrsula Braun (*new_smc)->clcsock = new_clcsock; 400ac713874SUrsula Braun out: 401ac713874SUrsula Braun return rc; 402ac713874SUrsula Braun } 403ac713874SUrsula Braun 404*a046d57dSUrsula Braun /* add a just created sock to the accept queue of the listen sock as 405*a046d57dSUrsula Braun * candidate for a following socket accept call from user space 406*a046d57dSUrsula Braun */ 407*a046d57dSUrsula Braun static void smc_accept_enqueue(struct sock *parent, struct sock *sk) 408*a046d57dSUrsula Braun { 409*a046d57dSUrsula Braun struct smc_sock *par = smc_sk(parent); 410*a046d57dSUrsula Braun 
411*a046d57dSUrsula Braun sock_hold(sk); 412*a046d57dSUrsula Braun spin_lock(&par->accept_q_lock); 413*a046d57dSUrsula Braun list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q); 414*a046d57dSUrsula Braun spin_unlock(&par->accept_q_lock); 415*a046d57dSUrsula Braun sk_acceptq_added(parent); 416*a046d57dSUrsula Braun } 417*a046d57dSUrsula Braun 418*a046d57dSUrsula Braun /* remove a socket from the accept queue of its parental listening socket */ 419*a046d57dSUrsula Braun static void smc_accept_unlink(struct sock *sk) 420*a046d57dSUrsula Braun { 421*a046d57dSUrsula Braun struct smc_sock *par = smc_sk(sk)->listen_smc; 422*a046d57dSUrsula Braun 423*a046d57dSUrsula Braun spin_lock(&par->accept_q_lock); 424*a046d57dSUrsula Braun list_del_init(&smc_sk(sk)->accept_q); 425*a046d57dSUrsula Braun spin_unlock(&par->accept_q_lock); 426*a046d57dSUrsula Braun sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk); 427*a046d57dSUrsula Braun sock_put(sk); 428*a046d57dSUrsula Braun } 429*a046d57dSUrsula Braun 430*a046d57dSUrsula Braun /* remove a sock from the accept queue to bind it to a new socket created 431*a046d57dSUrsula Braun * for a socket accept call from user space 432*a046d57dSUrsula Braun */ 433*a046d57dSUrsula Braun static struct sock *smc_accept_dequeue(struct sock *parent, 434*a046d57dSUrsula Braun struct socket *new_sock) 435*a046d57dSUrsula Braun { 436*a046d57dSUrsula Braun struct smc_sock *isk, *n; 437*a046d57dSUrsula Braun struct sock *new_sk; 438*a046d57dSUrsula Braun 439*a046d57dSUrsula Braun list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) { 440*a046d57dSUrsula Braun new_sk = (struct sock *)isk; 441*a046d57dSUrsula Braun 442*a046d57dSUrsula Braun smc_accept_unlink(new_sk); 443*a046d57dSUrsula Braun if (new_sk->sk_state == SMC_CLOSED) { 444*a046d57dSUrsula Braun /* tbd in follow-on patch: close this sock */ 445*a046d57dSUrsula Braun continue; 446*a046d57dSUrsula Braun } 447*a046d57dSUrsula Braun if (new_sock) 448*a046d57dSUrsula Braun 
sock_graft(new_sk, new_sock); 449*a046d57dSUrsula Braun return new_sk; 450*a046d57dSUrsula Braun } 451*a046d57dSUrsula Braun return NULL; 452*a046d57dSUrsula Braun } 453*a046d57dSUrsula Braun 454*a046d57dSUrsula Braun /* clean up for a created but never accepted sock */ 455*a046d57dSUrsula Braun static void smc_close_non_accepted(struct sock *sk) 456*a046d57dSUrsula Braun { 457*a046d57dSUrsula Braun struct smc_sock *smc = smc_sk(sk); 458*a046d57dSUrsula Braun 459*a046d57dSUrsula Braun sock_hold(sk); 460*a046d57dSUrsula Braun if (smc->clcsock) { 461*a046d57dSUrsula Braun struct socket *tcp; 462*a046d57dSUrsula Braun 463*a046d57dSUrsula Braun tcp = smc->clcsock; 464*a046d57dSUrsula Braun smc->clcsock = NULL; 465*a046d57dSUrsula Braun sock_release(tcp); 466*a046d57dSUrsula Braun } 467*a046d57dSUrsula Braun /* more closing stuff to be added with socket closing patch */ 468*a046d57dSUrsula Braun sock_put(sk); 469*a046d57dSUrsula Braun } 470*a046d57dSUrsula Braun 471*a046d57dSUrsula Braun /* setup for RDMA connection of server */ 472*a046d57dSUrsula Braun static void smc_listen_work(struct work_struct *work) 473*a046d57dSUrsula Braun { 474*a046d57dSUrsula Braun struct smc_sock *new_smc = container_of(work, struct smc_sock, 475*a046d57dSUrsula Braun smc_listen_work); 476*a046d57dSUrsula Braun struct socket *newclcsock = new_smc->clcsock; 477*a046d57dSUrsula Braun struct smc_sock *lsmc = new_smc->listen_smc; 478*a046d57dSUrsula Braun struct smc_clc_msg_accept_confirm cclc; 479*a046d57dSUrsula Braun struct sock *newsmcsk = &new_smc->sk; 480*a046d57dSUrsula Braun struct smc_clc_msg_proposal pclc; 481*a046d57dSUrsula Braun struct smc_ib_device *smcibdev; 482*a046d57dSUrsula Braun struct sockaddr_in peeraddr; 483*a046d57dSUrsula Braun int reason_code = 0; 484*a046d57dSUrsula Braun int rc = 0, len; 485*a046d57dSUrsula Braun __be32 subnet; 486*a046d57dSUrsula Braun u8 prefix_len; 487*a046d57dSUrsula Braun u8 ibport; 488*a046d57dSUrsula Braun 489*a046d57dSUrsula Braun /* do 
inband token exchange - 490*a046d57dSUrsula Braun *wait for and receive SMC Proposal CLC message 491*a046d57dSUrsula Braun */ 492*a046d57dSUrsula Braun reason_code = smc_clc_wait_msg(new_smc, &pclc, sizeof(pclc), 493*a046d57dSUrsula Braun SMC_CLC_PROPOSAL); 494*a046d57dSUrsula Braun if (reason_code < 0) 495*a046d57dSUrsula Braun goto out_err; 496*a046d57dSUrsula Braun if (reason_code > 0) 497*a046d57dSUrsula Braun goto decline_rdma; 498*a046d57dSUrsula Braun 499*a046d57dSUrsula Braun /* IPSec connections opt out of SMC-R optimizations */ 500*a046d57dSUrsula Braun if (using_ipsec(new_smc)) { 501*a046d57dSUrsula Braun reason_code = SMC_CLC_DECL_IPSEC; 502*a046d57dSUrsula Braun goto decline_rdma; 503*a046d57dSUrsula Braun } 504*a046d57dSUrsula Braun 505*a046d57dSUrsula Braun /* PNET table look up: search active ib_device and port 506*a046d57dSUrsula Braun * within same PNETID that also contains the ethernet device 507*a046d57dSUrsula Braun * used for the internal TCP socket 508*a046d57dSUrsula Braun */ 509*a046d57dSUrsula Braun smc_pnet_find_roce_resource(newclcsock->sk, &smcibdev, &ibport); 510*a046d57dSUrsula Braun if (!smcibdev) { 511*a046d57dSUrsula Braun reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ 512*a046d57dSUrsula Braun goto decline_rdma; 513*a046d57dSUrsula Braun } 514*a046d57dSUrsula Braun 515*a046d57dSUrsula Braun /* determine subnet and mask from internal TCP socket */ 516*a046d57dSUrsula Braun rc = smc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len); 517*a046d57dSUrsula Braun if (rc) { 518*a046d57dSUrsula Braun reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ 519*a046d57dSUrsula Braun goto decline_rdma; 520*a046d57dSUrsula Braun } 521*a046d57dSUrsula Braun if ((pclc.outgoing_subnet != subnet) || 522*a046d57dSUrsula Braun (pclc.prefix_len != prefix_len)) { 523*a046d57dSUrsula Braun reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ 524*a046d57dSUrsula Braun goto decline_rdma; 525*a046d57dSUrsula Braun } 
526*a046d57dSUrsula Braun 527*a046d57dSUrsula Braun /* get address of the peer connected to the internal TCP socket */ 528*a046d57dSUrsula Braun kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr, &len); 529*a046d57dSUrsula Braun 530*a046d57dSUrsula Braun /* tbd in follow-on patch: more steps to setup RDMA communcication, 531*a046d57dSUrsula Braun * create connection, link_group, link 532*a046d57dSUrsula Braun */ 533*a046d57dSUrsula Braun 534*a046d57dSUrsula Braun /* tbd in follow-on patch: more steps to setup RDMA communcication, 535*a046d57dSUrsula Braun * create rmbs, map rmbs 536*a046d57dSUrsula Braun */ 537*a046d57dSUrsula Braun 538*a046d57dSUrsula Braun rc = smc_clc_send_accept(new_smc); 539*a046d57dSUrsula Braun if (rc) 540*a046d57dSUrsula Braun goto out_err; 541*a046d57dSUrsula Braun 542*a046d57dSUrsula Braun /* receive SMC Confirm CLC message */ 543*a046d57dSUrsula Braun reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc), 544*a046d57dSUrsula Braun SMC_CLC_CONFIRM); 545*a046d57dSUrsula Braun if (reason_code < 0) 546*a046d57dSUrsula Braun goto out_err; 547*a046d57dSUrsula Braun if (reason_code > 0) 548*a046d57dSUrsula Braun goto decline_rdma; 549*a046d57dSUrsula Braun 550*a046d57dSUrsula Braun /* tbd in follow-on patch: more steps to setup RDMA communcication, 551*a046d57dSUrsula Braun * rtoken_handling, modify_qp 552*a046d57dSUrsula Braun */ 553*a046d57dSUrsula Braun 554*a046d57dSUrsula Braun out_connected: 555*a046d57dSUrsula Braun sk_refcnt_debug_inc(newsmcsk); 556*a046d57dSUrsula Braun newsmcsk->sk_state = SMC_ACTIVE; 557*a046d57dSUrsula Braun enqueue: 558*a046d57dSUrsula Braun lock_sock(&lsmc->sk); 559*a046d57dSUrsula Braun if (lsmc->sk.sk_state == SMC_LISTEN) { 560*a046d57dSUrsula Braun smc_accept_enqueue(&lsmc->sk, newsmcsk); 561*a046d57dSUrsula Braun } else { /* no longer listening */ 562*a046d57dSUrsula Braun smc_close_non_accepted(newsmcsk); 563*a046d57dSUrsula Braun } 564*a046d57dSUrsula Braun release_sock(&lsmc->sk); 
565*a046d57dSUrsula Braun 566*a046d57dSUrsula Braun /* Wake up accept */ 567*a046d57dSUrsula Braun lsmc->sk.sk_data_ready(&lsmc->sk); 568*a046d57dSUrsula Braun sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */ 569*a046d57dSUrsula Braun return; 570*a046d57dSUrsula Braun 571*a046d57dSUrsula Braun decline_rdma: 572*a046d57dSUrsula Braun /* RDMA setup failed, switch back to TCP */ 573*a046d57dSUrsula Braun new_smc->use_fallback = true; 574*a046d57dSUrsula Braun if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) { 575*a046d57dSUrsula Braun rc = smc_clc_send_decline(new_smc, reason_code, 0); 576*a046d57dSUrsula Braun if (rc < sizeof(struct smc_clc_msg_decline)) 577*a046d57dSUrsula Braun goto out_err; 578*a046d57dSUrsula Braun } 579*a046d57dSUrsula Braun goto out_connected; 580*a046d57dSUrsula Braun 581*a046d57dSUrsula Braun out_err: 582*a046d57dSUrsula Braun newsmcsk->sk_state = SMC_CLOSED; 583*a046d57dSUrsula Braun goto enqueue; /* queue new sock with sk_err set */ 584*a046d57dSUrsula Braun } 585*a046d57dSUrsula Braun 586*a046d57dSUrsula Braun static void smc_tcp_listen_work(struct work_struct *work) 587*a046d57dSUrsula Braun { 588*a046d57dSUrsula Braun struct smc_sock *lsmc = container_of(work, struct smc_sock, 589*a046d57dSUrsula Braun tcp_listen_work); 590*a046d57dSUrsula Braun struct smc_sock *new_smc; 591*a046d57dSUrsula Braun int rc = 0; 592*a046d57dSUrsula Braun 593*a046d57dSUrsula Braun lock_sock(&lsmc->sk); 594*a046d57dSUrsula Braun while (lsmc->sk.sk_state == SMC_LISTEN) { 595*a046d57dSUrsula Braun rc = smc_clcsock_accept(lsmc, &new_smc); 596*a046d57dSUrsula Braun if (rc) 597*a046d57dSUrsula Braun goto out; 598*a046d57dSUrsula Braun if (!new_smc) 599*a046d57dSUrsula Braun continue; 600*a046d57dSUrsula Braun 601*a046d57dSUrsula Braun new_smc->listen_smc = lsmc; 602*a046d57dSUrsula Braun new_smc->use_fallback = false; /* assume rdma capability first*/ 603*a046d57dSUrsula Braun sock_hold(&lsmc->sk); /* sock_put in smc_listen_work */ 
604*a046d57dSUrsula Braun INIT_WORK(&new_smc->smc_listen_work, smc_listen_work); 605*a046d57dSUrsula Braun smc_copy_sock_settings_to_smc(new_smc); 606*a046d57dSUrsula Braun schedule_work(&new_smc->smc_listen_work); 607*a046d57dSUrsula Braun } 608*a046d57dSUrsula Braun 609*a046d57dSUrsula Braun out: 610*a046d57dSUrsula Braun release_sock(&lsmc->sk); 611*a046d57dSUrsula Braun lsmc->sk.sk_data_ready(&lsmc->sk); /* no more listening, wake accept */ 612*a046d57dSUrsula Braun } 613*a046d57dSUrsula Braun 614ac713874SUrsula Braun static int smc_listen(struct socket *sock, int backlog) 615ac713874SUrsula Braun { 616ac713874SUrsula Braun struct sock *sk = sock->sk; 617ac713874SUrsula Braun struct smc_sock *smc; 618ac713874SUrsula Braun int rc; 619ac713874SUrsula Braun 620ac713874SUrsula Braun smc = smc_sk(sk); 621ac713874SUrsula Braun lock_sock(sk); 622ac713874SUrsula Braun 623ac713874SUrsula Braun rc = -EINVAL; 624ac713874SUrsula Braun if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN)) 625ac713874SUrsula Braun goto out; 626ac713874SUrsula Braun 627ac713874SUrsula Braun rc = 0; 628ac713874SUrsula Braun if (sk->sk_state == SMC_LISTEN) { 629ac713874SUrsula Braun sk->sk_max_ack_backlog = backlog; 630ac713874SUrsula Braun goto out; 631ac713874SUrsula Braun } 632ac713874SUrsula Braun /* some socket options are handled in core, so we could not apply 633ac713874SUrsula Braun * them to the clc socket -- copy smc socket options to clc socket 634ac713874SUrsula Braun */ 635ac713874SUrsula Braun smc_copy_sock_settings_to_clc(smc); 636ac713874SUrsula Braun 637ac713874SUrsula Braun rc = kernel_listen(smc->clcsock, backlog); 638ac713874SUrsula Braun if (rc) 639ac713874SUrsula Braun goto out; 640ac713874SUrsula Braun sk->sk_max_ack_backlog = backlog; 641ac713874SUrsula Braun sk->sk_ack_backlog = 0; 642ac713874SUrsula Braun sk->sk_state = SMC_LISTEN; 643*a046d57dSUrsula Braun INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); 644*a046d57dSUrsula Braun 
schedule_work(&smc->tcp_listen_work);

out:
	release_sock(sk);
	return rc;
}

/* Accept one connection from a listening SMC socket.
 *
 * @sock:     the listening socket
 * @new_sock: socket handed to smc_accept_dequeue() for the accepted child
 * @flags:    only O_NONBLOCK is evaluated here (selects the wait timeout)
 *
 * Returns -EINVAL if @sock is not in state SMC_LISTEN, -EAGAIN if no child
 * could be dequeued and the timeout is (or has dropped to) zero, the value of
 * sock_intr_errno() if a signal is pending after a wait, and otherwise the
 * result of sock_error() for the dequeued child sock.
 */
static int smc_accept(struct socket *sock, struct socket *new_sock,
		      int flags)
{
	struct sock *sk = sock->sk, *nsk;
	DECLARE_WAITQUEUE(wait, current);
	struct smc_sock *lsmc;
	long timeo;
	int rc = 0;

	lsmc = smc_sk(sk);
	lock_sock(sk);

	if (lsmc->sk.sk_state != SMC_LISTEN) {
		rc = -EINVAL;
		goto out;
	}

	/* Wait for an incoming connection */
	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
	add_wait_queue_exclusive(sk_sleep(sk), &wait);
	while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
		/* mark ourselves sleeping before testing the timeout so the
		 * state is already set when schedule_timeout() is reached
		 */
		set_current_state(TASK_INTERRUPTIBLE);
		if (!timeo) {
			rc = -EAGAIN;
			break;
		}
		/* the sock lock is dropped for the duration of the sleep and
		 * re-taken before the accept queue is inspected again
		 */
		release_sock(sk);
		timeo = schedule_timeout(timeo);
		/* wakeup by sk_data_ready in smc_listen_work() */
		sched_annotate_sleep();
		lock_sock(sk);
		if (signal_pending(current)) {
			rc = sock_intr_errno(timeo);
			break;
		}
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(sk_sleep(sk), &wait);

	/* nsk is only non-NULL when the loop ended without setting rc */
	if (!rc)
		rc = sock_error(nsk);

out:
	release_sock(sk);
	return rc;
}

/* Report the local or peer address by forwarding to the getname handler of
 * the internal CLC socket.
 *
 * Returns -ENOTCONN if the peer address is requested while the socket is not
 * in state SMC_ACTIVE; otherwise whatever the CLC socket's getname returns.
 */
static int smc_getname(struct socket *sock, struct sockaddr *addr,
		       int *len, int peer)
{
	struct smc_sock *smc;

	if (peer && (sock->sk->sk_state != SMC_ACTIVE))
		return -ENOTCONN;

	smc = smc_sk(sock->sk);

	return smc->clcsock->ops->getname(smc->clcsock, addr, len, peer);
}

/* Send data on an SMC socket.
 *
 * Returns -EPIPE unless the socket is in state SMC_ACTIVE. In fallback mode
 * the request is passed to the CLC socket's sendmsg handler; otherwise the
 * result of sock_no_sendmsg() is returned.
 */
static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state != SMC_ACTIVE)
		goto out;
	if (smc->use_fallback)
		rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
	else
		rc = sock_no_sendmsg(sock, msg, len);
out:
	release_sock(sk);
	return rc;
}

/* Receive data from an SMC socket.
 *
 * Returns -ENOTCONN unless the socket is in state SMC_ACTIVE or SMC_CLOSED.
 * In fallback mode the request is passed to the CLC socket's recvmsg
 * handler; otherwise the result of sock_no_recvmsg() is returned.
 */
static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
		       int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_CLOSED))
		goto out;

	if (smc->use_fallback)
		rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
	else
		rc = sock_no_recvmsg(sock, msg, len, flags);
out:
	release_sock(sk);
	return rc;
}

/* Poll helper for listening sockets: walk the accept queue of @parent under
 * the sock lock and return POLLIN | POLLRDNORM as soon as one queued entry
 * is in state SMC_ACTIVE, or 0 if there is none.
 */
static unsigned int smc_accept_poll(struct sock *parent)
{
	struct smc_sock *isk;
	struct sock *sk;

	lock_sock(parent);
	list_for_each_entry(isk, &smc_sk(parent)->accept_q, accept_q) {
		/* NOTE(review): the cast presumably relies on struct sock
		 * being the first member of struct smc_sock - confirm in smc.h
		 */
		sk = (struct sock *)isk;

		if (sk->sk_state == SMC_ACTIVE) {
			release_sock(parent);
			return POLLIN | POLLRDNORM;
		}
	}
	release_sock(parent);

	return 0;
}

/* Poll handler of the SMC socket.
 *
 * While the socket is still in state SMC_INIT, or when it runs in fallback
 * mode, polling is done on the CLC socket. If the CLC socket reports POLLOUT
 * while the SMC socket is still in SMC_INIT, the CLC socket's sk_err is
 * copied over and, if it is zero, smc_connect_rdma() is invoked from here.
 * In all other cases the caller is registered on the SMC socket's own wait
 * queue and only accept readiness and sk_err are reported.
 *
 * Returns the resulting poll event mask.
 */
static unsigned int smc_poll(struct file *file, struct socket *sock,
			     poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask = 0;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sock->sk);
	if ((sk->sk_state == SMC_INIT) || smc->use_fallback) {
		/* delegate to CLC child sock */
		mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
		/* if non-blocking connect finished ... */
		lock_sock(sk);
		if ((sk->sk_state == SMC_INIT) && (mask & POLLOUT)) {
			sk->sk_err = smc->clcsock->sk->sk_err;
			if (sk->sk_err) {
				mask |= POLLERR;
			} else {
				rc = smc_connect_rdma(smc);
				if (rc < 0)
					mask |= POLLERR;
				else
					/* success cases including fallback */
					mask |= POLLOUT | POLLWRNORM;
			}
		}
		release_sock(sk);
	} else {
		sock_poll_wait(file, sk_sleep(sk), wait);
		if (sk->sk_state == SMC_LISTEN)
			/* woken up by sk_data_ready in smc_listen_work() */
			mask |= smc_accept_poll(sk);
		if (sk->sk_err)
			mask |= POLLERR;
		/* for now - to be enhanced in follow-on patch */
	}

	return mask;
}

/* Shut down part or all of a connection.
 *
 * Returns -EINVAL if @how is outside SHUT_RD..SHUT_RDWR and -ENOTCONN if the
 * socket is already in state SMC_CLOSED. In fallback mode the CLC socket is
 * shut down, its sk_shutdown flags are mirrored into the SMC sock, and the
 * SMC sock moves to SMC_CLOSED once both directions are shut down. Without
 * fallback the result of sock_no_shutdown() is returned.
 */
static int smc_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;

	smc = smc_sk(sk);

	/* argument check is done before the sock lock is taken, hence the
	 * separate exit label that skips release_sock()
	 */
	if ((how < SHUT_RD) || (how > SHUT_RDWR))
		goto out_err;

	lock_sock(sk);

	rc = -ENOTCONN;
	if (sk->sk_state == SMC_CLOSED)
		goto out;
	if (smc->use_fallback) {
		rc = kernel_sock_shutdown(smc->clcsock, how);
		sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
		if (sk->sk_shutdown == SHUTDOWN_MASK)
			sk->sk_state = SMC_CLOSED;
	} else {
		rc = sock_no_shutdown(sock, how);
	}

out:
	release_sock(sk);

out_err:
	return rc;
}

/* Set a socket option; the request is forwarded unchanged to the setsockopt
 * handler of the CLC socket and its return value is passed back.
 */
static int smc_setsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;

	smc = smc_sk(sk);

	/* generic setsockopts reaching us here always apply to the
	 * CLC socket
	 */
	return smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
					     optval, optlen);
}

/* Get a socket option; the request is forwarded unchanged to the getsockopt
 * handler of the CLC socket and its return value is passed back.
 */
static int smc_getsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	struct smc_sock *smc;

	smc = smc_sk(sock->sk);
	/* socket options apply to the CLC socket */
	return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
					     optval, optlen);
}

/* ioctl handler: in fallback mode the call is passed to the CLC socket's
 * ioctl handler, otherwise the result of sock_no_ioctl() is returned.
 */
static int smc_ioctl(struct socket *sock, unsigned int cmd,
		     unsigned long arg)
{
	struct smc_sock *smc;

	smc = smc_sk(sock->sk);
	if (smc->use_fallback)
		return smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
	else
		return sock_no_ioctl(sock, cmd, arg);
}

/* Send a page on an SMC socket.
 *
 * Returns -EPIPE unless the socket is in state SMC_ACTIVE. In fallback mode
 * the page is sent via kernel_sendpage() on the CLC socket; otherwise the
 * result of sock_no_sendpage() is returned.
 */
static ssize_t smc_sendpage(struct socket *sock, struct page *page,
			    int offset, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EPIPE;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state != SMC_ACTIVE)
		goto out;
	if (smc->use_fallback)
		rc = kernel_sendpage(smc->clcsock, page, offset,
				     size, flags);
	else
		rc = sock_no_sendpage(sock, page, offset, size, flags);

out:
	release_sock(sk);
	return rc;
}

/* Splice data from an SMC socket into a pipe.
 *
 * Returns -ENOTCONN unless the socket is in state SMC_ACTIVE or SMC_CLOSED.
 * In fallback mode the request is passed to the CLC socket's splice_read
 * handler; otherwise -EOPNOTSUPP is returned.
 */
static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
			       struct pipe_inode_info *pipe, size_t len,
			       unsigned int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_CLOSED))
		goto out;
	if (smc->use_fallback) {
		rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
						     pipe, len, flags);
	} else {
		rc = -EOPNOTSUPP;
	}
out:
	release_sock(sk);
	return rc;
}

/* Socket operations installed on every PF_SMC socket by smc_create();
 * socketpair and mmap use the generic sock_no_*() stubs.
 */
/* must look like tcp */
static const struct proto_ops smc_sock_ops = {
	.family		= PF_SMC,
	.owner		= THIS_MODULE,
	.release	= smc_release,
	.bind		= smc_bind,
	.connect	= smc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= smc_accept,
	.getname	= smc_getname,
	.poll		= smc_poll,
	.ioctl		= smc_ioctl,
	.listen		= smc_listen,
	.shutdown	= smc_shutdown,
	.setsockopt	= smc_setsockopt,
	.getsockopt	= smc_getsockopt,
	.sendmsg	= smc_sendmsg,
	.recvmsg	= smc_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= smc_sendpage,
	.splice_read	= smc_splice_read,
};

/* Create handler of the PF_SMC protocol family.
 *
 * Returns -ESOCKTNOSUPPORT for socket types other than SOCK_STREAM,
 * -EPROTONOSUPPORT for protocols other than IPPROTO_IP and IPPROTO_TCP,
 * -ENOBUFS if smc_sock_alloc() yields no sock, and otherwise the return code
 * of sock_create_kern() for the internal TCP socket (0 on success).
 */
static int smc_create(struct net *net, struct socket *sock, int protocol,
		      int kern)
{
	struct smc_sock *smc;
	struct sock *sk;
	int rc;

	rc = -ESOCKTNOSUPPORT;
	if (sock->type != SOCK_STREAM)
		goto out;

	rc = -EPROTONOSUPPORT;
	if ((protocol != IPPROTO_IP) && (protocol != IPPROTO_TCP))
		goto out;

	rc = -ENOBUFS;
	sock->ops = &smc_sock_ops;
	sk = smc_sock_alloc(net, sock);
	if (!sk)
		goto out;

	/* create internal TCP socket for CLC handshake and fallback */
	smc = smc_sk(sk);
	smc->use_fallback = false; /* assume rdma capability first */
	rc = sock_create_kern(net, PF_INET, SOCK_STREAM,
			      IPPROTO_TCP, &smc->clcsock);
	/* without a CLC socket the freshly allocated sock is released again */
	if (rc)
		sk_common_release(sk);

out:
	return rc;
}

/* Protocol family descriptor passed to sock_register() in smc_init() */
static const struct net_proto_family smc_sock_family_ops = {
	.family	= PF_SMC,
	.owner	= THIS_MODULE,
	.create	= smc_create,
};

/* Module init: sets up the pnet part, registers the SMC proto and the PF_SMC
 * socket family, then registers the IB client. A failing step undoes the
 * steps that already succeeded, in reverse order, and its error code is
 * returned; 0 is returned when all steps succeed.
 */
static int __init smc_init(void)
{
	int rc;

	rc = smc_pnet_init();
	if (rc)
		return rc;

	rc = proto_register(&smc_proto, 1);
	if (rc) {
		pr_err("%s: proto_register fails with %d\n", __func__, rc);
		goto out_pnet;
	}

	rc = sock_register(&smc_sock_family_ops);
	if (rc) {
		pr_err("%s: sock_register fails with %d\n", __func__, rc);
		goto out_proto;
	}

	rc = smc_ib_register_client();
	if (rc) {
		pr_err("%s: ib_register fails with %d\n", __func__, rc);
		goto out_sock;
	}

	return 0;

	/* error unwinding: each label undoes one earlier registration */
out_sock:
	sock_unregister(PF_SMC);
out_proto:
	proto_unregister(&smc_proto);
out_pnet:
	smc_pnet_exit();
	return rc;
}

/* Module exit: tears everything down in the reverse order of smc_init() */
static void __exit smc_exit(void)
{
	smc_ib_unregister_client();
	sock_unregister(PF_SMC);
	proto_unregister(&smc_proto);
	smc_pnet_exit();
}

module_init(smc_init);
module_exit(smc_exit);

MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("smc socket address family");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_SMC);