xref: /openbmc/linux/net/smc/af_smc.c (revision a046d57da19f812216f393e7c535f5858f793ac3)
1ac713874SUrsula Braun /*
2ac713874SUrsula Braun  *  Shared Memory Communications over RDMA (SMC-R) and RoCE
3ac713874SUrsula Braun  *
4ac713874SUrsula Braun  *  AF_SMC protocol family socket handler keeping the AF_INET sock address type
5ac713874SUrsula Braun  *  applies to SOCK_STREAM sockets only
6ac713874SUrsula Braun  *  offers an alternative communication option for TCP-protocol sockets
7ac713874SUrsula Braun  *  applicable with RoCE-cards only
8ac713874SUrsula Braun  *
9*a046d57dSUrsula Braun  *  Initial restrictions:
10*a046d57dSUrsula Braun  *    - non-blocking connect postponed
11*a046d57dSUrsula Braun  *    - IPv6 support postponed
12*a046d57dSUrsula Braun  *    - support for alternate links postponed
13*a046d57dSUrsula Braun  *    - partial support for non-blocking sockets only
14*a046d57dSUrsula Braun  *    - support for urgent data postponed
15*a046d57dSUrsula Braun  *
16ac713874SUrsula Braun  *  Copyright IBM Corp. 2016
17ac713874SUrsula Braun  *
18ac713874SUrsula Braun  *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
19ac713874SUrsula Braun  *              based on prototype from Frank Blaschka
20ac713874SUrsula Braun  */
21ac713874SUrsula Braun 
22ac713874SUrsula Braun #define KMSG_COMPONENT "smc"
23ac713874SUrsula Braun #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
24ac713874SUrsula Braun 
25ac713874SUrsula Braun #include <linux/module.h>
26ac713874SUrsula Braun #include <linux/socket.h>
27*a046d57dSUrsula Braun #include <linux/inetdevice.h>
28*a046d57dSUrsula Braun #include <linux/workqueue.h>
29ac713874SUrsula Braun #include <net/sock.h>
30*a046d57dSUrsula Braun #include <net/tcp.h>
31ac713874SUrsula Braun 
32ac713874SUrsula Braun #include "smc.h"
33*a046d57dSUrsula Braun #include "smc_clc.h"
34a4cf0443SUrsula Braun #include "smc_ib.h"
356812baabSThomas Richter #include "smc_pnet.h"
36ac713874SUrsula Braun 
37*a046d57dSUrsula Braun static void smc_tcp_listen_work(struct work_struct *);
38*a046d57dSUrsula Braun 
39ac713874SUrsula Braun static void smc_set_keepalive(struct sock *sk, int val)
40ac713874SUrsula Braun {
41ac713874SUrsula Braun 	struct smc_sock *smc = smc_sk(sk);
42ac713874SUrsula Braun 
43ac713874SUrsula Braun 	smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
44ac713874SUrsula Braun }
45ac713874SUrsula Braun 
46ac713874SUrsula Braun static struct proto smc_proto = {
47ac713874SUrsula Braun 	.name		= "SMC",
48ac713874SUrsula Braun 	.owner		= THIS_MODULE,
49ac713874SUrsula Braun 	.keepalive	= smc_set_keepalive,
50ac713874SUrsula Braun 	.obj_size	= sizeof(struct smc_sock),
51ac713874SUrsula Braun 	.slab_flags	= SLAB_DESTROY_BY_RCU,
52ac713874SUrsula Braun };
53ac713874SUrsula Braun 
54ac713874SUrsula Braun static int smc_release(struct socket *sock)
55ac713874SUrsula Braun {
56ac713874SUrsula Braun 	struct sock *sk = sock->sk;
57ac713874SUrsula Braun 	struct smc_sock *smc;
58ac713874SUrsula Braun 
59ac713874SUrsula Braun 	if (!sk)
60ac713874SUrsula Braun 		goto out;
61ac713874SUrsula Braun 
62ac713874SUrsula Braun 	smc = smc_sk(sk);
63ac713874SUrsula Braun 	lock_sock(sk);
64ac713874SUrsula Braun 
65ac713874SUrsula Braun 	sk->sk_state = SMC_CLOSED;
66ac713874SUrsula Braun 	if (smc->clcsock) {
67ac713874SUrsula Braun 		sock_release(smc->clcsock);
68ac713874SUrsula Braun 		smc->clcsock = NULL;
69ac713874SUrsula Braun 	}
70ac713874SUrsula Braun 
71ac713874SUrsula Braun 	/* detach socket */
72ac713874SUrsula Braun 	sock_orphan(sk);
73ac713874SUrsula Braun 	sock->sk = NULL;
74ac713874SUrsula Braun 	release_sock(sk);
75ac713874SUrsula Braun 
76ac713874SUrsula Braun 	sock_put(sk);
77ac713874SUrsula Braun out:
78ac713874SUrsula Braun 	return 0;
79ac713874SUrsula Braun }
80ac713874SUrsula Braun 
81ac713874SUrsula Braun static void smc_destruct(struct sock *sk)
82ac713874SUrsula Braun {
83ac713874SUrsula Braun 	if (sk->sk_state != SMC_CLOSED)
84ac713874SUrsula Braun 		return;
85ac713874SUrsula Braun 	if (!sock_flag(sk, SOCK_DEAD))
86ac713874SUrsula Braun 		return;
87ac713874SUrsula Braun 
88ac713874SUrsula Braun 	sk_refcnt_debug_dec(sk);
89ac713874SUrsula Braun }
90ac713874SUrsula Braun 
91ac713874SUrsula Braun static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
92ac713874SUrsula Braun {
93ac713874SUrsula Braun 	struct smc_sock *smc;
94ac713874SUrsula Braun 	struct sock *sk;
95ac713874SUrsula Braun 
96ac713874SUrsula Braun 	sk = sk_alloc(net, PF_SMC, GFP_KERNEL, &smc_proto, 0);
97ac713874SUrsula Braun 	if (!sk)
98ac713874SUrsula Braun 		return NULL;
99ac713874SUrsula Braun 
100ac713874SUrsula Braun 	sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
101ac713874SUrsula Braun 	sk->sk_state = SMC_INIT;
102ac713874SUrsula Braun 	sk->sk_destruct = smc_destruct;
103ac713874SUrsula Braun 	sk->sk_protocol = SMCPROTO_SMC;
104ac713874SUrsula Braun 	smc = smc_sk(sk);
105*a046d57dSUrsula Braun 	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
106*a046d57dSUrsula Braun 	INIT_LIST_HEAD(&smc->accept_q);
107*a046d57dSUrsula Braun 	spin_lock_init(&smc->accept_q_lock);
108*a046d57dSUrsula Braun 	sk_refcnt_debug_inc(sk);
109ac713874SUrsula Braun 
110ac713874SUrsula Braun 	return sk;
111ac713874SUrsula Braun }
112ac713874SUrsula Braun 
113ac713874SUrsula Braun static int smc_bind(struct socket *sock, struct sockaddr *uaddr,
114ac713874SUrsula Braun 		    int addr_len)
115ac713874SUrsula Braun {
116ac713874SUrsula Braun 	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
117ac713874SUrsula Braun 	struct sock *sk = sock->sk;
118ac713874SUrsula Braun 	struct smc_sock *smc;
119ac713874SUrsula Braun 	int rc;
120ac713874SUrsula Braun 
121ac713874SUrsula Braun 	smc = smc_sk(sk);
122ac713874SUrsula Braun 
123ac713874SUrsula Braun 	/* replicate tests from inet_bind(), to be safe wrt. future changes */
124ac713874SUrsula Braun 	rc = -EINVAL;
125ac713874SUrsula Braun 	if (addr_len < sizeof(struct sockaddr_in))
126ac713874SUrsula Braun 		goto out;
127ac713874SUrsula Braun 
128ac713874SUrsula Braun 	rc = -EAFNOSUPPORT;
129ac713874SUrsula Braun 	/* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
130ac713874SUrsula Braun 	if ((addr->sin_family != AF_INET) &&
131ac713874SUrsula Braun 	    ((addr->sin_family != AF_UNSPEC) ||
132ac713874SUrsula Braun 	     (addr->sin_addr.s_addr != htonl(INADDR_ANY))))
133ac713874SUrsula Braun 		goto out;
134ac713874SUrsula Braun 
135ac713874SUrsula Braun 	lock_sock(sk);
136ac713874SUrsula Braun 
137ac713874SUrsula Braun 	/* Check if socket is already active */
138ac713874SUrsula Braun 	rc = -EINVAL;
139ac713874SUrsula Braun 	if (sk->sk_state != SMC_INIT)
140ac713874SUrsula Braun 		goto out_rel;
141ac713874SUrsula Braun 
142ac713874SUrsula Braun 	smc->clcsock->sk->sk_reuse = sk->sk_reuse;
143ac713874SUrsula Braun 	rc = kernel_bind(smc->clcsock, uaddr, addr_len);
144ac713874SUrsula Braun 
145ac713874SUrsula Braun out_rel:
146ac713874SUrsula Braun 	release_sock(sk);
147ac713874SUrsula Braun out:
148ac713874SUrsula Braun 	return rc;
149ac713874SUrsula Braun }
150ac713874SUrsula Braun 
151ac713874SUrsula Braun static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
152ac713874SUrsula Braun 				   unsigned long mask)
153ac713874SUrsula Braun {
154ac713874SUrsula Braun 	/* options we don't get control via setsockopt for */
155ac713874SUrsula Braun 	nsk->sk_type = osk->sk_type;
156ac713874SUrsula Braun 	nsk->sk_sndbuf = osk->sk_sndbuf;
157ac713874SUrsula Braun 	nsk->sk_rcvbuf = osk->sk_rcvbuf;
158ac713874SUrsula Braun 	nsk->sk_sndtimeo = osk->sk_sndtimeo;
159ac713874SUrsula Braun 	nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
160ac713874SUrsula Braun 	nsk->sk_mark = osk->sk_mark;
161ac713874SUrsula Braun 	nsk->sk_priority = osk->sk_priority;
162ac713874SUrsula Braun 	nsk->sk_rcvlowat = osk->sk_rcvlowat;
163ac713874SUrsula Braun 	nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
164ac713874SUrsula Braun 	nsk->sk_err = osk->sk_err;
165ac713874SUrsula Braun 
166ac713874SUrsula Braun 	nsk->sk_flags &= ~mask;
167ac713874SUrsula Braun 	nsk->sk_flags |= osk->sk_flags & mask;
168ac713874SUrsula Braun }
169ac713874SUrsula Braun 
170ac713874SUrsula Braun #define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
171ac713874SUrsula Braun 			     (1UL << SOCK_KEEPOPEN) | \
172ac713874SUrsula Braun 			     (1UL << SOCK_LINGER) | \
173ac713874SUrsula Braun 			     (1UL << SOCK_BROADCAST) | \
174ac713874SUrsula Braun 			     (1UL << SOCK_TIMESTAMP) | \
175ac713874SUrsula Braun 			     (1UL << SOCK_DBG) | \
176ac713874SUrsula Braun 			     (1UL << SOCK_RCVTSTAMP) | \
177ac713874SUrsula Braun 			     (1UL << SOCK_RCVTSTAMPNS) | \
178ac713874SUrsula Braun 			     (1UL << SOCK_LOCALROUTE) | \
179ac713874SUrsula Braun 			     (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
180ac713874SUrsula Braun 			     (1UL << SOCK_RXQ_OVFL) | \
181ac713874SUrsula Braun 			     (1UL << SOCK_WIFI_STATUS) | \
182ac713874SUrsula Braun 			     (1UL << SOCK_NOFCS) | \
183ac713874SUrsula Braun 			     (1UL << SOCK_FILTER_LOCKED))
184ac713874SUrsula Braun /* copy only relevant settings and flags of SOL_SOCKET level from smc to
185ac713874SUrsula Braun  * clc socket (since smc is not called for these options from net/core)
186ac713874SUrsula Braun  */
187ac713874SUrsula Braun static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
188ac713874SUrsula Braun {
189ac713874SUrsula Braun 	smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
190ac713874SUrsula Braun }
191ac713874SUrsula Braun 
192ac713874SUrsula Braun #define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
193ac713874SUrsula Braun 			     (1UL << SOCK_KEEPOPEN) | \
194ac713874SUrsula Braun 			     (1UL << SOCK_LINGER) | \
195ac713874SUrsula Braun 			     (1UL << SOCK_DBG))
196ac713874SUrsula Braun /* copy only settings and flags relevant for smc from clc to smc socket */
197ac713874SUrsula Braun static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
198ac713874SUrsula Braun {
199ac713874SUrsula Braun 	smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
200ac713874SUrsula Braun }
201ac713874SUrsula Braun 
202*a046d57dSUrsula Braun /* determine subnet and mask of internal TCP socket */
203*a046d57dSUrsula Braun int smc_netinfo_by_tcpsk(struct socket *clcsock,
204*a046d57dSUrsula Braun 			 __be32 *subnet, u8 *prefix_len)
205*a046d57dSUrsula Braun {
206*a046d57dSUrsula Braun 	struct dst_entry *dst = sk_dst_get(clcsock->sk);
207*a046d57dSUrsula Braun 	struct sockaddr_in addr;
208*a046d57dSUrsula Braun 	int rc = -ENOENT;
209*a046d57dSUrsula Braun 	int len;
210*a046d57dSUrsula Braun 
211*a046d57dSUrsula Braun 	if (!dst) {
212*a046d57dSUrsula Braun 		rc = -ENOTCONN;
213*a046d57dSUrsula Braun 		goto out;
214*a046d57dSUrsula Braun 	}
215*a046d57dSUrsula Braun 	if (!dst->dev) {
216*a046d57dSUrsula Braun 		rc = -ENODEV;
217*a046d57dSUrsula Braun 		goto out_rel;
218*a046d57dSUrsula Braun 	}
219*a046d57dSUrsula Braun 
220*a046d57dSUrsula Braun 	/* get address to which the internal TCP socket is bound */
221*a046d57dSUrsula Braun 	kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len);
222*a046d57dSUrsula Braun 	/* analyze IPv4 specific data of net_device belonging to TCP socket */
223*a046d57dSUrsula Braun 	for_ifa(dst->dev->ip_ptr) {
224*a046d57dSUrsula Braun 		if (ifa->ifa_address != addr.sin_addr.s_addr)
225*a046d57dSUrsula Braun 			continue;
226*a046d57dSUrsula Braun 		*prefix_len = inet_mask_len(ifa->ifa_mask);
227*a046d57dSUrsula Braun 		*subnet = ifa->ifa_address & ifa->ifa_mask;
228*a046d57dSUrsula Braun 		rc = 0;
229*a046d57dSUrsula Braun 		break;
230*a046d57dSUrsula Braun 	} endfor_ifa(dst->dev->ip_ptr);
231*a046d57dSUrsula Braun 
232*a046d57dSUrsula Braun out_rel:
233*a046d57dSUrsula Braun 	dst_release(dst);
234*a046d57dSUrsula Braun out:
235*a046d57dSUrsula Braun 	return rc;
236*a046d57dSUrsula Braun }
237*a046d57dSUrsula Braun 
238*a046d57dSUrsula Braun /* setup for RDMA connection of client */
239*a046d57dSUrsula Braun static int smc_connect_rdma(struct smc_sock *smc)
240*a046d57dSUrsula Braun {
241*a046d57dSUrsula Braun 	struct smc_clc_msg_accept_confirm aclc;
242*a046d57dSUrsula Braun 	struct smc_ib_device *smcibdev;
243*a046d57dSUrsula Braun 	int reason_code = 0;
244*a046d57dSUrsula Braun 	int rc = 0;
245*a046d57dSUrsula Braun 	u8 ibport;
246*a046d57dSUrsula Braun 
247*a046d57dSUrsula Braun 	/* IPSec connections opt out of SMC-R optimizations */
248*a046d57dSUrsula Braun 	if (using_ipsec(smc)) {
249*a046d57dSUrsula Braun 		reason_code = SMC_CLC_DECL_IPSEC;
250*a046d57dSUrsula Braun 		goto decline_rdma;
251*a046d57dSUrsula Braun 	}
252*a046d57dSUrsula Braun 
253*a046d57dSUrsula Braun 	/* PNET table look up: search active ib_device and port
254*a046d57dSUrsula Braun 	 * within same PNETID that also contains the ethernet device
255*a046d57dSUrsula Braun 	 * used for the internal TCP socket
256*a046d57dSUrsula Braun 	 */
257*a046d57dSUrsula Braun 	smc_pnet_find_roce_resource(smc->clcsock->sk, &smcibdev, &ibport);
258*a046d57dSUrsula Braun 	if (!smcibdev) {
259*a046d57dSUrsula Braun 		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
260*a046d57dSUrsula Braun 		goto decline_rdma;
261*a046d57dSUrsula Braun 	}
262*a046d57dSUrsula Braun 
263*a046d57dSUrsula Braun 	/* do inband token exchange */
264*a046d57dSUrsula Braun 	reason_code = smc_clc_send_proposal(smc, smcibdev, ibport);
265*a046d57dSUrsula Braun 	if (reason_code < 0) {
266*a046d57dSUrsula Braun 		rc = reason_code;
267*a046d57dSUrsula Braun 		goto out_err;
268*a046d57dSUrsula Braun 	}
269*a046d57dSUrsula Braun 	if (reason_code > 0) /* configuration error */
270*a046d57dSUrsula Braun 		goto decline_rdma;
271*a046d57dSUrsula Braun 	/* receive SMC Accept CLC message */
272*a046d57dSUrsula Braun 	reason_code = smc_clc_wait_msg(smc, &aclc, sizeof(aclc),
273*a046d57dSUrsula Braun 				       SMC_CLC_ACCEPT);
274*a046d57dSUrsula Braun 	if (reason_code < 0) {
275*a046d57dSUrsula Braun 		rc = reason_code;
276*a046d57dSUrsula Braun 		goto out_err;
277*a046d57dSUrsula Braun 	}
278*a046d57dSUrsula Braun 	if (reason_code > 0)
279*a046d57dSUrsula Braun 		goto decline_rdma;
280*a046d57dSUrsula Braun 
281*a046d57dSUrsula Braun 	/* tbd in follow-on patch: more steps to setup RDMA communcication,
282*a046d57dSUrsula Braun 	 * create connection, link group, link
283*a046d57dSUrsula Braun 	 */
284*a046d57dSUrsula Braun 
285*a046d57dSUrsula Braun 	/* tbd in follow-on patch: more steps to setup RDMA communcication,
286*a046d57dSUrsula Braun 	 * create rmbs, map rmbs, rtoken_handling, modify_qp
287*a046d57dSUrsula Braun 	 */
288*a046d57dSUrsula Braun 
289*a046d57dSUrsula Braun 	rc = smc_clc_send_confirm(smc);
290*a046d57dSUrsula Braun 	if (rc)
291*a046d57dSUrsula Braun 		goto out_err;
292*a046d57dSUrsula Braun 
293*a046d57dSUrsula Braun 	/* tbd in follow-on patch: llc_confirm */
294*a046d57dSUrsula Braun 
295*a046d57dSUrsula Braun out_connected:
296*a046d57dSUrsula Braun 	smc_copy_sock_settings_to_clc(smc);
297*a046d57dSUrsula Braun 	smc->sk.sk_state = SMC_ACTIVE;
298*a046d57dSUrsula Braun 
299*a046d57dSUrsula Braun 	return rc;
300*a046d57dSUrsula Braun 
301*a046d57dSUrsula Braun decline_rdma:
302*a046d57dSUrsula Braun 	/* RDMA setup failed, switch back to TCP */
303*a046d57dSUrsula Braun 	smc->use_fallback = true;
304*a046d57dSUrsula Braun 	if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
305*a046d57dSUrsula Braun 		rc = smc_clc_send_decline(smc, reason_code, 0);
306*a046d57dSUrsula Braun 		if (rc < sizeof(struct smc_clc_msg_decline))
307*a046d57dSUrsula Braun 			goto out_err;
308*a046d57dSUrsula Braun 	}
309*a046d57dSUrsula Braun 	goto out_connected;
310*a046d57dSUrsula Braun 
311*a046d57dSUrsula Braun out_err:
312*a046d57dSUrsula Braun 	return rc;
313*a046d57dSUrsula Braun }
314*a046d57dSUrsula Braun 
315ac713874SUrsula Braun static int smc_connect(struct socket *sock, struct sockaddr *addr,
316ac713874SUrsula Braun 		       int alen, int flags)
317ac713874SUrsula Braun {
318ac713874SUrsula Braun 	struct sock *sk = sock->sk;
319ac713874SUrsula Braun 	struct smc_sock *smc;
320ac713874SUrsula Braun 	int rc = -EINVAL;
321ac713874SUrsula Braun 
322ac713874SUrsula Braun 	smc = smc_sk(sk);
323ac713874SUrsula Braun 
324ac713874SUrsula Braun 	/* separate smc parameter checking to be safe */
325ac713874SUrsula Braun 	if (alen < sizeof(addr->sa_family))
326ac713874SUrsula Braun 		goto out_err;
327ac713874SUrsula Braun 	if (addr->sa_family != AF_INET)
328ac713874SUrsula Braun 		goto out_err;
329*a046d57dSUrsula Braun 	smc->addr = addr;	/* needed for nonblocking connect */
330ac713874SUrsula Braun 
331ac713874SUrsula Braun 	lock_sock(sk);
332ac713874SUrsula Braun 	switch (sk->sk_state) {
333ac713874SUrsula Braun 	default:
334ac713874SUrsula Braun 		goto out;
335ac713874SUrsula Braun 	case SMC_ACTIVE:
336ac713874SUrsula Braun 		rc = -EISCONN;
337ac713874SUrsula Braun 		goto out;
338ac713874SUrsula Braun 	case SMC_INIT:
339ac713874SUrsula Braun 		rc = 0;
340ac713874SUrsula Braun 		break;
341ac713874SUrsula Braun 	}
342ac713874SUrsula Braun 
343ac713874SUrsula Braun 	smc_copy_sock_settings_to_clc(smc);
344ac713874SUrsula Braun 	rc = kernel_connect(smc->clcsock, addr, alen, flags);
345ac713874SUrsula Braun 	if (rc)
346ac713874SUrsula Braun 		goto out;
347ac713874SUrsula Braun 
348*a046d57dSUrsula Braun 	/* setup RDMA connection */
349*a046d57dSUrsula Braun 	rc = smc_connect_rdma(smc);
350*a046d57dSUrsula Braun 	if (rc < 0)
351*a046d57dSUrsula Braun 		goto out;
352*a046d57dSUrsula Braun 	else
353*a046d57dSUrsula Braun 		rc = 0; /* success cases including fallback */
354ac713874SUrsula Braun 
355ac713874SUrsula Braun out:
356ac713874SUrsula Braun 	release_sock(sk);
357ac713874SUrsula Braun out_err:
358ac713874SUrsula Braun 	return rc;
359ac713874SUrsula Braun }
360ac713874SUrsula Braun 
361ac713874SUrsula Braun static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
362ac713874SUrsula Braun {
363ac713874SUrsula Braun 	struct sock *sk = &lsmc->sk;
364ac713874SUrsula Braun 	struct socket *new_clcsock;
365ac713874SUrsula Braun 	struct sock *new_sk;
366ac713874SUrsula Braun 	int rc;
367ac713874SUrsula Braun 
368*a046d57dSUrsula Braun 	release_sock(&lsmc->sk);
369ac713874SUrsula Braun 	new_sk = smc_sock_alloc(sock_net(sk), NULL);
370ac713874SUrsula Braun 	if (!new_sk) {
371ac713874SUrsula Braun 		rc = -ENOMEM;
372ac713874SUrsula Braun 		lsmc->sk.sk_err = ENOMEM;
373ac713874SUrsula Braun 		*new_smc = NULL;
374*a046d57dSUrsula Braun 		lock_sock(&lsmc->sk);
375ac713874SUrsula Braun 		goto out;
376ac713874SUrsula Braun 	}
377ac713874SUrsula Braun 	*new_smc = smc_sk(new_sk);
378ac713874SUrsula Braun 
379ac713874SUrsula Braun 	rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
380*a046d57dSUrsula Braun 	lock_sock(&lsmc->sk);
381*a046d57dSUrsula Braun 	if  (rc < 0) {
382*a046d57dSUrsula Braun 		lsmc->sk.sk_err = -rc;
383*a046d57dSUrsula Braun 		new_sk->sk_state = SMC_CLOSED;
384*a046d57dSUrsula Braun 		sock_set_flag(new_sk, SOCK_DEAD);
385*a046d57dSUrsula Braun 		sock_put(new_sk);
386*a046d57dSUrsula Braun 		*new_smc = NULL;
387*a046d57dSUrsula Braun 		goto out;
388*a046d57dSUrsula Braun 	}
389*a046d57dSUrsula Braun 	if (lsmc->sk.sk_state == SMC_CLOSED) {
390*a046d57dSUrsula Braun 		if (new_clcsock)
391*a046d57dSUrsula Braun 			sock_release(new_clcsock);
392*a046d57dSUrsula Braun 		new_sk->sk_state = SMC_CLOSED;
393*a046d57dSUrsula Braun 		sock_set_flag(new_sk, SOCK_DEAD);
394ac713874SUrsula Braun 		sock_put(new_sk);
395ac713874SUrsula Braun 		*new_smc = NULL;
396ac713874SUrsula Braun 		goto out;
397ac713874SUrsula Braun 	}
398ac713874SUrsula Braun 
399ac713874SUrsula Braun 	(*new_smc)->clcsock = new_clcsock;
400ac713874SUrsula Braun out:
401ac713874SUrsula Braun 	return rc;
402ac713874SUrsula Braun }
403ac713874SUrsula Braun 
404*a046d57dSUrsula Braun /* add a just created sock to the accept queue of the listen sock as
405*a046d57dSUrsula Braun  * candidate for a following socket accept call from user space
406*a046d57dSUrsula Braun  */
407*a046d57dSUrsula Braun static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
408*a046d57dSUrsula Braun {
409*a046d57dSUrsula Braun 	struct smc_sock *par = smc_sk(parent);
410*a046d57dSUrsula Braun 
411*a046d57dSUrsula Braun 	sock_hold(sk);
412*a046d57dSUrsula Braun 	spin_lock(&par->accept_q_lock);
413*a046d57dSUrsula Braun 	list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
414*a046d57dSUrsula Braun 	spin_unlock(&par->accept_q_lock);
415*a046d57dSUrsula Braun 	sk_acceptq_added(parent);
416*a046d57dSUrsula Braun }
417*a046d57dSUrsula Braun 
418*a046d57dSUrsula Braun /* remove a socket from the accept queue of its parental listening socket */
419*a046d57dSUrsula Braun static void smc_accept_unlink(struct sock *sk)
420*a046d57dSUrsula Braun {
421*a046d57dSUrsula Braun 	struct smc_sock *par = smc_sk(sk)->listen_smc;
422*a046d57dSUrsula Braun 
423*a046d57dSUrsula Braun 	spin_lock(&par->accept_q_lock);
424*a046d57dSUrsula Braun 	list_del_init(&smc_sk(sk)->accept_q);
425*a046d57dSUrsula Braun 	spin_unlock(&par->accept_q_lock);
426*a046d57dSUrsula Braun 	sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
427*a046d57dSUrsula Braun 	sock_put(sk);
428*a046d57dSUrsula Braun }
429*a046d57dSUrsula Braun 
430*a046d57dSUrsula Braun /* remove a sock from the accept queue to bind it to a new socket created
431*a046d57dSUrsula Braun  * for a socket accept call from user space
432*a046d57dSUrsula Braun  */
433*a046d57dSUrsula Braun static struct sock *smc_accept_dequeue(struct sock *parent,
434*a046d57dSUrsula Braun 				       struct socket *new_sock)
435*a046d57dSUrsula Braun {
436*a046d57dSUrsula Braun 	struct smc_sock *isk, *n;
437*a046d57dSUrsula Braun 	struct sock *new_sk;
438*a046d57dSUrsula Braun 
439*a046d57dSUrsula Braun 	list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
440*a046d57dSUrsula Braun 		new_sk = (struct sock *)isk;
441*a046d57dSUrsula Braun 
442*a046d57dSUrsula Braun 		smc_accept_unlink(new_sk);
443*a046d57dSUrsula Braun 		if (new_sk->sk_state == SMC_CLOSED) {
444*a046d57dSUrsula Braun 			/* tbd in follow-on patch: close this sock */
445*a046d57dSUrsula Braun 			continue;
446*a046d57dSUrsula Braun 		}
447*a046d57dSUrsula Braun 		if (new_sock)
448*a046d57dSUrsula Braun 			sock_graft(new_sk, new_sock);
449*a046d57dSUrsula Braun 		return new_sk;
450*a046d57dSUrsula Braun 	}
451*a046d57dSUrsula Braun 	return NULL;
452*a046d57dSUrsula Braun }
453*a046d57dSUrsula Braun 
454*a046d57dSUrsula Braun /* clean up for a created but never accepted sock */
455*a046d57dSUrsula Braun static void smc_close_non_accepted(struct sock *sk)
456*a046d57dSUrsula Braun {
457*a046d57dSUrsula Braun 	struct smc_sock *smc = smc_sk(sk);
458*a046d57dSUrsula Braun 
459*a046d57dSUrsula Braun 	sock_hold(sk);
460*a046d57dSUrsula Braun 	if (smc->clcsock) {
461*a046d57dSUrsula Braun 		struct socket *tcp;
462*a046d57dSUrsula Braun 
463*a046d57dSUrsula Braun 		tcp = smc->clcsock;
464*a046d57dSUrsula Braun 		smc->clcsock = NULL;
465*a046d57dSUrsula Braun 		sock_release(tcp);
466*a046d57dSUrsula Braun 	}
467*a046d57dSUrsula Braun 	/* more closing stuff to be added with socket closing patch */
468*a046d57dSUrsula Braun 	sock_put(sk);
469*a046d57dSUrsula Braun }
470*a046d57dSUrsula Braun 
471*a046d57dSUrsula Braun /* setup for RDMA connection of server */
472*a046d57dSUrsula Braun static void smc_listen_work(struct work_struct *work)
473*a046d57dSUrsula Braun {
474*a046d57dSUrsula Braun 	struct smc_sock *new_smc = container_of(work, struct smc_sock,
475*a046d57dSUrsula Braun 						smc_listen_work);
476*a046d57dSUrsula Braun 	struct socket *newclcsock = new_smc->clcsock;
477*a046d57dSUrsula Braun 	struct smc_sock *lsmc = new_smc->listen_smc;
478*a046d57dSUrsula Braun 	struct smc_clc_msg_accept_confirm cclc;
479*a046d57dSUrsula Braun 	struct sock *newsmcsk = &new_smc->sk;
480*a046d57dSUrsula Braun 	struct smc_clc_msg_proposal pclc;
481*a046d57dSUrsula Braun 	struct smc_ib_device *smcibdev;
482*a046d57dSUrsula Braun 	struct sockaddr_in peeraddr;
483*a046d57dSUrsula Braun 	int reason_code = 0;
484*a046d57dSUrsula Braun 	int rc = 0, len;
485*a046d57dSUrsula Braun 	__be32 subnet;
486*a046d57dSUrsula Braun 	u8 prefix_len;
487*a046d57dSUrsula Braun 	u8 ibport;
488*a046d57dSUrsula Braun 
489*a046d57dSUrsula Braun 	/* do inband token exchange -
490*a046d57dSUrsula Braun 	 *wait for and receive SMC Proposal CLC message
491*a046d57dSUrsula Braun 	 */
492*a046d57dSUrsula Braun 	reason_code = smc_clc_wait_msg(new_smc, &pclc, sizeof(pclc),
493*a046d57dSUrsula Braun 				       SMC_CLC_PROPOSAL);
494*a046d57dSUrsula Braun 	if (reason_code < 0)
495*a046d57dSUrsula Braun 		goto out_err;
496*a046d57dSUrsula Braun 	if (reason_code > 0)
497*a046d57dSUrsula Braun 		goto decline_rdma;
498*a046d57dSUrsula Braun 
499*a046d57dSUrsula Braun 	/* IPSec connections opt out of SMC-R optimizations */
500*a046d57dSUrsula Braun 	if (using_ipsec(new_smc)) {
501*a046d57dSUrsula Braun 		reason_code = SMC_CLC_DECL_IPSEC;
502*a046d57dSUrsula Braun 		goto decline_rdma;
503*a046d57dSUrsula Braun 	}
504*a046d57dSUrsula Braun 
505*a046d57dSUrsula Braun 	/* PNET table look up: search active ib_device and port
506*a046d57dSUrsula Braun 	 * within same PNETID that also contains the ethernet device
507*a046d57dSUrsula Braun 	 * used for the internal TCP socket
508*a046d57dSUrsula Braun 	 */
509*a046d57dSUrsula Braun 	smc_pnet_find_roce_resource(newclcsock->sk, &smcibdev, &ibport);
510*a046d57dSUrsula Braun 	if (!smcibdev) {
511*a046d57dSUrsula Braun 		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
512*a046d57dSUrsula Braun 		goto decline_rdma;
513*a046d57dSUrsula Braun 	}
514*a046d57dSUrsula Braun 
515*a046d57dSUrsula Braun 	/* determine subnet and mask from internal TCP socket */
516*a046d57dSUrsula Braun 	rc = smc_netinfo_by_tcpsk(newclcsock, &subnet, &prefix_len);
517*a046d57dSUrsula Braun 	if (rc) {
518*a046d57dSUrsula Braun 		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
519*a046d57dSUrsula Braun 		goto decline_rdma;
520*a046d57dSUrsula Braun 	}
521*a046d57dSUrsula Braun 	if ((pclc.outgoing_subnet != subnet) ||
522*a046d57dSUrsula Braun 	    (pclc.prefix_len != prefix_len)) {
523*a046d57dSUrsula Braun 		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
524*a046d57dSUrsula Braun 		goto decline_rdma;
525*a046d57dSUrsula Braun 	}
526*a046d57dSUrsula Braun 
527*a046d57dSUrsula Braun 	/* get address of the peer connected to the internal TCP socket */
528*a046d57dSUrsula Braun 	kernel_getpeername(newclcsock, (struct sockaddr *)&peeraddr, &len);
529*a046d57dSUrsula Braun 
530*a046d57dSUrsula Braun 	/* tbd in follow-on patch: more steps to setup RDMA communcication,
531*a046d57dSUrsula Braun 	 * create connection, link_group, link
532*a046d57dSUrsula Braun 	 */
533*a046d57dSUrsula Braun 
534*a046d57dSUrsula Braun 	/* tbd in follow-on patch: more steps to setup RDMA communcication,
535*a046d57dSUrsula Braun 	 * create rmbs, map rmbs
536*a046d57dSUrsula Braun 	 */
537*a046d57dSUrsula Braun 
538*a046d57dSUrsula Braun 	rc = smc_clc_send_accept(new_smc);
539*a046d57dSUrsula Braun 	if (rc)
540*a046d57dSUrsula Braun 		goto out_err;
541*a046d57dSUrsula Braun 
542*a046d57dSUrsula Braun 	/* receive SMC Confirm CLC message */
543*a046d57dSUrsula Braun 	reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc),
544*a046d57dSUrsula Braun 				       SMC_CLC_CONFIRM);
545*a046d57dSUrsula Braun 	if (reason_code < 0)
546*a046d57dSUrsula Braun 		goto out_err;
547*a046d57dSUrsula Braun 	if (reason_code > 0)
548*a046d57dSUrsula Braun 		goto decline_rdma;
549*a046d57dSUrsula Braun 
550*a046d57dSUrsula Braun 	/* tbd in follow-on patch: more steps to setup RDMA communcication,
551*a046d57dSUrsula Braun 	 * rtoken_handling, modify_qp
552*a046d57dSUrsula Braun 	 */
553*a046d57dSUrsula Braun 
554*a046d57dSUrsula Braun out_connected:
555*a046d57dSUrsula Braun 	sk_refcnt_debug_inc(newsmcsk);
556*a046d57dSUrsula Braun 	newsmcsk->sk_state = SMC_ACTIVE;
557*a046d57dSUrsula Braun enqueue:
558*a046d57dSUrsula Braun 	lock_sock(&lsmc->sk);
559*a046d57dSUrsula Braun 	if (lsmc->sk.sk_state == SMC_LISTEN) {
560*a046d57dSUrsula Braun 		smc_accept_enqueue(&lsmc->sk, newsmcsk);
561*a046d57dSUrsula Braun 	} else { /* no longer listening */
562*a046d57dSUrsula Braun 		smc_close_non_accepted(newsmcsk);
563*a046d57dSUrsula Braun 	}
564*a046d57dSUrsula Braun 	release_sock(&lsmc->sk);
565*a046d57dSUrsula Braun 
566*a046d57dSUrsula Braun 	/* Wake up accept */
567*a046d57dSUrsula Braun 	lsmc->sk.sk_data_ready(&lsmc->sk);
568*a046d57dSUrsula Braun 	sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
569*a046d57dSUrsula Braun 	return;
570*a046d57dSUrsula Braun 
571*a046d57dSUrsula Braun decline_rdma:
572*a046d57dSUrsula Braun 	/* RDMA setup failed, switch back to TCP */
573*a046d57dSUrsula Braun 	new_smc->use_fallback = true;
574*a046d57dSUrsula Braun 	if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
575*a046d57dSUrsula Braun 		rc = smc_clc_send_decline(new_smc, reason_code, 0);
576*a046d57dSUrsula Braun 		if (rc < sizeof(struct smc_clc_msg_decline))
577*a046d57dSUrsula Braun 			goto out_err;
578*a046d57dSUrsula Braun 	}
579*a046d57dSUrsula Braun 	goto out_connected;
580*a046d57dSUrsula Braun 
581*a046d57dSUrsula Braun out_err:
582*a046d57dSUrsula Braun 	newsmcsk->sk_state = SMC_CLOSED;
583*a046d57dSUrsula Braun 	goto enqueue; /* queue new sock with sk_err set */
584*a046d57dSUrsula Braun }
585*a046d57dSUrsula Braun 
586*a046d57dSUrsula Braun static void smc_tcp_listen_work(struct work_struct *work)
587*a046d57dSUrsula Braun {
588*a046d57dSUrsula Braun 	struct smc_sock *lsmc = container_of(work, struct smc_sock,
589*a046d57dSUrsula Braun 					     tcp_listen_work);
590*a046d57dSUrsula Braun 	struct smc_sock *new_smc;
591*a046d57dSUrsula Braun 	int rc = 0;
592*a046d57dSUrsula Braun 
593*a046d57dSUrsula Braun 	lock_sock(&lsmc->sk);
594*a046d57dSUrsula Braun 	while (lsmc->sk.sk_state == SMC_LISTEN) {
595*a046d57dSUrsula Braun 		rc = smc_clcsock_accept(lsmc, &new_smc);
596*a046d57dSUrsula Braun 		if (rc)
597*a046d57dSUrsula Braun 			goto out;
598*a046d57dSUrsula Braun 		if (!new_smc)
599*a046d57dSUrsula Braun 			continue;
600*a046d57dSUrsula Braun 
601*a046d57dSUrsula Braun 		new_smc->listen_smc = lsmc;
602*a046d57dSUrsula Braun 		new_smc->use_fallback = false; /* assume rdma capability first*/
603*a046d57dSUrsula Braun 		sock_hold(&lsmc->sk); /* sock_put in smc_listen_work */
604*a046d57dSUrsula Braun 		INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
605*a046d57dSUrsula Braun 		smc_copy_sock_settings_to_smc(new_smc);
606*a046d57dSUrsula Braun 		schedule_work(&new_smc->smc_listen_work);
607*a046d57dSUrsula Braun 	}
608*a046d57dSUrsula Braun 
609*a046d57dSUrsula Braun out:
610*a046d57dSUrsula Braun 	release_sock(&lsmc->sk);
611*a046d57dSUrsula Braun 	lsmc->sk.sk_data_ready(&lsmc->sk); /* no more listening, wake accept */
612*a046d57dSUrsula Braun }
613*a046d57dSUrsula Braun 
614ac713874SUrsula Braun static int smc_listen(struct socket *sock, int backlog)
615ac713874SUrsula Braun {
616ac713874SUrsula Braun 	struct sock *sk = sock->sk;
617ac713874SUrsula Braun 	struct smc_sock *smc;
618ac713874SUrsula Braun 	int rc;
619ac713874SUrsula Braun 
620ac713874SUrsula Braun 	smc = smc_sk(sk);
621ac713874SUrsula Braun 	lock_sock(sk);
622ac713874SUrsula Braun 
623ac713874SUrsula Braun 	rc = -EINVAL;
624ac713874SUrsula Braun 	if ((sk->sk_state != SMC_INIT) && (sk->sk_state != SMC_LISTEN))
625ac713874SUrsula Braun 		goto out;
626ac713874SUrsula Braun 
627ac713874SUrsula Braun 	rc = 0;
628ac713874SUrsula Braun 	if (sk->sk_state == SMC_LISTEN) {
629ac713874SUrsula Braun 		sk->sk_max_ack_backlog = backlog;
630ac713874SUrsula Braun 		goto out;
631ac713874SUrsula Braun 	}
632ac713874SUrsula Braun 	/* some socket options are handled in core, so we could not apply
633ac713874SUrsula Braun 	 * them to the clc socket -- copy smc socket options to clc socket
634ac713874SUrsula Braun 	 */
635ac713874SUrsula Braun 	smc_copy_sock_settings_to_clc(smc);
636ac713874SUrsula Braun 
637ac713874SUrsula Braun 	rc = kernel_listen(smc->clcsock, backlog);
638ac713874SUrsula Braun 	if (rc)
639ac713874SUrsula Braun 		goto out;
640ac713874SUrsula Braun 	sk->sk_max_ack_backlog = backlog;
641ac713874SUrsula Braun 	sk->sk_ack_backlog = 0;
642ac713874SUrsula Braun 	sk->sk_state = SMC_LISTEN;
643*a046d57dSUrsula Braun 	INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
644*a046d57dSUrsula Braun 	schedule_work(&smc->tcp_listen_work);
645ac713874SUrsula Braun 
646ac713874SUrsula Braun out:
647ac713874SUrsula Braun 	release_sock(sk);
648ac713874SUrsula Braun 	return rc;
649ac713874SUrsula Braun }
650ac713874SUrsula Braun 
651ac713874SUrsula Braun static int smc_accept(struct socket *sock, struct socket *new_sock,
652ac713874SUrsula Braun 		      int flags)
653ac713874SUrsula Braun {
654*a046d57dSUrsula Braun 	struct sock *sk = sock->sk, *nsk;
655*a046d57dSUrsula Braun 	DECLARE_WAITQUEUE(wait, current);
656ac713874SUrsula Braun 	struct smc_sock *lsmc;
657*a046d57dSUrsula Braun 	long timeo;
658*a046d57dSUrsula Braun 	int rc = 0;
659ac713874SUrsula Braun 
660ac713874SUrsula Braun 	lsmc = smc_sk(sk);
661ac713874SUrsula Braun 	lock_sock(sk);
662ac713874SUrsula Braun 
663ac713874SUrsula Braun 	if (lsmc->sk.sk_state != SMC_LISTEN) {
664ac713874SUrsula Braun 		rc = -EINVAL;
665ac713874SUrsula Braun 		goto out;
666ac713874SUrsula Braun 	}
667ac713874SUrsula Braun 
668*a046d57dSUrsula Braun 	/* Wait for an incoming connection */
669*a046d57dSUrsula Braun 	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
670*a046d57dSUrsula Braun 	add_wait_queue_exclusive(sk_sleep(sk), &wait);
671*a046d57dSUrsula Braun 	while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
672*a046d57dSUrsula Braun 		set_current_state(TASK_INTERRUPTIBLE);
673*a046d57dSUrsula Braun 		if (!timeo) {
674*a046d57dSUrsula Braun 			rc = -EAGAIN;
675*a046d57dSUrsula Braun 			break;
676*a046d57dSUrsula Braun 		}
677*a046d57dSUrsula Braun 		release_sock(sk);
678*a046d57dSUrsula Braun 		timeo = schedule_timeout(timeo);
679*a046d57dSUrsula Braun 		/* wakeup by sk_data_ready in smc_listen_work() */
680*a046d57dSUrsula Braun 		sched_annotate_sleep();
681*a046d57dSUrsula Braun 		lock_sock(sk);
682*a046d57dSUrsula Braun 		if (signal_pending(current)) {
683*a046d57dSUrsula Braun 			rc = sock_intr_errno(timeo);
684*a046d57dSUrsula Braun 			break;
685*a046d57dSUrsula Braun 		}
686*a046d57dSUrsula Braun 	}
687*a046d57dSUrsula Braun 	set_current_state(TASK_RUNNING);
688*a046d57dSUrsula Braun 	remove_wait_queue(sk_sleep(sk), &wait);
689ac713874SUrsula Braun 
690*a046d57dSUrsula Braun 	if (!rc)
691*a046d57dSUrsula Braun 		rc = sock_error(nsk);
692ac713874SUrsula Braun 
693ac713874SUrsula Braun out:
694ac713874SUrsula Braun 	release_sock(sk);
695ac713874SUrsula Braun 	return rc;
696ac713874SUrsula Braun }
697ac713874SUrsula Braun 
698ac713874SUrsula Braun static int smc_getname(struct socket *sock, struct sockaddr *addr,
699ac713874SUrsula Braun 		       int *len, int peer)
700ac713874SUrsula Braun {
701ac713874SUrsula Braun 	struct smc_sock *smc;
702ac713874SUrsula Braun 
703ac713874SUrsula Braun 	if (peer && (sock->sk->sk_state != SMC_ACTIVE))
704ac713874SUrsula Braun 		return -ENOTCONN;
705ac713874SUrsula Braun 
706ac713874SUrsula Braun 	smc = smc_sk(sock->sk);
707ac713874SUrsula Braun 
708ac713874SUrsula Braun 	return smc->clcsock->ops->getname(smc->clcsock, addr, len, peer);
709ac713874SUrsula Braun }
710ac713874SUrsula Braun 
711ac713874SUrsula Braun static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
712ac713874SUrsula Braun {
713ac713874SUrsula Braun 	struct sock *sk = sock->sk;
714ac713874SUrsula Braun 	struct smc_sock *smc;
715ac713874SUrsula Braun 	int rc = -EPIPE;
716ac713874SUrsula Braun 
717ac713874SUrsula Braun 	smc = smc_sk(sk);
718ac713874SUrsula Braun 	lock_sock(sk);
719ac713874SUrsula Braun 	if (sk->sk_state != SMC_ACTIVE)
720ac713874SUrsula Braun 		goto out;
721ac713874SUrsula Braun 	if (smc->use_fallback)
722ac713874SUrsula Braun 		rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
723ac713874SUrsula Braun 	else
724ac713874SUrsula Braun 		rc = sock_no_sendmsg(sock, msg, len);
725ac713874SUrsula Braun out:
726ac713874SUrsula Braun 	release_sock(sk);
727ac713874SUrsula Braun 	return rc;
728ac713874SUrsula Braun }
729ac713874SUrsula Braun 
730ac713874SUrsula Braun static int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
731ac713874SUrsula Braun 		       int flags)
732ac713874SUrsula Braun {
733ac713874SUrsula Braun 	struct sock *sk = sock->sk;
734ac713874SUrsula Braun 	struct smc_sock *smc;
735ac713874SUrsula Braun 	int rc = -ENOTCONN;
736ac713874SUrsula Braun 
737ac713874SUrsula Braun 	smc = smc_sk(sk);
738ac713874SUrsula Braun 	lock_sock(sk);
739ac713874SUrsula Braun 	if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_CLOSED))
740ac713874SUrsula Braun 		goto out;
741ac713874SUrsula Braun 
742ac713874SUrsula Braun 	if (smc->use_fallback)
743ac713874SUrsula Braun 		rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
744ac713874SUrsula Braun 	else
745ac713874SUrsula Braun 		rc = sock_no_recvmsg(sock, msg, len, flags);
746ac713874SUrsula Braun out:
747ac713874SUrsula Braun 	release_sock(sk);
748ac713874SUrsula Braun 	return rc;
749ac713874SUrsula Braun }
750ac713874SUrsula Braun 
751*a046d57dSUrsula Braun static unsigned int smc_accept_poll(struct sock *parent)
752*a046d57dSUrsula Braun {
753*a046d57dSUrsula Braun 	struct smc_sock *isk;
754*a046d57dSUrsula Braun 	struct sock *sk;
755*a046d57dSUrsula Braun 
756*a046d57dSUrsula Braun 	lock_sock(parent);
757*a046d57dSUrsula Braun 	list_for_each_entry(isk, &smc_sk(parent)->accept_q, accept_q) {
758*a046d57dSUrsula Braun 		sk = (struct sock *)isk;
759*a046d57dSUrsula Braun 
760*a046d57dSUrsula Braun 		if (sk->sk_state == SMC_ACTIVE) {
761*a046d57dSUrsula Braun 			release_sock(parent);
762*a046d57dSUrsula Braun 			return POLLIN | POLLRDNORM;
763*a046d57dSUrsula Braun 		}
764*a046d57dSUrsula Braun 	}
765*a046d57dSUrsula Braun 	release_sock(parent);
766*a046d57dSUrsula Braun 
767*a046d57dSUrsula Braun 	return 0;
768*a046d57dSUrsula Braun }
769*a046d57dSUrsula Braun 
770ac713874SUrsula Braun static unsigned int smc_poll(struct file *file, struct socket *sock,
771ac713874SUrsula Braun 			     poll_table *wait)
772ac713874SUrsula Braun {
773ac713874SUrsula Braun 	struct sock *sk = sock->sk;
774ac713874SUrsula Braun 	unsigned int mask = 0;
775ac713874SUrsula Braun 	struct smc_sock *smc;
776*a046d57dSUrsula Braun 	int rc;
777ac713874SUrsula Braun 
778ac713874SUrsula Braun 	smc = smc_sk(sock->sk);
779*a046d57dSUrsula Braun 	if ((sk->sk_state == SMC_INIT) || smc->use_fallback) {
780*a046d57dSUrsula Braun 		/* delegate to CLC child sock */
781ac713874SUrsula Braun 		mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
782ac713874SUrsula Braun 		/* if non-blocking connect finished ... */
783ac713874SUrsula Braun 		lock_sock(sk);
784ac713874SUrsula Braun 		if ((sk->sk_state == SMC_INIT) && (mask & POLLOUT)) {
785*a046d57dSUrsula Braun 			sk->sk_err = smc->clcsock->sk->sk_err;
786*a046d57dSUrsula Braun 			if (sk->sk_err) {
787*a046d57dSUrsula Braun 				mask |= POLLERR;
788*a046d57dSUrsula Braun 			} else {
789*a046d57dSUrsula Braun 				rc = smc_connect_rdma(smc);
790*a046d57dSUrsula Braun 				if (rc < 0)
791*a046d57dSUrsula Braun 					mask |= POLLERR;
792*a046d57dSUrsula Braun 				else
793*a046d57dSUrsula Braun 					/* success cases including fallback */
794*a046d57dSUrsula Braun 					mask |= POLLOUT | POLLWRNORM;
795*a046d57dSUrsula Braun 			}
796ac713874SUrsula Braun 		}
797ac713874SUrsula Braun 		release_sock(sk);
798ac713874SUrsula Braun 	} else {
799*a046d57dSUrsula Braun 		sock_poll_wait(file, sk_sleep(sk), wait);
800*a046d57dSUrsula Braun 		if (sk->sk_state == SMC_LISTEN)
801*a046d57dSUrsula Braun 			/* woken up by sk_data_ready in smc_listen_work() */
802*a046d57dSUrsula Braun 			mask |= smc_accept_poll(sk);
803*a046d57dSUrsula Braun 		if (sk->sk_err)
804*a046d57dSUrsula Braun 			mask |= POLLERR;
805*a046d57dSUrsula Braun 		/* for now - to be enhanced in follow-on patch */
806ac713874SUrsula Braun 	}
807ac713874SUrsula Braun 
808ac713874SUrsula Braun 	return mask;
809ac713874SUrsula Braun }
810ac713874SUrsula Braun 
811ac713874SUrsula Braun static int smc_shutdown(struct socket *sock, int how)
812ac713874SUrsula Braun {
813ac713874SUrsula Braun 	struct sock *sk = sock->sk;
814ac713874SUrsula Braun 	struct smc_sock *smc;
815ac713874SUrsula Braun 	int rc = -EINVAL;
816ac713874SUrsula Braun 
817ac713874SUrsula Braun 	smc = smc_sk(sk);
818ac713874SUrsula Braun 
819ac713874SUrsula Braun 	if ((how < SHUT_RD) || (how > SHUT_RDWR))
820ac713874SUrsula Braun 		goto out_err;
821ac713874SUrsula Braun 
822ac713874SUrsula Braun 	lock_sock(sk);
823ac713874SUrsula Braun 
824ac713874SUrsula Braun 	rc = -ENOTCONN;
825ac713874SUrsula Braun 	if (sk->sk_state == SMC_CLOSED)
826ac713874SUrsula Braun 		goto out;
827ac713874SUrsula Braun 	if (smc->use_fallback) {
828ac713874SUrsula Braun 		rc = kernel_sock_shutdown(smc->clcsock, how);
829ac713874SUrsula Braun 		sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
830ac713874SUrsula Braun 		if (sk->sk_shutdown == SHUTDOWN_MASK)
831ac713874SUrsula Braun 			sk->sk_state = SMC_CLOSED;
832ac713874SUrsula Braun 	} else {
833ac713874SUrsula Braun 		rc = sock_no_shutdown(sock, how);
834ac713874SUrsula Braun 	}
835ac713874SUrsula Braun 
836ac713874SUrsula Braun out:
837ac713874SUrsula Braun 	release_sock(sk);
838ac713874SUrsula Braun 
839ac713874SUrsula Braun out_err:
840ac713874SUrsula Braun 	return rc;
841ac713874SUrsula Braun }
842ac713874SUrsula Braun 
843ac713874SUrsula Braun static int smc_setsockopt(struct socket *sock, int level, int optname,
844ac713874SUrsula Braun 			  char __user *optval, unsigned int optlen)
845ac713874SUrsula Braun {
846ac713874SUrsula Braun 	struct sock *sk = sock->sk;
847ac713874SUrsula Braun 	struct smc_sock *smc;
848ac713874SUrsula Braun 
849ac713874SUrsula Braun 	smc = smc_sk(sk);
850ac713874SUrsula Braun 
851ac713874SUrsula Braun 	/* generic setsockopts reaching us here always apply to the
852ac713874SUrsula Braun 	 * CLC socket
853ac713874SUrsula Braun 	 */
854ac713874SUrsula Braun 	return smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
855ac713874SUrsula Braun 					     optval, optlen);
856ac713874SUrsula Braun }
857ac713874SUrsula Braun 
858ac713874SUrsula Braun static int smc_getsockopt(struct socket *sock, int level, int optname,
859ac713874SUrsula Braun 			  char __user *optval, int __user *optlen)
860ac713874SUrsula Braun {
861ac713874SUrsula Braun 	struct smc_sock *smc;
862ac713874SUrsula Braun 
863ac713874SUrsula Braun 	smc = smc_sk(sock->sk);
864ac713874SUrsula Braun 	/* socket options apply to the CLC socket */
865ac713874SUrsula Braun 	return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
866ac713874SUrsula Braun 					     optval, optlen);
867ac713874SUrsula Braun }
868ac713874SUrsula Braun 
869ac713874SUrsula Braun static int smc_ioctl(struct socket *sock, unsigned int cmd,
870ac713874SUrsula Braun 		     unsigned long arg)
871ac713874SUrsula Braun {
872ac713874SUrsula Braun 	struct smc_sock *smc;
873ac713874SUrsula Braun 
874ac713874SUrsula Braun 	smc = smc_sk(sock->sk);
875ac713874SUrsula Braun 	if (smc->use_fallback)
876ac713874SUrsula Braun 		return smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
877ac713874SUrsula Braun 	else
878ac713874SUrsula Braun 		return sock_no_ioctl(sock, cmd, arg);
879ac713874SUrsula Braun }
880ac713874SUrsula Braun 
881ac713874SUrsula Braun static ssize_t smc_sendpage(struct socket *sock, struct page *page,
882ac713874SUrsula Braun 			    int offset, size_t size, int flags)
883ac713874SUrsula Braun {
884ac713874SUrsula Braun 	struct sock *sk = sock->sk;
885ac713874SUrsula Braun 	struct smc_sock *smc;
886ac713874SUrsula Braun 	int rc = -EPIPE;
887ac713874SUrsula Braun 
888ac713874SUrsula Braun 	smc = smc_sk(sk);
889ac713874SUrsula Braun 	lock_sock(sk);
890ac713874SUrsula Braun 	if (sk->sk_state != SMC_ACTIVE)
891ac713874SUrsula Braun 		goto out;
892ac713874SUrsula Braun 	if (smc->use_fallback)
893ac713874SUrsula Braun 		rc = kernel_sendpage(smc->clcsock, page, offset,
894ac713874SUrsula Braun 				     size, flags);
895ac713874SUrsula Braun 	else
896ac713874SUrsula Braun 		rc = sock_no_sendpage(sock, page, offset, size, flags);
897ac713874SUrsula Braun 
898ac713874SUrsula Braun out:
899ac713874SUrsula Braun 	release_sock(sk);
900ac713874SUrsula Braun 	return rc;
901ac713874SUrsula Braun }
902ac713874SUrsula Braun 
903ac713874SUrsula Braun static ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
904ac713874SUrsula Braun 			       struct pipe_inode_info *pipe, size_t len,
905ac713874SUrsula Braun 				    unsigned int flags)
906ac713874SUrsula Braun {
907ac713874SUrsula Braun 	struct sock *sk = sock->sk;
908ac713874SUrsula Braun 	struct smc_sock *smc;
909ac713874SUrsula Braun 	int rc = -ENOTCONN;
910ac713874SUrsula Braun 
911ac713874SUrsula Braun 	smc = smc_sk(sk);
912ac713874SUrsula Braun 	lock_sock(sk);
913ac713874SUrsula Braun 	if ((sk->sk_state != SMC_ACTIVE) && (sk->sk_state != SMC_CLOSED))
914ac713874SUrsula Braun 		goto out;
915ac713874SUrsula Braun 	if (smc->use_fallback) {
916ac713874SUrsula Braun 		rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
917ac713874SUrsula Braun 						    pipe, len, flags);
918ac713874SUrsula Braun 	} else {
919ac713874SUrsula Braun 		rc = -EOPNOTSUPP;
920ac713874SUrsula Braun 	}
921ac713874SUrsula Braun out:
922ac713874SUrsula Braun 	release_sock(sk);
923ac713874SUrsula Braun 	return rc;
924ac713874SUrsula Braun }
925ac713874SUrsula Braun 
926ac713874SUrsula Braun /* must look like tcp */
927ac713874SUrsula Braun static const struct proto_ops smc_sock_ops = {
928ac713874SUrsula Braun 	.family		= PF_SMC,
929ac713874SUrsula Braun 	.owner		= THIS_MODULE,
930ac713874SUrsula Braun 	.release	= smc_release,
931ac713874SUrsula Braun 	.bind		= smc_bind,
932ac713874SUrsula Braun 	.connect	= smc_connect,
933ac713874SUrsula Braun 	.socketpair	= sock_no_socketpair,
934ac713874SUrsula Braun 	.accept		= smc_accept,
935ac713874SUrsula Braun 	.getname	= smc_getname,
936ac713874SUrsula Braun 	.poll		= smc_poll,
937ac713874SUrsula Braun 	.ioctl		= smc_ioctl,
938ac713874SUrsula Braun 	.listen		= smc_listen,
939ac713874SUrsula Braun 	.shutdown	= smc_shutdown,
940ac713874SUrsula Braun 	.setsockopt	= smc_setsockopt,
941ac713874SUrsula Braun 	.getsockopt	= smc_getsockopt,
942ac713874SUrsula Braun 	.sendmsg	= smc_sendmsg,
943ac713874SUrsula Braun 	.recvmsg	= smc_recvmsg,
944ac713874SUrsula Braun 	.mmap		= sock_no_mmap,
945ac713874SUrsula Braun 	.sendpage	= smc_sendpage,
946ac713874SUrsula Braun 	.splice_read	= smc_splice_read,
947ac713874SUrsula Braun };
948ac713874SUrsula Braun 
949ac713874SUrsula Braun static int smc_create(struct net *net, struct socket *sock, int protocol,
950ac713874SUrsula Braun 		      int kern)
951ac713874SUrsula Braun {
952ac713874SUrsula Braun 	struct smc_sock *smc;
953ac713874SUrsula Braun 	struct sock *sk;
954ac713874SUrsula Braun 	int rc;
955ac713874SUrsula Braun 
956ac713874SUrsula Braun 	rc = -ESOCKTNOSUPPORT;
957ac713874SUrsula Braun 	if (sock->type != SOCK_STREAM)
958ac713874SUrsula Braun 		goto out;
959ac713874SUrsula Braun 
960ac713874SUrsula Braun 	rc = -EPROTONOSUPPORT;
961ac713874SUrsula Braun 	if ((protocol != IPPROTO_IP) && (protocol != IPPROTO_TCP))
962ac713874SUrsula Braun 		goto out;
963ac713874SUrsula Braun 
964ac713874SUrsula Braun 	rc = -ENOBUFS;
965ac713874SUrsula Braun 	sock->ops = &smc_sock_ops;
966ac713874SUrsula Braun 	sk = smc_sock_alloc(net, sock);
967ac713874SUrsula Braun 	if (!sk)
968ac713874SUrsula Braun 		goto out;
969ac713874SUrsula Braun 
970ac713874SUrsula Braun 	/* create internal TCP socket for CLC handshake and fallback */
971ac713874SUrsula Braun 	smc = smc_sk(sk);
972*a046d57dSUrsula Braun 	smc->use_fallback = false; /* assume rdma capability first */
973ac713874SUrsula Braun 	rc = sock_create_kern(net, PF_INET, SOCK_STREAM,
974ac713874SUrsula Braun 			      IPPROTO_TCP, &smc->clcsock);
975ac713874SUrsula Braun 	if (rc)
976ac713874SUrsula Braun 		sk_common_release(sk);
977ac713874SUrsula Braun 
978ac713874SUrsula Braun out:
979ac713874SUrsula Braun 	return rc;
980ac713874SUrsula Braun }
981ac713874SUrsula Braun 
982ac713874SUrsula Braun static const struct net_proto_family smc_sock_family_ops = {
983ac713874SUrsula Braun 	.family	= PF_SMC,
984ac713874SUrsula Braun 	.owner	= THIS_MODULE,
985ac713874SUrsula Braun 	.create	= smc_create,
986ac713874SUrsula Braun };
987ac713874SUrsula Braun 
988ac713874SUrsula Braun static int __init smc_init(void)
989ac713874SUrsula Braun {
990ac713874SUrsula Braun 	int rc;
991ac713874SUrsula Braun 
9926812baabSThomas Richter 	rc = smc_pnet_init();
9936812baabSThomas Richter 	if (rc)
9946812baabSThomas Richter 		return rc;
9956812baabSThomas Richter 
996ac713874SUrsula Braun 	rc = proto_register(&smc_proto, 1);
997ac713874SUrsula Braun 	if (rc) {
998ac713874SUrsula Braun 		pr_err("%s: proto_register fails with %d\n", __func__, rc);
9996812baabSThomas Richter 		goto out_pnet;
1000ac713874SUrsula Braun 	}
1001ac713874SUrsula Braun 
1002ac713874SUrsula Braun 	rc = sock_register(&smc_sock_family_ops);
1003ac713874SUrsula Braun 	if (rc) {
1004ac713874SUrsula Braun 		pr_err("%s: sock_register fails with %d\n", __func__, rc);
1005ac713874SUrsula Braun 		goto out_proto;
1006ac713874SUrsula Braun 	}
1007ac713874SUrsula Braun 
1008a4cf0443SUrsula Braun 	rc = smc_ib_register_client();
1009a4cf0443SUrsula Braun 	if (rc) {
1010a4cf0443SUrsula Braun 		pr_err("%s: ib_register fails with %d\n", __func__, rc);
1011a4cf0443SUrsula Braun 		goto out_sock;
1012a4cf0443SUrsula Braun 	}
1013a4cf0443SUrsula Braun 
1014ac713874SUrsula Braun 	return 0;
1015ac713874SUrsula Braun 
1016a4cf0443SUrsula Braun out_sock:
1017a4cf0443SUrsula Braun 	sock_unregister(PF_SMC);
1018ac713874SUrsula Braun out_proto:
1019ac713874SUrsula Braun 	proto_unregister(&smc_proto);
10206812baabSThomas Richter out_pnet:
10216812baabSThomas Richter 	smc_pnet_exit();
1022ac713874SUrsula Braun 	return rc;
1023ac713874SUrsula Braun }
1024ac713874SUrsula Braun 
1025ac713874SUrsula Braun static void __exit smc_exit(void)
1026ac713874SUrsula Braun {
1027a4cf0443SUrsula Braun 	smc_ib_unregister_client();
1028ac713874SUrsula Braun 	sock_unregister(PF_SMC);
1029ac713874SUrsula Braun 	proto_unregister(&smc_proto);
10306812baabSThomas Richter 	smc_pnet_exit();
1031ac713874SUrsula Braun }
1032ac713874SUrsula Braun 
1033ac713874SUrsula Braun module_init(smc_init);
1034ac713874SUrsula Braun module_exit(smc_exit);
1035ac713874SUrsula Braun 
1036ac713874SUrsula Braun MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
1037ac713874SUrsula Braun MODULE_DESCRIPTION("smc socket address family");
1038ac713874SUrsula Braun MODULE_LICENSE("GPL");
1039ac713874SUrsula Braun MODULE_ALIAS_NETPROTO(PF_SMC);
1040