xref: /openbmc/linux/net/smc/smc_tx.c (revision 1ac731c529cd4d6adbce134754b51ff7d822b145)
1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
2e6727f39SUrsula Braun /*
3e6727f39SUrsula Braun  * Shared Memory Communications over RDMA (SMC-R) and RoCE
4e6727f39SUrsula Braun  *
5e6727f39SUrsula Braun  * Manage send buffer.
6e6727f39SUrsula Braun  * Producer:
7e6727f39SUrsula Braun  * Copy user space data into send buffer, if send buffer space available.
8e6727f39SUrsula Braun  * Consumer:
9e6727f39SUrsula Braun  * Trigger RDMA write into RMBE of peer and send CDC, if RMBE space available.
10e6727f39SUrsula Braun  *
11e6727f39SUrsula Braun  * Copyright IBM Corp. 2016
12e6727f39SUrsula Braun  *
13e6727f39SUrsula Braun  * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
14e6727f39SUrsula Braun  */
15e6727f39SUrsula Braun 
16e6727f39SUrsula Braun #include <linux/net.h>
17e6727f39SUrsula Braun #include <linux/rcupdate.h>
18e6727f39SUrsula Braun #include <linux/workqueue.h>
19c3edc401SIngo Molnar #include <linux/sched/signal.h>
20c3edc401SIngo Molnar 
21e6727f39SUrsula Braun #include <net/sock.h>
2201d2f7e2SUrsula Braun #include <net/tcp.h>
23e6727f39SUrsula Braun 
24e6727f39SUrsula Braun #include "smc.h"
25e6727f39SUrsula Braun #include "smc_wr.h"
26e6727f39SUrsula Braun #include "smc_cdc.h"
275bc056d8SKarsten Graul #include "smc_close.h"
28be244f28SHans Wippel #include "smc_ism.h"
29e6727f39SUrsula Braun #include "smc_tx.h"
30e0e4b8faSGuvenc Gulce #include "smc_stats.h"
31aff3083fSTony Lu #include "smc_tracepoint.h"
32e6727f39SUrsula Braun 
3316297d14SKarsten Graul #define SMC_TX_WORK_DELAY	0
3418e537cdSUrsula Braun 
35e6727f39SUrsula Braun /***************************** sndbuf producer *******************************/
36e6727f39SUrsula Braun 
37e6727f39SUrsula Braun /* callback implementation for sk.sk_write_space()
38de8474ebSStefan Raspl  * to wakeup sndbuf producers that blocked with smc_tx_wait().
39e6727f39SUrsula Braun  * called under sk_socket lock.
40e6727f39SUrsula Braun  */
smc_tx_write_space(struct sock * sk)41e6727f39SUrsula Braun static void smc_tx_write_space(struct sock *sk)
42e6727f39SUrsula Braun {
43e6727f39SUrsula Braun 	struct socket *sock = sk->sk_socket;
44e6727f39SUrsula Braun 	struct smc_sock *smc = smc_sk(sk);
45e6727f39SUrsula Braun 	struct socket_wq *wq;
46e6727f39SUrsula Braun 
47e6727f39SUrsula Braun 	/* similar to sk_stream_write_space */
48e6727f39SUrsula Braun 	if (atomic_read(&smc->conn.sndbuf_space) && sock) {
49e0e4b8faSGuvenc Gulce 		if (test_bit(SOCK_NOSPACE, &sock->flags))
50194730a9SGuvenc Gulce 			SMC_STAT_RMB_TX_FULL(smc, !smc->conn.lnk);
51e6727f39SUrsula Braun 		clear_bit(SOCK_NOSPACE, &sock->flags);
52e6727f39SUrsula Braun 		rcu_read_lock();
53e6727f39SUrsula Braun 		wq = rcu_dereference(sk->sk_wq);
54e6727f39SUrsula Braun 		if (skwq_has_sleeper(wq))
55e6727f39SUrsula Braun 			wake_up_interruptible_poll(&wq->wait,
56a9a08845SLinus Torvalds 						   EPOLLOUT | EPOLLWRNORM |
57a9a08845SLinus Torvalds 						   EPOLLWRBAND);
58e6727f39SUrsula Braun 		if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
59e6727f39SUrsula Braun 			sock_wake_async(wq, SOCK_WAKE_SPACE, POLL_OUT);
60e6727f39SUrsula Braun 		rcu_read_unlock();
61e6727f39SUrsula Braun 	}
62e6727f39SUrsula Braun }
63e6727f39SUrsula Braun 
64de8474ebSStefan Raspl /* Wakeup sndbuf producers that blocked with smc_tx_wait().
65e6727f39SUrsula Braun  * Cf. tcp_data_snd_check()=>tcp_check_space()=>tcp_new_space().
66e6727f39SUrsula Braun  */
smc_tx_sndbuf_nonfull(struct smc_sock * smc)67e6727f39SUrsula Braun void smc_tx_sndbuf_nonfull(struct smc_sock *smc)
68e6727f39SUrsula Braun {
69e6727f39SUrsula Braun 	if (smc->sk.sk_socket &&
70e6727f39SUrsula Braun 	    test_bit(SOCK_NOSPACE, &smc->sk.sk_socket->flags))
71e6727f39SUrsula Braun 		smc->sk.sk_write_space(&smc->sk);
72e6727f39SUrsula Braun }
73e6727f39SUrsula Braun 
/* blocks sndbuf producer until at least one byte of free space available
 * or urgent Byte was consumed
 *
 * Returns 0 once space is available, or a negative errno:
 * -EPIPE (socket error/shutdown/killed connection/peer done writing),
 * -ECONNRESET (peer sent a close), -EAGAIN (non-blocking or timeout
 * expired), or a signal-derived error from sock_intr_errno().
 */
static int smc_tx_wait(struct smc_sock *smc, int flags)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	long timeo;
	int rc = 0;

	/* similar to sk_stream_wait_memory */
	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
	add_wait_queue(sk_sleep(sk), &wait);
	while (1) {
		/* request async space notification while we may sleep */
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
		if (sk->sk_err ||
		    (sk->sk_shutdown & SEND_SHUTDOWN) ||
		    conn->killed ||
		    conn->local_tx_ctrl.conn_state_flags.peer_done_writing) {
			rc = -EPIPE;
			break;
		}
		if (smc_cdc_rxed_any_close(conn)) {
			rc = -ECONNRESET;
			break;
		}
		if (!timeo) {
			/* ensure EPOLLOUT is subsequently generated */
			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
			rc = -EAGAIN;
			break;
		}
		if (signal_pending(current)) {
			rc = sock_intr_errno(timeo);
			break;
		}
		sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
		if (atomic_read(&conn->sndbuf_space) && !conn->urg_tx_pend)
			break; /* at least 1 byte of free & no urgent data */
		/* mark NOSPACE before sleeping so smc_tx_write_space()
		 * knows a producer is waiting
		 */
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		sk_wait_event(sk, &timeo,
			      READ_ONCE(sk->sk_err) ||
			      (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) ||
			      smc_cdc_rxed_any_close(conn) ||
			      (atomic_read(&conn->sndbuf_space) &&
			       !conn->urg_tx_pend),
			      &wait);
	}
	remove_wait_queue(sk_sleep(sk), &wait);
	return rc;
}
126e6727f39SUrsula Braun 
smc_tx_is_corked(struct smc_sock * smc)12701d2f7e2SUrsula Braun static bool smc_tx_is_corked(struct smc_sock *smc)
12801d2f7e2SUrsula Braun {
12901d2f7e2SUrsula Braun 	struct tcp_sock *tp = tcp_sk(smc->clcsock->sk);
13001d2f7e2SUrsula Braun 
13101d2f7e2SUrsula Braun 	return (tp->nonagle & TCP_NAGLE_CORK) ? true : false;
13201d2f7e2SUrsula Braun }
13301d2f7e2SUrsula Braun 
134dcd2cf5fSDust Li /* If we have pending CDC messages, do not send:
135dcd2cf5fSDust Li  * Because CQE of this CDC message will happen shortly, it gives
136dcd2cf5fSDust Li  * a chance to coalesce future sendmsg() payload in to one RDMA Write,
137dcd2cf5fSDust Li  * without need for a timer, and with no latency trade off.
138dcd2cf5fSDust Li  * Algorithm here:
139dcd2cf5fSDust Li  *  1. First message should never cork
140dcd2cf5fSDust Li  *  2. If we have pending Tx CDC messages, wait for the first CDC
141dcd2cf5fSDust Li  *     message's completion
142dcd2cf5fSDust Li  *  3. Don't cork to much data in a single RDMA Write to prevent burst
143dcd2cf5fSDust Li  *     traffic, total corked message should not exceed sendbuf/2
144dcd2cf5fSDust Li  */
smc_should_autocork(struct smc_sock * smc)145dcd2cf5fSDust Li static bool smc_should_autocork(struct smc_sock *smc)
146dcd2cf5fSDust Li {
147dcd2cf5fSDust Li 	struct smc_connection *conn = &smc->conn;
148dcd2cf5fSDust Li 	int corking_size;
149dcd2cf5fSDust Li 
150ef739f1dSJakub Kicinski 	corking_size = min_t(unsigned int, conn->sndbuf_desc->len >> 1,
151ef739f1dSJakub Kicinski 			     sock_net(&smc->sk)->smc.sysctl_autocorking_size);
152dcd2cf5fSDust Li 
153dcd2cf5fSDust Li 	if (atomic_read(&conn->cdc_pend_tx_wr) == 0 ||
154dcd2cf5fSDust Li 	    smc_tx_prepared_sends(conn) > corking_size)
155dcd2cf5fSDust Li 		return false;
156dcd2cf5fSDust Li 	return true;
157dcd2cf5fSDust Li }
158dcd2cf5fSDust Li 
smc_tx_should_cork(struct smc_sock * smc,struct msghdr * msg)159dcd2cf5fSDust Li static bool smc_tx_should_cork(struct smc_sock *smc, struct msghdr *msg)
160dcd2cf5fSDust Li {
161dcd2cf5fSDust Li 	struct smc_connection *conn = &smc->conn;
162dcd2cf5fSDust Li 
163dcd2cf5fSDust Li 	if (smc_should_autocork(smc))
164dcd2cf5fSDust Li 		return true;
165dcd2cf5fSDust Li 
166dcd2cf5fSDust Li 	/* for a corked socket defer the RDMA writes if
167dcd2cf5fSDust Li 	 * sndbuf_space is still available. The applications
168dcd2cf5fSDust Li 	 * should known how/when to uncork it.
169dcd2cf5fSDust Li 	 */
170dcd2cf5fSDust Li 	if ((msg->msg_flags & MSG_MORE ||
171dcd2cf5fSDust Li 	     smc_tx_is_corked(smc)) &&
172dcd2cf5fSDust Li 	    atomic_read(&conn->sndbuf_space))
173dcd2cf5fSDust Li 		return true;
174dcd2cf5fSDust Li 
175dcd2cf5fSDust Li 	return false;
176dcd2cf5fSDust Li }
177dcd2cf5fSDust Li 
/* sndbuf producer: main API called by socket layer.
 * called under sock lock.
 *
 * Copies the user data from @msg into the connection's send ring buffer
 * in at most two chunks (to handle ring wrap-around), then triggers the
 * sndbuf consumer unless corking applies.
 * Returns the number of bytes queued, or a negative errno.
 */
int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len)
{
	size_t copylen, send_done = 0, send_remaining = len;
	size_t chunk_len, chunk_off, chunk_len_sum;
	struct smc_connection *conn = &smc->conn;
	union smc_host_cursor prep;
	struct sock *sk = &smc->sk;
	char *sndbuf_base;
	int tx_cnt_prep;
	int writespace;
	int rc, chunk;

	/* This should be in poll */
	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) {
		rc = -EPIPE;
		goto out_err;
	}

	if (sk->sk_state == SMC_INIT)
		return -ENOTCONN;

	/* statistics only: note requests larger than local/peer buffers */
	if (len > conn->sndbuf_desc->len)
		SMC_STAT_RMB_TX_SIZE_SMALL(smc, !conn->lnk);

	if (len > conn->peer_rmbe_size)
		SMC_STAT_RMB_TX_PEER_SIZE_SMALL(smc, !conn->lnk);

	if (msg->msg_flags & MSG_OOB)
		SMC_STAT_INC(smc, urg_data_cnt);

	while (msg_data_left(msg)) {
		if (smc->sk.sk_shutdown & SEND_SHUTDOWN ||
		    (smc->sk.sk_err == ECONNABORTED) ||
		    conn->killed)
			return -EPIPE;
		if (smc_cdc_rxed_any_close(conn))
			return send_done ?: -ECONNRESET;

		if (msg->msg_flags & MSG_OOB)
			conn->local_tx_ctrl.prod_flags.urg_data_pending = 1;

		if (!atomic_read(&conn->sndbuf_space) || conn->urg_tx_pend) {
			/* report partial progress rather than blocking */
			if (send_done)
				return send_done;
			rc = smc_tx_wait(smc, msg->msg_flags);
			if (rc)
				goto out_err;
			continue;
		}

		/* initialize variables for 1st iteration of subsequent loop */
		/* could be just 1 byte, even after smc_tx_wait above */
		writespace = atomic_read(&conn->sndbuf_space);
		/* not more than what user space asked for */
		copylen = min_t(size_t, send_remaining, writespace);
		/* determine start of sndbuf */
		sndbuf_base = conn->sndbuf_desc->cpu_addr;
		smc_curs_copy(&prep, &conn->tx_curs_prep, conn);
		tx_cnt_prep = prep.count;
		/* determine chunks where to write into sndbuf */
		/* either unwrapped case, or 1st chunk of wrapped case */
		chunk_len = min_t(size_t, copylen, conn->sndbuf_desc->len -
				  tx_cnt_prep);
		chunk_len_sum = chunk_len;
		chunk_off = tx_cnt_prep;
		for (chunk = 0; chunk < 2; chunk++) {
			rc = memcpy_from_msg(sndbuf_base + chunk_off,
					     msg, chunk_len);
			if (rc) {
				/* sync what was already copied before bailing */
				smc_sndbuf_sync_sg_for_device(conn);
				if (send_done)
					return send_done;
				goto out_err;
			}
			send_done += chunk_len;
			send_remaining -= chunk_len;

			if (chunk_len_sum == copylen)
				break; /* either on 1st or 2nd iteration */
			/* prepare next (== 2nd) iteration */
			chunk_len = copylen - chunk_len; /* remainder */
			chunk_len_sum += chunk_len;
			chunk_off = 0; /* modulo offset in send ring buffer */
		}
		smc_sndbuf_sync_sg_for_device(conn);
		/* update cursors */
		smc_curs_add(conn->sndbuf_desc->len, &prep, copylen);
		smc_curs_copy(&conn->tx_curs_prep, &prep, conn);
		/* increased in send tasklet smc_cdc_tx_handler() */
		smp_mb__before_atomic();
		atomic_sub(copylen, &conn->sndbuf_space);
		/* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */
		smp_mb__after_atomic();
		/* since we just produced more new data into sndbuf,
		 * trigger sndbuf consumer: RDMA write into peer RMBE and CDC
		 */
		if ((msg->msg_flags & MSG_OOB) && !send_remaining)
			conn->urg_tx_pend = true;
		/* If we need to cork, do nothing and wait for the next
		 * sendmsg() call or push on tx completion
		 */
		if (!smc_tx_should_cork(smc, msg))
			smc_tx_sndbuf_nonempty(conn);

		trace_smc_tx_sendmsg(smc, copylen);
	} /* while (msg_data_left(msg)) */

	return send_done;

out_err:
	rc = sk_stream_error(sk, msg->msg_flags, rc);
	/* make sure we wake any epoll edge trigger waiter */
	if (unlikely(rc == -EAGAIN))
		sk->sk_write_space(sk);
	return rc;
}
299e6727f39SUrsula Braun 
300e6727f39SUrsula Braun /***************************** sndbuf consumer *******************************/
301be9a16ccSTony Lu 
302be9a16ccSTony Lu /* sndbuf consumer: actual data transfer of one target chunk with ISM write */
smcd_tx_ism_write(struct smc_connection * conn,void * data,size_t len,u32 offset,int signal)303be9a16ccSTony Lu int smcd_tx_ism_write(struct smc_connection *conn, void *data, size_t len,
304be9a16ccSTony Lu 		      u32 offset, int signal)
305be9a16ccSTony Lu {
306be9a16ccSTony Lu 	int rc;
307be9a16ccSTony Lu 
308be9a16ccSTony Lu 	rc = smc_ism_write(conn->lgr->smcd, conn->peer_token,
309be9a16ccSTony Lu 			   conn->peer_rmbe_idx, signal, conn->tx_off + offset,
310be9a16ccSTony Lu 			   data, len);
311de4eda9dSAl Viro 	if (rc)
312be9a16ccSTony Lu 		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
313be9a16ccSTony Lu 	return rc;
314be9a16ccSTony Lu }
315be9a16ccSTony Lu 
316be9a16ccSTony Lu /* sndbuf consumer: actual data transfer of one target chunk with RDMA write */
smc_tx_rdma_write(struct smc_connection * conn,int peer_rmbe_offset,int num_sges,struct ib_rdma_wr * rdma_wr)317e6727f39SUrsula Braun static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
318e6727f39SUrsula Braun 			     int num_sges, struct ib_rdma_wr *rdma_wr)
319be244f28SHans Wippel {
320be244f28SHans Wippel 	struct smc_link_group *lgr = conn->lgr;
321be244f28SHans Wippel 	struct smc_link *link = conn->lnk;
322be244f28SHans Wippel 	int rc;
323be244f28SHans Wippel 
324be244f28SHans Wippel 	rdma_wr->wr.wr_id = smc_wr_tx_get_next_wr_id(link);
325eb481b02SHeiko Carstens 	rdma_wr->wr.num_sge = num_sges;
326eb481b02SHeiko Carstens 	rdma_wr->remote_addr =
327eb481b02SHeiko Carstens 		lgr->rtokens[conn->rtoken_idx][link->link_idx].dma_addr +
328be244f28SHans Wippel 		/* RMBE within RMB */
329be244f28SHans Wippel 		conn->tx_off +
330be244f28SHans Wippel 		/* offset within RMBE */
331be244f28SHans Wippel 		peer_rmbe_offset;
332be244f28SHans Wippel 	rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][link->link_idx].rkey;
333e6727f39SUrsula Braun 	rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL);
334e6727f39SUrsula Braun 	if (rc)
335ad6f317fSUrsula Braun 		smcr_link_down_cond_sched(link);
336e6727f39SUrsula Braun 	return rc;
337e6727f39SUrsula Braun }
338387707fdSKarsten Graul 
/* sndbuf consumer: advance producer and sent cursors after a transfer of
 * @len bytes and shrink the in-flight window accordingly.
 * The barrier pairing around atomic_sub must not be reordered.
 */
static inline void smc_tx_advance_cursors(struct smc_connection *conn,
					  union smc_host_cursor *prod,
					  union smc_host_cursor *sent,
					  size_t len)
{
	smc_curs_add(conn->peer_rmbe_size, prod, len);
	/* increased in recv tasklet smc_cdc_msg_rcv() */
	smp_mb__before_atomic();
	/* data in flight reduces usable snd_wnd */
	atomic_sub(len, &conn->peer_rmbe_space);
	/* guarantee 0 <= peer_rmbe_space <= peer_rmbe_size */
	smp_mb__after_atomic();
	smc_curs_add(conn->sndbuf_desc->len, sent, len);
}
354e6727f39SUrsula Braun 
/* SMC-R helper for smc_tx_rdma_writes()
 *
 * Issues up to two RDMA writes (destination RMBE may wrap), each built
 * from up to two scatter/gather entries (source sndbuf may wrap too).
 * Returns 0 on success or the error from smc_tx_rdma_write().
 */
static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
			       size_t src_off, size_t src_len,
			       size_t dst_off, size_t dst_len,
			       struct smc_rdma_wr *wr_rdma_buf)
{
	struct smc_link *link = conn->lnk;

	dma_addr_t dma_addr =
		sg_dma_address(conn->sndbuf_desc->sgt[link->link_idx].sgl);
	u64 virt_addr = (uintptr_t)conn->sndbuf_desc->cpu_addr;
	int src_len_sum = src_len, dst_len_sum = dst_len;
	int sent_count = src_off;
	int srcchunk, dstchunk;
	int num_sges;
	int rc;

	for (dstchunk = 0; dstchunk < 2; dstchunk++) {
		struct ib_rdma_wr *wr = &wr_rdma_buf->wr_tx_rdma[dstchunk];
		struct ib_sge *sge = wr->wr.sg_list;
		u64 base_addr = dma_addr;

		/* small payloads go inline from the virtual address */
		if (dst_len < link->qp_attr.cap.max_inline_data) {
			base_addr = virt_addr;
			wr->wr.send_flags |= IB_SEND_INLINE;
		} else {
			wr->wr.send_flags &= ~IB_SEND_INLINE;
		}

		num_sges = 0;
		for (srcchunk = 0; srcchunk < 2; srcchunk++) {
			/* virtually-contiguous buffers use virt addresses */
			sge[srcchunk].addr = conn->sndbuf_desc->is_vm ?
				(virt_addr + src_off) : (base_addr + src_off);
			sge[srcchunk].length = src_len;
			if (conn->sndbuf_desc->is_vm)
				sge[srcchunk].lkey =
					conn->sndbuf_desc->mr[link->link_idx]->lkey;
			num_sges++;

			src_off += src_len;
			if (src_off >= conn->sndbuf_desc->len)
				src_off -= conn->sndbuf_desc->len;
						/* modulo in send ring */
			if (src_len_sum == dst_len)
				break; /* either on 1st or 2nd iteration */
			/* prepare next (== 2nd) iteration */
			src_len = dst_len - src_len; /* remainder */
			src_len_sum += src_len;
		}
		rc = smc_tx_rdma_write(conn, dst_off, num_sges, wr);
		if (rc)
			return rc;
		if (dst_len_sum == len)
			break; /* either on 1st or 2nd iteration */
		/* prepare next (== 2nd) iteration */
		dst_off = 0; /* modulo offset in RMBE ring buffer */
		dst_len = len - dst_len; /* remainder */
		dst_len_sum += dst_len;
		src_len = min_t(int, dst_len, conn->sndbuf_desc->len -
				sent_count);
		src_len_sum = src_len;
	}
	return 0;
}
419e6727f39SUrsula Braun 
/* SMC-D helper for smc_tx_rdma_writes()
 *
 * Same two-level chunking as the SMC-R variant, but each source chunk is
 * transferred with an ISM write; the destination offset is shifted past
 * the smcd_cdc_msg header area at the start of the peer buffer.
 */
static int smcd_tx_rdma_writes(struct smc_connection *conn, size_t len,
			       size_t src_off, size_t src_len,
			       size_t dst_off, size_t dst_len)
{
	int src_len_sum = src_len, dst_len_sum = dst_len;
	int srcchunk, dstchunk;
	int rc;

	for (dstchunk = 0; dstchunk < 2; dstchunk++) {
		for (srcchunk = 0; srcchunk < 2; srcchunk++) {
			void *data = conn->sndbuf_desc->cpu_addr + src_off;

			rc = smcd_tx_ism_write(conn, data, src_len, dst_off +
					       sizeof(struct smcd_cdc_msg), 0);
			if (rc)
				return rc;
			dst_off += src_len;
			src_off += src_len;
			if (src_off >= conn->sndbuf_desc->len)
				src_off -= conn->sndbuf_desc->len;
						/* modulo in send ring */
			if (src_len_sum == dst_len)
				break; /* either on 1st or 2nd iteration */
			/* prepare next (== 2nd) iteration */
			src_len = dst_len - src_len; /* remainder */
			src_len_sum += src_len;
		}
		if (dst_len_sum == len)
			break; /* either on 1st or 2nd iteration */
		/* prepare next (== 2nd) iteration */
		dst_off = 0; /* modulo offset in RMBE ring buffer */
		dst_len = len - dst_len; /* remainder */
		dst_len_sum += dst_len;
		src_len = min_t(int, dst_len, conn->sndbuf_desc->len - src_off);
		src_len_sum = src_len;
	}
	return 0;
}
459be244f28SHans Wippel 
/* sndbuf consumer: prepare all necessary (src&dst) chunks of data transmit;
 * usable snd_wnd as max transmit
 *
 * Computes how much prepared-but-unsent data may be pushed given the
 * peer's free RMBE space, splits it into at most two source and two
 * destination chunks (ring wrap-around on either side), dispatches to
 * the SMC-R or SMC-D helper, and advances the connection cursors.
 * Returns 0 (including "nothing to send") or a negative errno.
 */
static int smc_tx_rdma_writes(struct smc_connection *conn,
			      struct smc_rdma_wr *wr_rdma_buf)
{
	size_t len, src_len, dst_off, dst_len; /* current chunk values */
	union smc_host_cursor sent, prep, prod, cons;
	struct smc_cdc_producer_flags *pflags;
	int to_send, rmbespace;
	int rc;

	/* source: sndbuf */
	smc_curs_copy(&sent, &conn->tx_curs_sent, conn);
	smc_curs_copy(&prep, &conn->tx_curs_prep, conn);
	/* cf. wmem_alloc - (snd_max - snd_una) */
	to_send = smc_curs_diff(conn->sndbuf_desc->len, &sent, &prep);
	if (to_send <= 0)
		return 0;

	/* destination: RMBE */
	/* cf. snd_wnd */
	rmbespace = atomic_read(&conn->peer_rmbe_space);
	if (rmbespace <= 0) {
		struct smc_sock *smc = container_of(conn, struct smc_sock,
						    conn);
		SMC_STAT_RMB_TX_PEER_FULL(smc, !conn->lnk);
		return 0;
	}
	smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
	smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);

	/* if usable snd_wnd closes ask peer to advertise once it opens again */
	pflags = &conn->local_tx_ctrl.prod_flags;
	pflags->write_blocked = (to_send >= rmbespace);
	/* cf. usable snd_wnd */
	len = min(to_send, rmbespace);

	/* initialize variables for first iteration of subsequent nested loop */
	dst_off = prod.count;
	if (prod.wrap == cons.wrap) {
		/* the filled destination area is unwrapped,
		 * hence the available free destination space is wrapped
		 * and we need 2 destination chunks of sum len; start with 1st
		 * which is limited by what's available in sndbuf
		 */
		dst_len = min_t(size_t,
				conn->peer_rmbe_size - prod.count, len);
	} else {
		/* the filled destination area is wrapped,
		 * hence the available free destination space is unwrapped
		 * and we need a single destination chunk of entire len
		 */
		dst_len = len;
	}
	/* dst_len determines the maximum src_len */
	if (sent.count + dst_len <= conn->sndbuf_desc->len) {
		/* unwrapped src case: single chunk of entire dst_len */
		src_len = dst_len;
	} else {
		/* wrapped src case: 2 chunks of sum dst_len; start with 1st: */
		src_len = conn->sndbuf_desc->len - sent.count;
	}

	if (conn->lgr->is_smcd)
		rc = smcd_tx_rdma_writes(conn, len, sent.count, src_len,
					 dst_off, dst_len);
	else
		rc = smcr_tx_rdma_writes(conn, len, sent.count, src_len,
					 dst_off, dst_len, wr_rdma_buf);
	if (rc)
		return rc;

	if (conn->urg_tx_pend && len == to_send)
		pflags->urg_data_present = 1;
	smc_tx_advance_cursors(conn, &prod, &sent, len);
	/* update connection's cursors with advanced local cursors */
	smc_curs_copy(&conn->local_tx_ctrl.prod, &prod, conn);
							/* dst: peer RMBE */
	smc_curs_copy(&conn->tx_curs_sent, &sent, conn);/* src: local sndbuf */

	return 0;
}
543be244f28SHans Wippel 
/* Wakeup sndbuf consumers from any context (IRQ or process)
 * since there is more data to transmit; usable snd_wnd as max transmit
 *
 * Takes a reference on the tx link and a CDC send slot, performs the
 * RDMA writes under conn->send_lock, then sends the CDC message.
 * On -EBUSY (no free slot) the work is retried via conn->tx_work.
 */
static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
{
	struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
	struct smc_link *link = conn->lnk;
	struct smc_rdma_wr *wr_rdma_buf;
	struct smc_cdc_tx_pend *pend;
	struct smc_wr_buf *wr_buf;
	int rc;

	if (!link || !smc_wr_tx_link_hold(link))
		return -ENOLINK;
	rc = smc_cdc_get_free_slot(conn, link, &wr_buf, &wr_rdma_buf, &pend);
	if (rc < 0) {
		smc_wr_tx_link_put(link);
		if (rc == -EBUSY) {
			struct smc_sock *smc =
				container_of(conn, struct smc_sock, conn);

			if (smc->sk.sk_err == ECONNABORTED)
				return sock_error(&smc->sk);
			if (conn->killed)
				return -EPIPE;
			/* no free slot right now: retry from tx worker */
			rc = 0;
			mod_delayed_work(conn->lgr->tx_wq, &conn->tx_work,
					 SMC_TX_WORK_DELAY);
		}
		return rc;
	}

	spin_lock_bh(&conn->send_lock);
	if (link != conn->lnk) {
		/* link of connection changed, tx_work will restart */
		smc_wr_tx_put_slot(link,
				   (struct smc_wr_tx_pend_priv *)pend);
		rc = -ENOLINK;
		goto out_unlock;
	}
	if (!pflags->urg_data_present) {
		rc = smc_tx_rdma_writes(conn, wr_rdma_buf);
		if (rc) {
			/* release the slot on failure; nothing was sent */
			smc_wr_tx_put_slot(link,
					   (struct smc_wr_tx_pend_priv *)pend);
			goto out_unlock;
		}
	}

	rc = smc_cdc_msg_send(conn, wr_buf, pend);
	if (!rc && pflags->urg_data_present) {
		pflags->urg_data_pending = 0;
		pflags->urg_data_present = 0;
	}

out_unlock:
	spin_unlock_bh(&conn->send_lock);
	smc_wr_tx_link_put(link);
	return rc;
}
604c6f02ebeSKarsten Graul 
smcd_tx_sndbuf_nonempty(struct smc_connection * conn)605e6727f39SUrsula Braun static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn)
606e6727f39SUrsula Braun {
607e6727f39SUrsula Braun 	struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
608de8474ebSStefan Raspl 	int rc = 0;
609e6727f39SUrsula Braun 
610e6727f39SUrsula Braun 	spin_lock_bh(&conn->send_lock);
611de8474ebSStefan Raspl 	if (!pflags->urg_data_present)
612de8474ebSStefan Raspl 		rc = smc_tx_rdma_writes(conn, NULL);
613de8474ebSStefan Raspl 	if (!rc)
614de8474ebSStefan Raspl 		rc = smcd_cdc_msg_send(conn);
615e6727f39SUrsula Braun 
616e6727f39SUrsula Braun 	if (!rc && pflags->urg_data_present) {
617e6727f39SUrsula Braun 		pflags->urg_data_pending = 0;
61895f7f3e7SKarsten Graul 		pflags->urg_data_present = 0;
6198f3d65c1SKarsten Graul 	}
6208f3d65c1SKarsten Graul 	spin_unlock_bh(&conn->send_lock);
6218f3d65c1SKarsten Graul 	return rc;
622be244f28SHans Wippel }
623be244f28SHans Wippel 
__smc_tx_sndbuf_nonempty(struct smc_connection * conn)624be244f28SHans Wippel static int __smc_tx_sndbuf_nonempty(struct smc_connection *conn)
625be244f28SHans Wippel {
626be244f28SHans Wippel 	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
627be244f28SHans Wippel 	int rc = 0;
628be244f28SHans Wippel 
629ad6f317fSUrsula Braun 	/* No data in the send queue */
630be244f28SHans Wippel 	if (unlikely(smc_tx_prepared_sends(conn) <= 0))
631be244f28SHans Wippel 		goto out;
632be244f28SHans Wippel 
633be244f28SHans Wippel 	/* Peer don't have RMBE space */
634be244f28SHans Wippel 	if (unlikely(atomic_read(&conn->peer_rmbe_space) <= 0)) {
635be244f28SHans Wippel 		SMC_STAT_RMB_TX_PEER_FULL(smc, !conn->lnk);
636be244f28SHans Wippel 		goto out;
637be244f28SHans Wippel 	}
638be244f28SHans Wippel 
639be244f28SHans Wippel 	if (conn->killed ||
640be244f28SHans Wippel 	    conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) {
641dcd2cf5fSDust Li 		rc = -EPIPE;    /* connection being aborted */
642be244f28SHans Wippel 		goto out;
643dcd2cf5fSDust Li 	}
644dcd2cf5fSDust Li 	if (conn->lgr->is_smcd)
645dcd2cf5fSDust Li 		rc = smcd_tx_sndbuf_nonempty(conn);
646dcd2cf5fSDust Li 	else
647dcd2cf5fSDust Li 		rc = smcr_tx_sndbuf_nonempty(conn);
648dcd2cf5fSDust Li 
649dcd2cf5fSDust Li 	if (!rc) {
650dcd2cf5fSDust Li 		/* trigger socket release if connection is closing */
651dcd2cf5fSDust Li 		smc_close_wake_tx_prepared(smc);
652dcd2cf5fSDust Li 	}
653dcd2cf5fSDust Li 
654dcd2cf5fSDust Li out:
655be244f28SHans Wippel 	return rc;
656b2900980SUrsula Braun }
657dcd2cf5fSDust Li 
/* Push pending sndbuf data to the peer while guaranteeing that only a
 * single context performs the actual send at any time; concurrent
 * callers merely record their push attempt in tx_pushing and return.
 */
int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
{
	int rc;

	/* This make sure only one can send simultaneously to prevent wasting
	 * of CPU and CDC slot.
	 * Record whether someone has tried to push while we are pushing.
	 * (a counter value > 1 means another context is already sending)
	 */
	if (atomic_inc_return(&conn->tx_pushing) > 1)
		return 0;

again:
	/* collapse all recorded push attempts into one pending send */
	atomic_set(&conn->tx_pushing, 1);
	smp_wmb(); /* Make sure tx_pushing is 1 before real send */
	rc = __smc_tx_sndbuf_nonempty(conn);

	/* We need to check whether someone else have added some data into
	 * the send queue and tried to push but failed after the atomic_set()
	 * when we are pushing.
	 * If so, we need to push again to prevent those data hang in the send
	 * queue.
	 */
	if (unlikely(!atomic_dec_and_test(&conn->tx_pushing)))
		goto again;

	return rc;
}
685dcd2cf5fSDust Li 
686dcd2cf5fSDust Li /* Wakeup sndbuf consumers from process context
687dcd2cf5fSDust Li  * since there is more data to transmit. The caller
688dcd2cf5fSDust Li  * must hold sock lock.
689dcd2cf5fSDust Li  */
smc_tx_pending(struct smc_connection * conn)690dcd2cf5fSDust Li void smc_tx_pending(struct smc_connection *conn)
691dcd2cf5fSDust Li {
692dcd2cf5fSDust Li 	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
693dcd2cf5fSDust Li 	int rc;
694dcd2cf5fSDust Li 
695dcd2cf5fSDust Li 	if (smc->sk.sk_err)
696dcd2cf5fSDust Li 		return;
697dcd2cf5fSDust Li 
698dcd2cf5fSDust Li 	rc = smc_tx_sndbuf_nonempty(conn);
699dcd2cf5fSDust Li 	if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked &&
700be244f28SHans Wippel 	    !atomic_read(&conn->bytes_to_rcv))
701be244f28SHans Wippel 		conn->local_rx_ctrl.prod_flags.write_blocked = 0;
702be244f28SHans Wippel }
7032e13bde1STony Lu 
7042e13bde1STony Lu /* Wakeup sndbuf consumers from process context
7052e13bde1STony Lu  * since there is more data to transmit in locked
7062e13bde1STony Lu  * sock.
707ea785a1aSTony Lu  */
smc_tx_work(struct work_struct * work)708ea785a1aSTony Lu void smc_tx_work(struct work_struct *work)
709ea785a1aSTony Lu {
710ea785a1aSTony Lu 	struct smc_connection *conn = container_of(to_delayed_work(work),
711ea785a1aSTony Lu 						   struct smc_connection,
712ea785a1aSTony Lu 						   tx_work);
713ea785a1aSTony Lu 	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
714ea785a1aSTony Lu 
715ea785a1aSTony Lu 	lock_sock(&smc->sk);
716ea785a1aSTony Lu 	smc_tx_pending(conn);
717ea785a1aSTony Lu 	release_sock(&smc->sk);
718ea785a1aSTony Lu }
719ea785a1aSTony Lu 
/* Send a CDC consumer cursor update to the peer when required: on
 * explicit request (force or cons_curs_upd_req), or when more than
 * rmbe_update_limit bytes were consumed since the last confirmation
 * and the peer is either write_blocked or presumably low on known-free
 * RMBE space.  On CDC send failure the update is retried via tx_work.
 */
void smc_tx_consumer_update(struct smc_connection *conn, bool force)
{
	union smc_host_cursor cfed, cons, prod;
	int sender_free = conn->rmb_desc->len;
	int to_confirm;

	smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
	smc_curs_copy(&cfed, &conn->rx_curs_confirmed, conn);
	/* bytes consumed locally but not yet confirmed to the peer */
	to_confirm = smc_curs_diff(conn->rmb_desc->len, &cfed, &cons);
	if (to_confirm > conn->rmbe_update_limit) {
		smc_curs_copy(&prod, &conn->local_rx_ctrl.prod, conn);
		/* estimate of the RMBE space the peer still knows is free */
		sender_free = conn->rmb_desc->len -
			      smc_curs_diff_large(conn->rmb_desc->len,
						  &cfed, &prod);
	}

	if (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req ||
	    force ||
	    ((to_confirm > conn->rmbe_update_limit) &&
	     ((sender_free <= (conn->rmb_desc->len / 2)) ||
	      conn->local_rx_ctrl.prod_flags.write_blocked))) {
		if (conn->killed ||
		    conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
			return;
		if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
		    !conn->killed) {
			/* CDC send failed on a live connection: retry later */
			queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work,
					   SMC_TX_WORK_DELAY);
			return;
		}
	}
	/* receive buffer drained: clear the peer's write_blocked hint */
	if (conn->local_rx_ctrl.prod_flags.write_blocked &&
	    !atomic_read(&conn->bytes_to_rcv))
		conn->local_rx_ctrl.prod_flags.write_blocked = 0;
}
755952310ccSUrsula Braun 
75699be51f1SUrsula Braun /***************************** send initialize *******************************/
757952310ccSUrsula Braun 
/* Initialize send properties on connection establishment. NB: not __init!
 * Installs smc_tx_write_space as the socket's write-space callback.
 */
void smc_tx_init(struct smc_sock *smc)
{
	smc->sk.sk_write_space = smc_tx_write_space;
}
76322ef473dSKarsten Graul