xref: /openbmc/linux/net/smc/smc_close.c (revision 1b39eacd)
// SPDX-License-Identifier: GPL-2.0
/*
 *  Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 *  Socket Closing - normal and abnormal
 *
 *  Copyright IBM Corp. 2016
 *
 *  Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/workqueue.h>
#include <linux/sched/signal.h>

#include <net/sock.h>

#include "smc.h"
#include "smc_tx.h"
#include "smc_cdc.h"
#include "smc_close.h"

#define SMC_CLOSE_WAIT_TX_PENDS_TIME		(5 * HZ)

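/* A rough sketch of the close state machine driven by the functions below
 * (summarized from this file's switch statements, not normative):
 *
 * active close:  ACTIVE --close()--> PEERCLOSEWAIT1 --peer done writing-->
 *                PEERCLOSEWAIT2 --peer closed--> CLOSED
 * passive close: ACTIVE --peer closes--> APPCLOSEWAIT1 --shutdown(WR)-->
 *                APPCLOSEWAIT2 --close()--> CLOSED (or PEERFINCLOSEWAIT
 *                while the peer's final close is still outstanding)
 * abort paths run via PEERABORTWAIT (active abort) or PROCESSABORT
 * (peer abort received) and end in CLOSED.
 */
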
static void smc_close_cleanup_listen(struct sock *parent)
{
	struct sock *sk;

	/* Close non-accepted connections */
	while ((sk = smc_accept_dequeue(parent, NULL)))
		smc_close_non_accepted(sk);
}

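/* wait (up to SMC_CLOSE_WAIT_TX_PENDS_TIME) until all pending CDC messages
 * of this connection have been transmitted; gives up early if a signal is
 * pending
 */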
static void smc_close_wait_tx_pends(struct smc_sock *smc)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;
	signed long timeout;

	timeout = SMC_CLOSE_WAIT_TX_PENDS_TIME;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_cdc_tx_has_pending(&smc->conn),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
}

/* wait for sndbuf data to be transmitted */
static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;

	if (!timeout)
		return;

	if (!smc_tx_prepared_sends(&smc->conn))
		return;

	smc->wait_close_tx_prepared = 1;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_tx_prepared_sends(&smc->conn) ||
				   (sk->sk_err == ECONNABORTED) ||
				   (sk->sk_err == ECONNRESET),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
	smc->wait_close_tx_prepared = 0;
}

void smc_close_wake_tx_prepared(struct smc_sock *smc)
{
	if (smc->wait_close_tx_prepared)
		/* wake up socket closing */
		smc->sk.sk_state_change(&smc->sk);
}

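/* signal the peer that the local side is done writing (shutdown write) */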
static int smc_close_wr(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

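/* send the final close indication to the peer; falls back to an abort
 * indication if unread data remains in the local receive buffer
 */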
static int smc_close_final(struct smc_connection *conn)
{
	if (atomic_read(&conn->bytes_to_rcv))
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

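/* signal an abnormal termination (abort) to the peer */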
static int smc_close_abort(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

/* terminate smc socket abnormally - active abort
 * RDMA communication no longer possible
 */
void smc_close_active_abort(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;

	smc->sk.sk_err = ECONNABORTED;
	if (smc->clcsock && smc->clcsock->sk) {
		smc->clcsock->sk->sk_err = ECONNABORTED;
		smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
	}
	switch (smc->sk.sk_state) {
	case SMC_INIT:
	case SMC_ACTIVE:
		smc->sk.sk_state = SMC_PEERABORTWAIT;
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		txflags->peer_conn_abort = 1;
		sock_release(smc->clcsock);
		if (!smc_cdc_rxed_any_close(&smc->conn))
			smc->sk.sk_state = SMC_PEERABORTWAIT;
		else
			smc->sk.sk_state = SMC_CLOSED;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (!txflags->peer_conn_closed) {
			smc->sk.sk_state = SMC_PEERABORTWAIT;
			txflags->peer_conn_abort = 1;
			sock_release(smc->clcsock);
		} else {
			smc->sk.sk_state = SMC_CLOSED;
		}
		break;
	case SMC_PROCESSABORT:
	case SMC_APPFINCLOSEWAIT:
		if (!txflags->peer_conn_closed) {
			txflags->peer_conn_abort = 1;
			sock_release(smc->clcsock);
		}
		smc->sk.sk_state = SMC_CLOSED;
		break;
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		break;
	}

	sock_set_flag(&smc->sk, SOCK_DEAD);
	smc->sk.sk_state_change(&smc->sk);
}

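/* true once the local side has announced any kind of closing (graceful
 * close or abort) to the peer
 */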
static inline bool smc_close_sent_any_close(struct smc_connection *conn)
{
	return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort ||
	       conn->local_tx_ctrl.conn_state_flags.peer_conn_closed;
}

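/* perform the active part of a close; called with the socket lock held,
 * and temporarily releases it, e.g. to cancel pending tx work
 */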
int smc_close_active(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

again:
	old_state = sk->sk_state;
	switch (old_state) {
	case SMC_INIT:
		sk->sk_state = SMC_CLOSED;
		if (smc->smc_listen_work.func)
			cancel_work_sync(&smc->smc_listen_work);
		break;
	case SMC_LISTEN:
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk); /* wake up accept */
		if (smc->clcsock && smc->clcsock->sk) {
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
			/* wake up kernel_accept of smc_tcp_listen_worker */
			smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
		}
		release_sock(sk);
		smc_close_cleanup_listen(sk);
		cancel_work_sync(&smc->smc_listen_work);
		lock_sock(sk);
		break;
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state == SMC_ACTIVE) {
			/* send close request */
			rc = smc_close_final(conn);
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		} else {
			/* peer event has changed the state */
			goto again;
		}
		break;
	case SMC_APPFINCLOSEWAIT:
		/* socket already shutdown wr or both (active close) */
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		sk->sk_state = SMC_CLOSED;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_err != ECONNABORTED) {
			/* confirm close from peer */
			rc = smc_close_final(conn);
			if (rc)
				break;
		}
		if (smc_cdc_rxed_any_close(conn))
			/* peer has closed the socket already */
			sk->sk_state = SMC_CLOSED;
		else
			/* peer has just issued a shutdown write */
			sk->sk_state = SMC_PEERFINCLOSEWAIT;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PEERFINCLOSEWAIT:
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PROCESSABORT:
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		smc_close_abort(conn);
		sk->sk_state = SMC_CLOSED;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(&smc->sk);
	return rc;
}

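/* an abort indication has been received from the peer; move the socket
 * into the matching abort state
 */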
static void smc_close_passive_abort_received(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct sock *sk = &smc->sk;

	switch (sk->sk_state) {
	case SMC_ACTIVE:
	case SMC_APPFINCLOSEWAIT:
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		smc_close_abort(&smc->conn);
		sk->sk_state = SMC_PROCESSABORT;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(&smc->conn)) {
			/* just shutdown, but not yet closed locally */
			smc_close_abort(&smc->conn);
			sk->sk_state = SMC_PROCESSABORT;
		} else {
			sk->sk_state = SMC_CLOSED;
		}
		break;
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_INIT:
	case SMC_PROCESSABORT:
		/* nothing to do, add tracing in future patch */
		break;
	}
}

/* Some kind of closing has been received: peer_conn_closed, peer_conn_abort,
 * or peer_done_writing.
 */
static void smc_close_passive_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   close_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_cdc_conn_state_flags *rxflags;
	struct sock *sk = &smc->sk;
	int old_state;

	lock_sock(&smc->sk);
	old_state = sk->sk_state;

	if (!conn->alert_token_local) {
		/* abnormal termination */
		smc_close_active_abort(smc);
		goto wakeup;
	}

	rxflags = &smc->conn.local_rx_ctrl.conn_state_flags;
	if (rxflags->peer_conn_abort) {
		smc_close_passive_abort_received(smc);
		goto wakeup;
	}

	switch (sk->sk_state) {
	case SMC_INIT:
		if (atomic_read(&smc->conn.bytes_to_rcv) ||
		    (rxflags->peer_done_writing &&
		     !smc_cdc_rxed_any_close(conn)))
			sk->sk_state = SMC_APPCLOSEWAIT1;
		else
			sk->sk_state = SMC_CLOSED;
		break;
	case SMC_ACTIVE:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		break;
	case SMC_PEERCLOSEWAIT1:
		if (rxflags->peer_done_writing)
			sk->sk_state = SMC_PEERCLOSEWAIT2;
		/* fall through */
		/* to check for closing */
	case SMC_PEERCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		if (!smc_cdc_rxed_any_close(&smc->conn))
			break;
		if (sock_flag(sk, SOCK_DEAD) &&
		    smc_close_sent_any_close(conn)) {
			/* smc_release has already been called locally */
			sk->sk_state = SMC_CLOSED;
		} else {
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_APPFINCLOSEWAIT;
		}
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_PROCESSABORT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

wakeup:
	sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
	sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */

	if (old_state != sk->sk_state) {
		sk->sk_state_change(sk);
		if ((sk->sk_state == SMC_CLOSED) &&
		    (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
			smc_conn_free(&smc->conn);
			schedule_delayed_work(&smc->sock_put_work,
					      SMC_CLOSE_SOCK_PUT_DELAY);
		}
	}
	release_sock(&smc->sk);
}

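/* delayed worker scheduled on final closing; unhashes the socket and drops
 * its last reference
 */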
void smc_close_sock_put_work(struct work_struct *work)
{
	struct smc_sock *smc = container_of(to_delayed_work(work),
					    struct smc_sock,
					    sock_put_work);

	smc->sk.sk_prot->unhash(&smc->sk);
	sock_put(&smc->sk);
}

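/* perform the write part of shutdown(SHUT_WR); analogous to
 * smc_close_active(), but only announces "done writing" to the peer
 */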
int smc_close_shutdown_write(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

again:
	old_state = sk->sk_state;
	switch (old_state) {
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* send close wr request */
		rc = smc_close_wr(conn);
		if (sk->sk_state == SMC_ACTIVE)
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		else
			goto again;
		break;
	case SMC_APPCLOSEWAIT1:
		/* passive close */
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* confirm close from peer */
		rc = smc_close_wr(conn);
		sk->sk_state = SMC_APPCLOSEWAIT2;
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PROCESSABORT:
	case SMC_PEERABORTWAIT:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(&smc->sk);
	return rc;
}

/* Initialize close properties on connection establishment. */
void smc_close_init(struct smc_sock *smc)
{
	INIT_WORK(&smc->conn.close_work, smc_close_passive_work);
}