// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Socket Closing - normal and abnormal
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/workqueue.h>
#include <linux/sched/signal.h>

#include <net/sock.h>

#include "smc.h"
#include "smc_tx.h"
#include "smc_cdc.h"
#include "smc_close.h"

/* Drain and close every child connection still queued on a closing
 * listen socket that was never handed out via accept().
 */
static void smc_close_cleanup_listen(struct sock *parent)
{
	struct sock *sk;

	/* Close non-accepted connections */
	while ((sk = smc_accept_dequeue(parent, NULL)))
		smc_close_non_accepted(sk);
}

/* wait for sndbuf data being transmitted */
/* Block (up to @timeout jiffies) until all prepared sends have left the
 * sndbuf, a signal arrives, or the socket hits ECONNABORTED/ECONNRESET.
 * wait_close_tx_prepared is set so smc_close_wake_tx_prepared() knows a
 * closer is sleeping here and wakes it via sk_state_change().
 * NOTE(review): callers invoke this with the socket lock held;
 * sk_wait_event() releases and reacquires it while sleeping.
 */
static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;

	if (!timeout)
		return;

	/* nothing pending - no need to sleep at all */
	if (!smc_tx_prepared_sends(&smc->conn))
		return;

	smc->wait_close_tx_prepared = 1;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_tx_prepared_sends(&smc->conn) ||
				   (sk->sk_err == ECONNABORTED) ||
				   (sk->sk_err == ECONNRESET),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
	smc->wait_close_tx_prepared = 0;
}

/* Wake a closer sleeping in smc_close_stream_wait() once the tx path
 * has made progress; no-op when nobody is waiting.
 */
void smc_close_wake_tx_prepared(struct smc_sock *smc)
{
	if (smc->wait_close_tx_prepared)
		/* wake up socket closing */
		smc->sk.sk_state_change(&smc->sk);
}

/* Announce "done writing" (shutdown WR) to the peer via a CDC message.
 * Returns the result of the CDC send.
 */
static int smc_close_wr(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

/* Send the final close indication to the peer: an abort if unread data
 * is still sitting in our rcvbuf (the peer's data would be thrown away),
 * a regular close otherwise. Returns the result of the CDC send.
 */
static int smc_close_final(struct smc_connection *conn)
{
	if (atomic_read(&conn->bytes_to_rcv))
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

/* Send an abort indication to the peer via a CDC message. */
static int smc_close_abort(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

/* terminate smc socket abnormally - active abort
 * RDMA communication no longer possible
 */
/* Sets ECONNABORTED on both the SMC socket and its internal TCP
 * (CLC) socket, advances the close state machine towards
 * SMC_PEERABORTWAIT/SMC_CLOSED depending on the current state, and
 * marks the sock dead. Note that no CDC abort message is sent here -
 * RDMA transfers are assumed to be impossible already.
 */
static void smc_close_active_abort(struct smc_sock *smc)
{
	struct sock *sk = &smc->sk;

	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;

	sk->sk_err = ECONNABORTED;
	if (smc->clcsock && smc->clcsock->sk) {
		smc->clcsock->sk->sk_err = ECONNABORTED;
		smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
	}
	switch (sk->sk_state) {
	case SMC_INIT:
	case SMC_ACTIVE:
		sk->sk_state = SMC_PEERABORTWAIT;
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		txflags->peer_conn_abort = 1;
		sock_release(smc->clcsock);
		if (!smc_cdc_rxed_any_close(&smc->conn))
			sk->sk_state = SMC_PEERABORTWAIT;
		else
			sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		/* only release the clcsock if we have not already sent a
		 * close indication ourselves
		 */
		if (!txflags->peer_conn_closed) {
			sk->sk_state = SMC_PEERABORTWAIT;
			txflags->peer_conn_abort = 1;
			sock_release(smc->clcsock);
		} else {
			sk->sk_state = SMC_CLOSED;
		}
		break;
	case SMC_PROCESSABORT:
	case SMC_APPFINCLOSEWAIT:
		if (!txflags->peer_conn_closed) {
			txflags->peer_conn_abort = 1;
			sock_release(smc->clcsock);
		}
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		break;
	}

	sock_set_flag(sk, SOCK_DEAD);
	sk->sk_state_change(sk);
}

/* true once we have sent any closing indication (abort or regular
 * close) to the peer
 */
static inline bool smc_close_sent_any_close(struct smc_connection *conn)
{
	return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort ||
	       conn->local_tx_ctrl.conn_state_flags.peer_conn_closed;
}

/* Active close state machine, driven from smc_release().
 * Expects the socket lock held by the caller; it is temporarily
 * released around smc_close_cleanup_listen()/cancel_*_work_sync() to
 * let the tx/listen workers (which take the lock themselves) finish.
 * Linger handling: a process that is exiting does not wait at all,
 * otherwise SO_LINGER bounds the sndbuf flush wait.
 * Returns 0 or the error from the final CDC send.
 */
int smc_close_active(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

again:
	old_state = sk->sk_state;
	switch (old_state) {
	case SMC_INIT:
		sk->sk_state = SMC_CLOSED;
		if (smc->smc_listen_work.func)
			cancel_work_sync(&smc->smc_listen_work);
		break;
	case SMC_LISTEN:
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk); /* wake up accept */
		if (smc->clcsock && smc->clcsock->sk) {
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
			/* wake up kernel_accept of smc_tcp_listen_worker */
			smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
		}
		release_sock(sk);
		smc_close_cleanup_listen(sk);
		cancel_work_sync(&smc->smc_listen_work);
		lock_sock(sk);
		break;
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* the state may have changed while the lock was dropped */
		if (sk->sk_state == SMC_ACTIVE) {
			/* send close request */
			rc = smc_close_final(conn);
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		} else {
			/* peer event has changed the state */
			goto again;
		}
		break;
	case SMC_APPFINCLOSEWAIT:
		/* socket already shutdown wr or both (active close) */
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		/* passive close: peer closed first, flush our sends unless
		 * the peer's close already arrived
		 */
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_err != ECONNABORTED) {
			/* confirm close from peer */
			rc = smc_close_final(conn);
			if (rc)
				break;
		}
		if (smc_cdc_rxed_any_close(conn))
			/* peer has closed the socket already */
			sk->sk_state = SMC_CLOSED;
		else
			/* peer has just issued a shutdown write */
			sk->sk_state = SMC_PEERFINCLOSEWAIT;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PEERFINCLOSEWAIT:
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PROCESSABORT:
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		smc_close_abort(conn);
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	return rc;
}

/* Handle an abort indication received from the peer: answer with our
 * own abort where one is still owed, and move towards
 * SMC_PROCESSABORT/SMC_CLOSED. Called with the socket lock held (from
 * smc_close_passive_work()).
 */
static void smc_close_passive_abort_received(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct sock *sk = &smc->sk;

	switch (sk->sk_state) {
	case SMC_ACTIVE:
	case SMC_APPFINCLOSEWAIT:
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		smc_close_abort(&smc->conn);
		sk->sk_state = SMC_PROCESSABORT;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(&smc->conn)) {
			/* just shutdown, but not yet closed locally */
			smc_close_abort(&smc->conn);
			sk->sk_state = SMC_PROCESSABORT;
		} else {
			sk->sk_state = SMC_CLOSED;
		}
		break;
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_INIT:
	case SMC_PROCESSABORT:
		/* nothing to do, add tracing in future patch */
		break;
	}
}

/* Some kind of closing has been received: peer_conn_closed, peer_conn_abort,
 * or peer_done_writing.
 */
/* Worker (conn->close_work) that processes a closing indication from
 * the peer and advances the passive side of the state machine. Takes
 * the socket lock itself. A cleared alert_token_local means the link
 * group terminated abnormally, which is treated as an active abort.
 * On transition to SMC_CLOSED with no application reference left, the
 * connection is freed and the delayed final sock_put is scheduled.
 */
static void smc_close_passive_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   close_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_cdc_conn_state_flags *rxflags;
	struct sock *sk = &smc->sk;
	int old_state;

	lock_sock(sk);
	old_state = sk->sk_state;

	if (!conn->alert_token_local) {
		/* abnormal termination */
		smc_close_active_abort(smc);
		goto wakeup;
	}

	rxflags = &conn->local_rx_ctrl.conn_state_flags;
	if (rxflags->peer_conn_abort) {
		smc_close_passive_abort_received(smc);
		goto wakeup;
	}

	switch (sk->sk_state) {
	case SMC_INIT:
		/* still readable data (or pending write shutdown) keeps the
		 * socket in APPCLOSEWAIT1 so the app can consume it
		 */
		if (atomic_read(&conn->bytes_to_rcv) ||
		    (rxflags->peer_done_writing &&
		     !smc_cdc_rxed_any_close(conn)))
			sk->sk_state = SMC_APPCLOSEWAIT1;
		else
			sk->sk_state = SMC_CLOSED;
		break;
	case SMC_ACTIVE:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		break;
	case SMC_PEERCLOSEWAIT1:
		if (rxflags->peer_done_writing)
			sk->sk_state = SMC_PEERCLOSEWAIT2;
		/* fall through */
		/* to check for closing */
	case SMC_PEERCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		if (!smc_cdc_rxed_any_close(conn))
			break;
		if (sock_flag(sk, SOCK_DEAD) &&
		    smc_close_sent_any_close(conn)) {
			/* smc_release has already been called locally */
			sk->sk_state = SMC_CLOSED;
		} else {
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_APPFINCLOSEWAIT;
		}
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_PROCESSABORT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

wakeup:
	sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
	sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */

	if (old_state != sk->sk_state) {
		sk->sk_state_change(sk);
		if ((sk->sk_state == SMC_CLOSED) &&
		    (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
			smc_conn_free(conn);
			schedule_delayed_work(&smc->sock_put_work,
					      SMC_CLOSE_SOCK_PUT_DELAY);
		}
	}
	release_sock(sk);
}

/* Delayed worker (smc->sock_put_work): unhash the sock and drop the
 * final reference once the close handshake is finished.
 */
void smc_close_sock_put_work(struct work_struct *work)
{
	struct smc_sock *smc = container_of(to_delayed_work(work),
					    struct smc_sock,
					    sock_put_work);

	smc->sk.sk_prot->unhash(&smc->sk);
	sock_put(&smc->sk);
}

/* shutdown(SHUT_WR) handling: flush the sndbuf, announce "done
 * writing" to the peer via smc_close_wr(), and advance the state
 * machine. Expects the socket lock held by the caller (temporarily
 * released around the tx-work cancel). Returns 0 or the CDC send
 * error.
 */
int smc_close_shutdown_write(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

again:
	old_state = sk->sk_state;
	switch (old_state) {
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* send close wr request */
		rc = smc_close_wr(conn);
		if (sk->sk_state == SMC_ACTIVE)
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		else
			/* state changed while the lock was dropped */
			goto again;
		break;
	case SMC_APPCLOSEWAIT1:
		/* passive close */
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* confirm close from peer */
		rc = smc_close_wr(conn);
		sk->sk_state = SMC_APPCLOSEWAIT2;
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PROCESSABORT:
	case SMC_PEERABORTWAIT:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	return rc;
}

/* Initialize close properties on connection establishment. */
void smc_close_init(struct smc_sock *smc)
{
	INIT_WORK(&smc->conn.close_work, smc_close_passive_work);
}