// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Socket Closing - normal and abnormal
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/workqueue.h>
#include <linux/sched/signal.h>

#include <net/sock.h>

#include "smc.h"
#include "smc_tx.h"
#include "smc_cdc.h"
#include "smc_close.h"

#define SMC_CLOSE_WAIT_TX_PENDS_TIME	(5 * HZ)

static void smc_close_cleanup_listen(struct sock *parent)
{
	struct sock *sk;

	/* Close non-accepted connections */
	while ((sk = smc_accept_dequeue(parent, NULL)))
		smc_close_non_accepted(sk);
}

/* wait at most SMC_CLOSE_WAIT_TX_PENDS_TIME for pending CDC transmissions
 * to complete
 */
static void smc_close_wait_tx_pends(struct smc_sock *smc)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;
	signed long timeout;

	timeout = SMC_CLOSE_WAIT_TX_PENDS_TIME;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_cdc_tx_has_pending(&smc->conn),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
}

/* wait for sndbuf data to be transmitted */
static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;

	if (!timeout)
		return;

	if (!smc_tx_prepared_sends(&smc->conn))
		return;

	smc->wait_close_tx_prepared = 1;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_tx_prepared_sends(&smc->conn) ||
				   (sk->sk_err == ECONNABORTED) ||
				   (sk->sk_err == ECONNRESET),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
	smc->wait_close_tx_prepared = 0;
}

void smc_close_wake_tx_prepared(struct smc_sock *smc)
{
	if (smc->wait_close_tx_prepared)
		/* wake up socket closing */
		smc->sk.sk_state_change(&smc->sk);
}

/* signal "done writing" to the peer (half close / shutdown write) */
static int smc_close_wr(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

/* send the final closing indication to the peer; degrade it to an abort
 * if unread data is still pending in the receive buffer
 */
static int smc_close_final(struct smc_connection *conn)
{
	if (atomic_read(&conn->bytes_to_rcv))
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

/* signal abnormal termination of the connection to the peer */
static int smc_close_abort(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

/* terminate smc socket abnormally - active abort
 * RDMA communication no longer possible
 */
void smc_close_active_abort(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;

	smc->sk.sk_err = ECONNABORTED;
	if (smc->clcsock && smc->clcsock->sk) {
		smc->clcsock->sk->sk_err = ECONNABORTED;
		smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
	}
	switch (smc->sk.sk_state) {
	case SMC_INIT:
	case SMC_ACTIVE:
		smc->sk.sk_state = SMC_PEERABORTWAIT;
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		txflags->peer_conn_abort = 1;
		sock_release(smc->clcsock);
		if (!smc_cdc_rxed_any_close(&smc->conn))
			smc->sk.sk_state = SMC_PEERABORTWAIT;
		else
			smc->sk.sk_state = SMC_CLOSED;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (!txflags->peer_conn_closed) {
			smc->sk.sk_state = SMC_PEERABORTWAIT;
			txflags->peer_conn_abort = 1;
			sock_release(smc->clcsock);
		} else {
			smc->sk.sk_state = SMC_CLOSED;
		}
		break;
	case SMC_PROCESSABORT:
	case SMC_APPFINCLOSEWAIT:
		if (!txflags->peer_conn_closed) {
			txflags->peer_conn_abort = 1;
			sock_release(smc->clcsock);
		}
		smc->sk.sk_state = SMC_CLOSED;
		break;
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		break;
	}

	sock_set_flag(&smc->sk, SOCK_DEAD);
	smc->sk.sk_state_change(&smc->sk);
}

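/* a close or abort indication has already been sent to the peer if one of
 * the closing flags is set in local_tx_ctrl
 */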
static inline bool smc_close_sent_any_close(struct smc_connection *conn)
{
	return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort ||
	       conn->local_tx_ctrl.conn_state_flags.peer_conn_closed;
}

int smc_close_active(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

again:
	old_state = sk->sk_state;
	switch (old_state) {
	case SMC_INIT:
		sk->sk_state = SMC_CLOSED;
		if (smc->smc_listen_work.func)
			cancel_work_sync(&smc->smc_listen_work);
		break;
	case SMC_LISTEN:
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk); /* wake up accept */
		if (smc->clcsock && smc->clcsock->sk) {
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
			/* wake up kernel_accept of smc_tcp_listen_worker */
			smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
		}
		release_sock(sk);
		smc_close_cleanup_listen(sk);
		cancel_work_sync(&smc->smc_listen_work);
		lock_sock(sk);
		break;
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state == SMC_ACTIVE) {
			/* send close request */
			rc = smc_close_final(conn);
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		} else {
			/* peer event has changed the state */
			goto again;
		}
		break;
	case SMC_APPFINCLOSEWAIT:
		/* socket already shutdown wr or both (active close) */
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		sk->sk_state = SMC_CLOSED;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_err != ECONNABORTED) {
			/* confirm close from peer */
			rc = smc_close_final(conn);
			if (rc)
				break;
		}
		if (smc_cdc_rxed_any_close(conn))
			/* peer has closed the socket already */
			sk->sk_state = SMC_CLOSED;
		else
			/* peer has just issued a shutdown write */
			sk->sk_state = SMC_PEERFINCLOSEWAIT;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PEERFINCLOSEWAIT:
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PROCESSABORT:
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		smc_close_abort(conn);
		sk->sk_state = SMC_CLOSED;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(&smc->sk);
	return rc;
}

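/* peer sent a connection abort: answer with an own abort if this side has
 * not signalled closing yet, and move the socket towards SMC_CLOSED
 */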
static void smc_close_passive_abort_received(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct sock *sk = &smc->sk;

	switch (sk->sk_state) {
	case SMC_ACTIVE:
	case SMC_APPFINCLOSEWAIT:
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		smc_close_abort(&smc->conn);
		sk->sk_state = SMC_PROCESSABORT;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(&smc->conn)) {
			/* just shutdown, but not yet closed locally */
			smc_close_abort(&smc->conn);
			sk->sk_state = SMC_PROCESSABORT;
		} else {
			sk->sk_state = SMC_CLOSED;
		}
		break;
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_INIT:
	case SMC_PROCESSABORT:
		/* nothing to do, add tracing in future patch */
		break;
	}
}

/* Some kind of closing has been received: peer_conn_closed, peer_conn_abort,
 * or peer_done_writing.
 */
static void smc_close_passive_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   close_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_cdc_conn_state_flags *rxflags;
	struct sock *sk = &smc->sk;
	int old_state;

	lock_sock(&smc->sk);
	old_state = sk->sk_state;

	if (!conn->alert_token_local) {
		/* abnormal termination */
		smc_close_active_abort(smc);
		goto wakeup;
	}

	rxflags = &smc->conn.local_rx_ctrl.conn_state_flags;
	if (rxflags->peer_conn_abort) {
		smc_close_passive_abort_received(smc);
		goto wakeup;
	}

	switch (sk->sk_state) {
	case SMC_INIT:
		if (atomic_read(&smc->conn.bytes_to_rcv) ||
		    (rxflags->peer_done_writing &&
		     !smc_cdc_rxed_any_close(conn)))
			sk->sk_state = SMC_APPCLOSEWAIT1;
		else
			sk->sk_state = SMC_CLOSED;
		break;
	case SMC_ACTIVE:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		break;
	case SMC_PEERCLOSEWAIT1:
		if (rxflags->peer_done_writing)
			sk->sk_state = SMC_PEERCLOSEWAIT2;
		/* fall through */
		/* to check for closing */
	case SMC_PEERCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		if (!smc_cdc_rxed_any_close(&smc->conn))
			break;
		if (sock_flag(sk, SOCK_DEAD) &&
		    smc_close_sent_any_close(conn)) {
			/* smc_release has already been called locally */
			sk->sk_state = SMC_CLOSED;
		} else {
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_APPFINCLOSEWAIT;
		}
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_PROCESSABORT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

wakeup:
	sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
	sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */

	if (old_state != sk->sk_state) {
		sk->sk_state_change(sk);
		if ((sk->sk_state == SMC_CLOSED) &&
		    (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
			smc_conn_free(&smc->conn);
			schedule_delayed_work(&smc->sock_put_work,
					      SMC_CLOSE_SOCK_PUT_DELAY);
		}
	}
	release_sock(&smc->sk);
}

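/* delayed work: unhash the sock and drop a socket reference once closing
 * has completed
 */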
void smc_close_sock_put_work(struct work_struct *work)
{
	struct smc_sock *smc = container_of(to_delayed_work(work),
					    struct smc_sock,
					    sock_put_work);

	smc->sk.sk_prot->unhash(&smc->sk);
	sock_put(&smc->sk);
}

/* handle shutdown(SHUT_WR): tell the peer that this side is done writing
 * and move to the matching wait state
 */
int smc_close_shutdown_write(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

again:
	old_state = sk->sk_state;
	switch (old_state) {
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* send close wr request */
		rc = smc_close_wr(conn);
		if (sk->sk_state == SMC_ACTIVE)
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		else
			goto again;
		break;
	case SMC_APPCLOSEWAIT1:
		/* passive close */
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* confirm close from peer */
		rc = smc_close_wr(conn);
		sk->sk_state = SMC_APPCLOSEWAIT2;
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PROCESSABORT:
	case SMC_PEERABORTWAIT:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(&smc->sk);
	return rc;
}

/* Initialize close properties on connection establishment. */
void smc_close_init(struct smc_sock *smc)
{
	INIT_WORK(&smc->conn.close_work, smc_close_passive_work);
}