1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE 4 * 5 * Socket Closing - normal and abnormal 6 * 7 * Copyright IBM Corp. 2016 8 * 9 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> 10 */ 11 12 #include <linux/workqueue.h> 13 #include <linux/sched/signal.h> 14 15 #include <net/sock.h> 16 #include <net/tcp.h> 17 18 #include "smc.h" 19 #include "smc_tx.h" 20 #include "smc_cdc.h" 21 #include "smc_close.h" 22 23 /* release the clcsock that is assigned to the smc_sock */ 24 void smc_clcsock_release(struct smc_sock *smc) 25 { 26 struct socket *tcp; 27 28 if (smc->listen_smc && current_work() != &smc->smc_listen_work) 29 cancel_work_sync(&smc->smc_listen_work); 30 mutex_lock(&smc->clcsock_release_lock); 31 if (smc->clcsock) { 32 tcp = smc->clcsock; 33 smc->clcsock = NULL; 34 sock_release(tcp); 35 } 36 mutex_unlock(&smc->clcsock_release_lock); 37 } 38 39 static void smc_close_cleanup_listen(struct sock *parent) 40 { 41 struct sock *sk; 42 43 /* Close non-accepted connections */ 44 while ((sk = smc_accept_dequeue(parent, NULL))) 45 smc_close_non_accepted(sk); 46 } 47 48 /* wait for sndbuf data being transmitted */ 49 static void smc_close_stream_wait(struct smc_sock *smc, long timeout) 50 { 51 DEFINE_WAIT_FUNC(wait, woken_wake_function); 52 struct sock *sk = &smc->sk; 53 54 if (!timeout) 55 return; 56 57 if (!smc_tx_prepared_sends(&smc->conn)) 58 return; 59 60 smc->wait_close_tx_prepared = 1; 61 add_wait_queue(sk_sleep(sk), &wait); 62 while (!signal_pending(current) && timeout) { 63 int rc; 64 65 rc = sk_wait_event(sk, &timeout, 66 !smc_tx_prepared_sends(&smc->conn) || 67 sk->sk_err == ECONNABORTED || 68 sk->sk_err == ECONNRESET || 69 smc->conn.killed, 70 &wait); 71 if (rc) 72 break; 73 } 74 remove_wait_queue(sk_sleep(sk), &wait); 75 smc->wait_close_tx_prepared = 0; 76 } 77 78 void smc_close_wake_tx_prepared(struct smc_sock *smc) 79 { 80 if (smc->wait_close_tx_prepared) 81 /* wake up socket closing */ 82 smc->sk.sk_state_change(&smc->sk); 83 } 84 85 static int smc_close_wr(struct smc_connection *conn) 86 { 87 conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1; 88 89 return smc_cdc_get_slot_and_msg_send(conn); 90 } 91 92 static int smc_close_final(struct smc_connection *conn) 93 { 94 if (atomic_read(&conn->bytes_to_rcv)) 95 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; 96 else 97 conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1; 98 if (conn->killed) 99 return -EPIPE; 100 101 return smc_cdc_get_slot_and_msg_send(conn); 102 } 103 104 int smc_close_abort(struct smc_connection *conn) 105 { 106 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; 107 108 return smc_cdc_get_slot_and_msg_send(conn); 109 } 110 111 static void smc_close_cancel_work(struct smc_sock *smc) 112 { 113 struct sock *sk = &smc->sk; 114 115 release_sock(sk); 116 cancel_work_sync(&smc->conn.close_work); 117 cancel_delayed_work_sync(&smc->conn.tx_work); 118 lock_sock(sk); 119 } 120 121 /* terminate smc socket abnormally - active abort 122 * link group is terminated, i.e. RDMA communication no longer possible 123 */ 124 void smc_close_active_abort(struct smc_sock *smc) 125 { 126 struct sock *sk = &smc->sk; 127 bool release_clcsock = false; 128 129 if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) { 130 sk->sk_err = ECONNABORTED; 131 if (smc->clcsock && smc->clcsock->sk) 132 tcp_abort(smc->clcsock->sk, ECONNABORTED); 133 } 134 switch (sk->sk_state) { 135 case SMC_ACTIVE: 136 case SMC_APPCLOSEWAIT1: 137 case SMC_APPCLOSEWAIT2: 138 sk->sk_state = SMC_PEERABORTWAIT; 139 smc_close_cancel_work(smc); 140 if (sk->sk_state != SMC_PEERABORTWAIT) 141 break; 142 sk->sk_state = SMC_CLOSED; 143 sock_put(sk); /* (postponed) passive closing */ 144 break; 145 case SMC_PEERCLOSEWAIT1: 146 case SMC_PEERCLOSEWAIT2: 147 case SMC_PEERFINCLOSEWAIT: 148 sk->sk_state = SMC_PEERABORTWAIT; 149 smc_close_cancel_work(smc); 150 if (sk->sk_state != SMC_PEERABORTWAIT) 151 break; 152 sk->sk_state = SMC_CLOSED; 153 smc_conn_free(&smc->conn); 154 release_clcsock = true; 155 sock_put(sk); /* passive closing */ 156 break; 157 case SMC_PROCESSABORT: 158 case SMC_APPFINCLOSEWAIT: 159 sk->sk_state = SMC_PEERABORTWAIT; 160 smc_close_cancel_work(smc); 161 if (sk->sk_state != SMC_PEERABORTWAIT) 162 break; 163 sk->sk_state = SMC_CLOSED; 164 smc_conn_free(&smc->conn); 165 release_clcsock = true; 166 break; 167 case SMC_INIT: 168 case SMC_PEERABORTWAIT: 169 case SMC_CLOSED: 170 break; 171 } 172 173 sock_set_flag(sk, SOCK_DEAD); 174 sk->sk_state_change(sk); 175 176 if (release_clcsock) { 177 release_sock(sk); 178 smc_clcsock_release(smc); 179 lock_sock(sk); 180 } 181 } 182 183 static inline bool smc_close_sent_any_close(struct smc_connection *conn) 184 { 185 return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort || 186 conn->local_tx_ctrl.conn_state_flags.peer_conn_closed; 187 } 188 189 int smc_close_active(struct smc_sock *smc) 190 { 191 struct smc_cdc_conn_state_flags *txflags = 192 &smc->conn.local_tx_ctrl.conn_state_flags; 193 struct smc_connection *conn = &smc->conn; 194 struct sock *sk = &smc->sk; 195 int old_state; 196 long timeout; 197 int rc = 0; 198 int rc1 = 0; 199 200 timeout = current->flags & PF_EXITING ? 201 0 : sock_flag(sk, SOCK_LINGER) ? 202 sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; 203 204 old_state = sk->sk_state; 205 again: 206 switch (sk->sk_state) { 207 case SMC_INIT: 208 sk->sk_state = SMC_CLOSED; 209 break; 210 case SMC_LISTEN: 211 sk->sk_state = SMC_CLOSED; 212 sk->sk_state_change(sk); /* wake up accept */ 213 if (smc->clcsock && smc->clcsock->sk) { 214 smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready; 215 smc->clcsock->sk->sk_user_data = NULL; 216 rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); 217 } 218 smc_close_cleanup_listen(sk); 219 release_sock(sk); 220 flush_work(&smc->tcp_listen_work); 221 lock_sock(sk); 222 break; 223 case SMC_ACTIVE: 224 smc_close_stream_wait(smc, timeout); 225 release_sock(sk); 226 cancel_delayed_work_sync(&conn->tx_work); 227 lock_sock(sk); 228 if (sk->sk_state == SMC_ACTIVE) { 229 /* send close request */ 230 rc = smc_close_final(conn); 231 sk->sk_state = SMC_PEERCLOSEWAIT1; 232 233 /* actively shutdown clcsock before peer close it, 234 * prevent peer from entering TIME_WAIT state. 235 */ 236 if (smc->clcsock && smc->clcsock->sk) { 237 rc1 = kernel_sock_shutdown(smc->clcsock, 238 SHUT_RDWR); 239 rc = rc ? rc : rc1; 240 } 241 } else { 242 /* peer event has changed the state */ 243 goto again; 244 } 245 break; 246 case SMC_APPFINCLOSEWAIT: 247 /* socket already shutdown wr or both (active close) */ 248 if (txflags->peer_done_writing && 249 !smc_close_sent_any_close(conn)) { 250 /* just shutdown wr done, send close request */ 251 rc = smc_close_final(conn); 252 } 253 sk->sk_state = SMC_CLOSED; 254 break; 255 case SMC_APPCLOSEWAIT1: 256 case SMC_APPCLOSEWAIT2: 257 if (!smc_cdc_rxed_any_close(conn)) 258 smc_close_stream_wait(smc, timeout); 259 release_sock(sk); 260 cancel_delayed_work_sync(&conn->tx_work); 261 lock_sock(sk); 262 if (sk->sk_state != SMC_APPCLOSEWAIT1 && 263 sk->sk_state != SMC_APPCLOSEWAIT2) 264 goto again; 265 /* confirm close from peer */ 266 rc = smc_close_final(conn); 267 if (smc_cdc_rxed_any_close(conn)) { 268 /* peer has closed the socket already */ 269 sk->sk_state = SMC_CLOSED; 270 sock_put(sk); /* postponed passive closing */ 271 } else { 272 /* peer has just issued a shutdown write */ 273 sk->sk_state = SMC_PEERFINCLOSEWAIT; 274 } 275 break; 276 case SMC_PEERCLOSEWAIT1: 277 case SMC_PEERCLOSEWAIT2: 278 if (txflags->peer_done_writing && 279 !smc_close_sent_any_close(conn)) { 280 /* just shutdown wr done, send close request */ 281 rc = smc_close_final(conn); 282 } 283 /* peer sending PeerConnectionClosed will cause transition */ 284 break; 285 case SMC_PEERFINCLOSEWAIT: 286 /* peer sending PeerConnectionClosed will cause transition */ 287 break; 288 case SMC_PROCESSABORT: 289 rc = smc_close_abort(conn); 290 sk->sk_state = SMC_CLOSED; 291 break; 292 case SMC_PEERABORTWAIT: 293 sk->sk_state = SMC_CLOSED; 294 break; 295 case SMC_CLOSED: 296 /* nothing to do, add tracing in future patch */ 297 break; 298 } 299 300 if (old_state != sk->sk_state) 301 sk->sk_state_change(sk); 302 return rc; 303 } 304 305 static void smc_close_passive_abort_received(struct smc_sock *smc) 306 { 307 struct smc_cdc_conn_state_flags *txflags = 308 &smc->conn.local_tx_ctrl.conn_state_flags; 309 struct sock *sk = &smc->sk; 310 311 switch (sk->sk_state) { 312 case SMC_INIT: 313 case SMC_ACTIVE: 314 case SMC_APPCLOSEWAIT1: 315 sk->sk_state = SMC_PROCESSABORT; 316 sock_put(sk); /* passive closing */ 317 break; 318 case SMC_APPFINCLOSEWAIT: 319 sk->sk_state = SMC_PROCESSABORT; 320 break; 321 case SMC_PEERCLOSEWAIT1: 322 case SMC_PEERCLOSEWAIT2: 323 if (txflags->peer_done_writing && 324 !smc_close_sent_any_close(&smc->conn)) 325 /* just shutdown, but not yet closed locally */ 326 sk->sk_state = SMC_PROCESSABORT; 327 else 328 sk->sk_state = SMC_CLOSED; 329 sock_put(sk); /* passive closing */ 330 break; 331 case SMC_APPCLOSEWAIT2: 332 case SMC_PEERFINCLOSEWAIT: 333 sk->sk_state = SMC_CLOSED; 334 sock_put(sk); /* passive closing */ 335 break; 336 case SMC_PEERABORTWAIT: 337 sk->sk_state = SMC_CLOSED; 338 break; 339 case SMC_PROCESSABORT: 340 /* nothing to do, add tracing in future patch */ 341 break; 342 } 343 } 344 345 /* Either some kind of closing has been received: peer_conn_closed, 346 * peer_conn_abort, or peer_done_writing 347 * or the link group of the connection terminates abnormally. 348 */ 349 static void smc_close_passive_work(struct work_struct *work) 350 { 351 struct smc_connection *conn = container_of(work, 352 struct smc_connection, 353 close_work); 354 struct smc_sock *smc = container_of(conn, struct smc_sock, conn); 355 struct smc_cdc_conn_state_flags *rxflags; 356 bool release_clcsock = false; 357 struct sock *sk = &smc->sk; 358 int old_state; 359 360 lock_sock(sk); 361 old_state = sk->sk_state; 362 363 rxflags = &conn->local_rx_ctrl.conn_state_flags; 364 if (rxflags->peer_conn_abort) { 365 /* peer has not received all data */ 366 smc_close_passive_abort_received(smc); 367 release_sock(sk); 368 cancel_delayed_work_sync(&conn->tx_work); 369 lock_sock(sk); 370 goto wakeup; 371 } 372 373 switch (sk->sk_state) { 374 case SMC_INIT: 375 sk->sk_state = SMC_APPCLOSEWAIT1; 376 break; 377 case SMC_ACTIVE: 378 sk->sk_state = SMC_APPCLOSEWAIT1; 379 /* postpone sock_put() for passive closing to cover 380 * received SEND_SHUTDOWN as well 381 */ 382 break; 383 case SMC_PEERCLOSEWAIT1: 384 if (rxflags->peer_done_writing) 385 sk->sk_state = SMC_PEERCLOSEWAIT2; 386 fallthrough; 387 /* to check for closing */ 388 case SMC_PEERCLOSEWAIT2: 389 if (!smc_cdc_rxed_any_close(conn)) 390 break; 391 if (sock_flag(sk, SOCK_DEAD) && 392 smc_close_sent_any_close(conn)) { 393 /* smc_release has already been called locally */ 394 sk->sk_state = SMC_CLOSED; 395 } else { 396 /* just shutdown, but not yet closed locally */ 397 sk->sk_state = SMC_APPFINCLOSEWAIT; 398 } 399 sock_put(sk); /* passive closing */ 400 break; 401 case SMC_PEERFINCLOSEWAIT: 402 if (smc_cdc_rxed_any_close(conn)) { 403 sk->sk_state = SMC_CLOSED; 404 sock_put(sk); /* passive closing */ 405 } 406 break; 407 case SMC_APPCLOSEWAIT1: 408 case SMC_APPCLOSEWAIT2: 409 /* postpone sock_put() for passive closing to cover 410 * received SEND_SHUTDOWN as well 411 */ 412 break; 413 case SMC_APPFINCLOSEWAIT: 414 case SMC_PEERABORTWAIT: 415 case SMC_PROCESSABORT: 416 case SMC_CLOSED: 417 /* nothing to do, add tracing in future patch */ 418 break; 419 } 420 421 wakeup: 422 sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */ 423 sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */ 424 425 if (old_state != sk->sk_state) { 426 sk->sk_state_change(sk); 427 if ((sk->sk_state == SMC_CLOSED) && 428 (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { 429 smc_conn_free(conn); 430 if (smc->clcsock) 431 release_clcsock = true; 432 } 433 } 434 release_sock(sk); 435 if (release_clcsock) 436 smc_clcsock_release(smc); 437 sock_put(sk); /* sock_hold done by schedulers of close_work */ 438 } 439 440 int smc_close_shutdown_write(struct smc_sock *smc) 441 { 442 struct smc_connection *conn = &smc->conn; 443 struct sock *sk = &smc->sk; 444 int old_state; 445 long timeout; 446 int rc = 0; 447 448 timeout = current->flags & PF_EXITING ? 449 0 : sock_flag(sk, SOCK_LINGER) ? 450 sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; 451 452 old_state = sk->sk_state; 453 again: 454 switch (sk->sk_state) { 455 case SMC_ACTIVE: 456 smc_close_stream_wait(smc, timeout); 457 release_sock(sk); 458 cancel_delayed_work_sync(&conn->tx_work); 459 lock_sock(sk); 460 if (sk->sk_state != SMC_ACTIVE) 461 goto again; 462 /* send close wr request */ 463 rc = smc_close_wr(conn); 464 sk->sk_state = SMC_PEERCLOSEWAIT1; 465 break; 466 case SMC_APPCLOSEWAIT1: 467 /* passive close */ 468 if (!smc_cdc_rxed_any_close(conn)) 469 smc_close_stream_wait(smc, timeout); 470 release_sock(sk); 471 cancel_delayed_work_sync(&conn->tx_work); 472 lock_sock(sk); 473 if (sk->sk_state != SMC_APPCLOSEWAIT1) 474 goto again; 475 /* confirm close from peer */ 476 rc = smc_close_wr(conn); 477 sk->sk_state = SMC_APPCLOSEWAIT2; 478 break; 479 case SMC_APPCLOSEWAIT2: 480 case SMC_PEERFINCLOSEWAIT: 481 case SMC_PEERCLOSEWAIT1: 482 case SMC_PEERCLOSEWAIT2: 483 case SMC_APPFINCLOSEWAIT: 484 case SMC_PROCESSABORT: 485 case SMC_PEERABORTWAIT: 486 /* nothing to do, add tracing in future patch */ 487 break; 488 } 489 490 if (old_state != sk->sk_state) 491 sk->sk_state_change(sk); 492 return rc; 493 } 494 495 /* Initialize close properties on connection establishment. */ 496 void smc_close_init(struct smc_sock *smc) 497 { 498 INIT_WORK(&smc->conn.close_work, smc_close_passive_work); 499 } 500