1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE 4 * 5 * Socket Closing - normal and abnormal 6 * 7 * Copyright IBM Corp. 2016 8 * 9 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> 10 */ 11 12 #include <linux/workqueue.h> 13 #include <linux/sched/signal.h> 14 15 #include <net/sock.h> 16 #include <net/tcp.h> 17 18 #include "smc.h" 19 #include "smc_tx.h" 20 #include "smc_cdc.h" 21 #include "smc_close.h" 22 23 /* release the clcsock that is assigned to the smc_sock */ 24 void smc_clcsock_release(struct smc_sock *smc) 25 { 26 struct socket *tcp; 27 28 if (smc->listen_smc && current_work() != &smc->smc_listen_work) 29 cancel_work_sync(&smc->smc_listen_work); 30 mutex_lock(&smc->clcsock_release_lock); 31 if (smc->clcsock) { 32 tcp = smc->clcsock; 33 smc->clcsock = NULL; 34 sock_release(tcp); 35 } 36 mutex_unlock(&smc->clcsock_release_lock); 37 } 38 39 static void smc_close_cleanup_listen(struct sock *parent) 40 { 41 struct sock *sk; 42 43 /* Close non-accepted connections */ 44 while ((sk = smc_accept_dequeue(parent, NULL))) 45 smc_close_non_accepted(sk); 46 } 47 48 /* wait for sndbuf data being transmitted */ 49 static void smc_close_stream_wait(struct smc_sock *smc, long timeout) 50 { 51 DEFINE_WAIT_FUNC(wait, woken_wake_function); 52 struct sock *sk = &smc->sk; 53 54 if (!timeout) 55 return; 56 57 if (!smc_tx_prepared_sends(&smc->conn)) 58 return; 59 60 smc->wait_close_tx_prepared = 1; 61 add_wait_queue(sk_sleep(sk), &wait); 62 while (!signal_pending(current) && timeout) { 63 int rc; 64 65 rc = sk_wait_event(sk, &timeout, 66 !smc_tx_prepared_sends(&smc->conn) || 67 sk->sk_err == ECONNABORTED || 68 sk->sk_err == ECONNRESET || 69 smc->conn.killed, 70 &wait); 71 if (rc) 72 break; 73 } 74 remove_wait_queue(sk_sleep(sk), &wait); 75 smc->wait_close_tx_prepared = 0; 76 } 77 78 void smc_close_wake_tx_prepared(struct smc_sock *smc) 79 { 80 if (smc->wait_close_tx_prepared) 81 /* wake up socket closing */ 82 smc->sk.sk_state_change(&smc->sk); 83 } 84 85 static int smc_close_wr(struct smc_connection *conn) 86 { 87 conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1; 88 89 return smc_cdc_get_slot_and_msg_send(conn); 90 } 91 92 static int smc_close_final(struct smc_connection *conn) 93 { 94 if (atomic_read(&conn->bytes_to_rcv)) 95 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; 96 else 97 conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1; 98 if (conn->killed) 99 return -EPIPE; 100 101 return smc_cdc_get_slot_and_msg_send(conn); 102 } 103 104 int smc_close_abort(struct smc_connection *conn) 105 { 106 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; 107 108 return smc_cdc_get_slot_and_msg_send(conn); 109 } 110 111 static void smc_close_cancel_work(struct smc_sock *smc) 112 { 113 struct sock *sk = &smc->sk; 114 115 release_sock(sk); 116 cancel_work_sync(&smc->conn.close_work); 117 cancel_delayed_work_sync(&smc->conn.tx_work); 118 lock_sock(sk); 119 } 120 121 /* terminate smc socket abnormally - active abort 122 * link group is terminated, i.e. RDMA communication no longer possible 123 */ 124 void smc_close_active_abort(struct smc_sock *smc) 125 { 126 struct sock *sk = &smc->sk; 127 bool release_clcsock = false; 128 129 if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) { 130 sk->sk_err = ECONNABORTED; 131 if (smc->clcsock && smc->clcsock->sk) 132 tcp_abort(smc->clcsock->sk, ECONNABORTED); 133 } 134 switch (sk->sk_state) { 135 case SMC_ACTIVE: 136 case SMC_APPCLOSEWAIT1: 137 case SMC_APPCLOSEWAIT2: 138 sk->sk_state = SMC_PEERABORTWAIT; 139 smc_close_cancel_work(smc); 140 if (sk->sk_state != SMC_PEERABORTWAIT) 141 break; 142 sk->sk_state = SMC_CLOSED; 143 sock_put(sk); /* (postponed) passive closing */ 144 break; 145 case SMC_PEERCLOSEWAIT1: 146 case SMC_PEERCLOSEWAIT2: 147 case SMC_PEERFINCLOSEWAIT: 148 sk->sk_state = SMC_PEERABORTWAIT; 149 smc_close_cancel_work(smc); 150 if (sk->sk_state != SMC_PEERABORTWAIT) 151 break; 152 sk->sk_state = SMC_CLOSED; 153 smc_conn_free(&smc->conn); 154 release_clcsock = true; 155 sock_put(sk); /* passive closing */ 156 break; 157 case SMC_PROCESSABORT: 158 case SMC_APPFINCLOSEWAIT: 159 sk->sk_state = SMC_PEERABORTWAIT; 160 smc_close_cancel_work(smc); 161 if (sk->sk_state != SMC_PEERABORTWAIT) 162 break; 163 sk->sk_state = SMC_CLOSED; 164 smc_conn_free(&smc->conn); 165 release_clcsock = true; 166 break; 167 case SMC_INIT: 168 case SMC_PEERABORTWAIT: 169 case SMC_CLOSED: 170 break; 171 } 172 173 sock_set_flag(sk, SOCK_DEAD); 174 sk->sk_state_change(sk); 175 176 if (release_clcsock) { 177 release_sock(sk); 178 smc_clcsock_release(smc); 179 lock_sock(sk); 180 } 181 } 182 183 static inline bool smc_close_sent_any_close(struct smc_connection *conn) 184 { 185 return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort || 186 conn->local_tx_ctrl.conn_state_flags.peer_conn_closed; 187 } 188 189 int smc_close_active(struct smc_sock *smc) 190 { 191 struct smc_cdc_conn_state_flags *txflags = 192 &smc->conn.local_tx_ctrl.conn_state_flags; 193 struct smc_connection *conn = &smc->conn; 194 struct sock *sk = &smc->sk; 195 int old_state; 196 long timeout; 197 int rc = 0; 198 199 timeout = current->flags & PF_EXITING ? 200 0 : sock_flag(sk, SOCK_LINGER) ? 201 sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; 202 203 old_state = sk->sk_state; 204 again: 205 switch (sk->sk_state) { 206 case SMC_INIT: 207 sk->sk_state = SMC_CLOSED; 208 break; 209 case SMC_LISTEN: 210 sk->sk_state = SMC_CLOSED; 211 sk->sk_state_change(sk); /* wake up accept */ 212 if (smc->clcsock && smc->clcsock->sk) { 213 smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready; 214 smc->clcsock->sk->sk_user_data = NULL; 215 rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); 216 } 217 smc_close_cleanup_listen(sk); 218 release_sock(sk); 219 flush_work(&smc->tcp_listen_work); 220 lock_sock(sk); 221 break; 222 case SMC_ACTIVE: 223 smc_close_stream_wait(smc, timeout); 224 release_sock(sk); 225 cancel_delayed_work_sync(&conn->tx_work); 226 lock_sock(sk); 227 if (sk->sk_state == SMC_ACTIVE) { 228 /* send close request */ 229 rc = smc_close_final(conn); 230 sk->sk_state = SMC_PEERCLOSEWAIT1; 231 232 /* actively shutdown clcsock before peer close it, 233 * prevent peer from entering TIME_WAIT state. 234 */ 235 if (smc->clcsock && smc->clcsock->sk) 236 rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); 237 } else { 238 /* peer event has changed the state */ 239 goto again; 240 } 241 break; 242 case SMC_APPFINCLOSEWAIT: 243 /* socket already shutdown wr or both (active close) */ 244 if (txflags->peer_done_writing && 245 !smc_close_sent_any_close(conn)) { 246 /* just shutdown wr done, send close request */ 247 rc = smc_close_final(conn); 248 } 249 sk->sk_state = SMC_CLOSED; 250 break; 251 case SMC_APPCLOSEWAIT1: 252 case SMC_APPCLOSEWAIT2: 253 if (!smc_cdc_rxed_any_close(conn)) 254 smc_close_stream_wait(smc, timeout); 255 release_sock(sk); 256 cancel_delayed_work_sync(&conn->tx_work); 257 lock_sock(sk); 258 if (sk->sk_state != SMC_APPCLOSEWAIT1 && 259 sk->sk_state != SMC_APPCLOSEWAIT2) 260 goto again; 261 /* confirm close from peer */ 262 rc = smc_close_final(conn); 263 if (smc_cdc_rxed_any_close(conn)) { 264 /* peer has closed the socket already */ 265 sk->sk_state = SMC_CLOSED; 266 sock_put(sk); /* postponed passive closing */ 267 } else { 268 /* peer has just issued a shutdown write */ 269 sk->sk_state = SMC_PEERFINCLOSEWAIT; 270 } 271 break; 272 case SMC_PEERCLOSEWAIT1: 273 case SMC_PEERCLOSEWAIT2: 274 if (txflags->peer_done_writing && 275 !smc_close_sent_any_close(conn)) { 276 /* just shutdown wr done, send close request */ 277 rc = smc_close_final(conn); 278 } 279 /* peer sending PeerConnectionClosed will cause transition */ 280 break; 281 case SMC_PEERFINCLOSEWAIT: 282 /* peer sending PeerConnectionClosed will cause transition */ 283 break; 284 case SMC_PROCESSABORT: 285 rc = smc_close_abort(conn); 286 sk->sk_state = SMC_CLOSED; 287 break; 288 case SMC_PEERABORTWAIT: 289 sk->sk_state = SMC_CLOSED; 290 break; 291 case SMC_CLOSED: 292 /* nothing to do, add tracing in future patch */ 293 break; 294 } 295 296 if (old_state != sk->sk_state) 297 sk->sk_state_change(sk); 298 return rc; 299 } 300 301 static void smc_close_passive_abort_received(struct smc_sock *smc) 302 { 303 struct smc_cdc_conn_state_flags *txflags = 304 &smc->conn.local_tx_ctrl.conn_state_flags; 305 struct sock *sk = &smc->sk; 306 307 switch (sk->sk_state) { 308 case SMC_INIT: 309 case SMC_ACTIVE: 310 case SMC_APPCLOSEWAIT1: 311 sk->sk_state = SMC_PROCESSABORT; 312 sock_put(sk); /* passive closing */ 313 break; 314 case SMC_APPFINCLOSEWAIT: 315 sk->sk_state = SMC_PROCESSABORT; 316 break; 317 case SMC_PEERCLOSEWAIT1: 318 case SMC_PEERCLOSEWAIT2: 319 if (txflags->peer_done_writing && 320 !smc_close_sent_any_close(&smc->conn)) 321 /* just shutdown, but not yet closed locally */ 322 sk->sk_state = SMC_PROCESSABORT; 323 else 324 sk->sk_state = SMC_CLOSED; 325 sock_put(sk); /* passive closing */ 326 break; 327 case SMC_APPCLOSEWAIT2: 328 case SMC_PEERFINCLOSEWAIT: 329 sk->sk_state = SMC_CLOSED; 330 sock_put(sk); /* passive closing */ 331 break; 332 case SMC_PEERABORTWAIT: 333 sk->sk_state = SMC_CLOSED; 334 break; 335 case SMC_PROCESSABORT: 336 /* nothing to do, add tracing in future patch */ 337 break; 338 } 339 } 340 341 /* Either some kind of closing has been received: peer_conn_closed, 342 * peer_conn_abort, or peer_done_writing 343 * or the link group of the connection terminates abnormally. 344 */ 345 static void smc_close_passive_work(struct work_struct *work) 346 { 347 struct smc_connection *conn = container_of(work, 348 struct smc_connection, 349 close_work); 350 struct smc_sock *smc = container_of(conn, struct smc_sock, conn); 351 struct smc_cdc_conn_state_flags *rxflags; 352 bool release_clcsock = false; 353 struct sock *sk = &smc->sk; 354 int old_state; 355 356 lock_sock(sk); 357 old_state = sk->sk_state; 358 359 rxflags = &conn->local_rx_ctrl.conn_state_flags; 360 if (rxflags->peer_conn_abort) { 361 /* peer has not received all data */ 362 smc_close_passive_abort_received(smc); 363 release_sock(sk); 364 cancel_delayed_work_sync(&conn->tx_work); 365 lock_sock(sk); 366 goto wakeup; 367 } 368 369 switch (sk->sk_state) { 370 case SMC_INIT: 371 sk->sk_state = SMC_APPCLOSEWAIT1; 372 break; 373 case SMC_ACTIVE: 374 sk->sk_state = SMC_APPCLOSEWAIT1; 375 /* postpone sock_put() for passive closing to cover 376 * received SEND_SHUTDOWN as well 377 */ 378 break; 379 case SMC_PEERCLOSEWAIT1: 380 if (rxflags->peer_done_writing) 381 sk->sk_state = SMC_PEERCLOSEWAIT2; 382 fallthrough; 383 /* to check for closing */ 384 case SMC_PEERCLOSEWAIT2: 385 if (!smc_cdc_rxed_any_close(conn)) 386 break; 387 if (sock_flag(sk, SOCK_DEAD) && 388 smc_close_sent_any_close(conn)) { 389 /* smc_release has already been called locally */ 390 sk->sk_state = SMC_CLOSED; 391 } else { 392 /* just shutdown, but not yet closed locally */ 393 sk->sk_state = SMC_APPFINCLOSEWAIT; 394 } 395 sock_put(sk); /* passive closing */ 396 break; 397 case SMC_PEERFINCLOSEWAIT: 398 if (smc_cdc_rxed_any_close(conn)) { 399 sk->sk_state = SMC_CLOSED; 400 sock_put(sk); /* passive closing */ 401 } 402 break; 403 case SMC_APPCLOSEWAIT1: 404 case SMC_APPCLOSEWAIT2: 405 /* postpone sock_put() for passive closing to cover 406 * received SEND_SHUTDOWN as well 407 */ 408 break; 409 case SMC_APPFINCLOSEWAIT: 410 case SMC_PEERABORTWAIT: 411 case SMC_PROCESSABORT: 412 case SMC_CLOSED: 413 /* nothing to do, add tracing in future patch */ 414 break; 415 } 416 417 wakeup: 418 sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */ 419 sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */ 420 421 if (old_state != sk->sk_state) { 422 sk->sk_state_change(sk); 423 if ((sk->sk_state == SMC_CLOSED) && 424 (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { 425 smc_conn_free(conn); 426 if (smc->clcsock) 427 release_clcsock = true; 428 } 429 } 430 release_sock(sk); 431 if (release_clcsock) 432 smc_clcsock_release(smc); 433 sock_put(sk); /* sock_hold done by schedulers of close_work */ 434 } 435 436 int smc_close_shutdown_write(struct smc_sock *smc) 437 { 438 struct smc_connection *conn = &smc->conn; 439 struct sock *sk = &smc->sk; 440 int old_state; 441 long timeout; 442 int rc = 0; 443 444 timeout = current->flags & PF_EXITING ? 445 0 : sock_flag(sk, SOCK_LINGER) ? 446 sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; 447 448 old_state = sk->sk_state; 449 again: 450 switch (sk->sk_state) { 451 case SMC_ACTIVE: 452 smc_close_stream_wait(smc, timeout); 453 release_sock(sk); 454 cancel_delayed_work_sync(&conn->tx_work); 455 lock_sock(sk); 456 if (sk->sk_state != SMC_ACTIVE) 457 goto again; 458 /* send close wr request */ 459 rc = smc_close_wr(conn); 460 sk->sk_state = SMC_PEERCLOSEWAIT1; 461 break; 462 case SMC_APPCLOSEWAIT1: 463 /* passive close */ 464 if (!smc_cdc_rxed_any_close(conn)) 465 smc_close_stream_wait(smc, timeout); 466 release_sock(sk); 467 cancel_delayed_work_sync(&conn->tx_work); 468 lock_sock(sk); 469 if (sk->sk_state != SMC_APPCLOSEWAIT1) 470 goto again; 471 /* confirm close from peer */ 472 rc = smc_close_wr(conn); 473 sk->sk_state = SMC_APPCLOSEWAIT2; 474 break; 475 case SMC_APPCLOSEWAIT2: 476 case SMC_PEERFINCLOSEWAIT: 477 case SMC_PEERCLOSEWAIT1: 478 case SMC_PEERCLOSEWAIT2: 479 case SMC_APPFINCLOSEWAIT: 480 case SMC_PROCESSABORT: 481 case SMC_PEERABORTWAIT: 482 /* nothing to do, add tracing in future patch */ 483 break; 484 } 485 486 if (old_state != sk->sk_state) 487 sk->sk_state_change(sk); 488 return rc; 489 } 490 491 /* Initialize close properties on connection establishment. */ 492 void smc_close_init(struct smc_sock *smc) 493 { 494 INIT_WORK(&smc->conn.close_work, smc_close_passive_work); 495 } 496