1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Shared Memory Communications over RDMA (SMC-R) and RoCE 4 * 5 * Socket Closing - normal and abnormal 6 * 7 * Copyright IBM Corp. 2016 8 * 9 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> 10 */ 11 12 #include <linux/workqueue.h> 13 #include <linux/sched/signal.h> 14 15 #include <net/sock.h> 16 #include <net/tcp.h> 17 18 #include "smc.h" 19 #include "smc_tx.h" 20 #include "smc_cdc.h" 21 #include "smc_close.h" 22 23 /* release the clcsock that is assigned to the smc_sock */ 24 void smc_clcsock_release(struct smc_sock *smc) 25 { 26 struct socket *tcp; 27 28 if (smc->listen_smc && current_work() != &smc->smc_listen_work) 29 cancel_work_sync(&smc->smc_listen_work); 30 mutex_lock(&smc->clcsock_release_lock); 31 if (smc->clcsock) { 32 tcp = smc->clcsock; 33 smc->clcsock = NULL; 34 sock_release(tcp); 35 } 36 mutex_unlock(&smc->clcsock_release_lock); 37 } 38 39 static void smc_close_cleanup_listen(struct sock *parent) 40 { 41 struct sock *sk; 42 43 /* Close non-accepted connections */ 44 while ((sk = smc_accept_dequeue(parent, NULL))) 45 smc_close_non_accepted(sk); 46 } 47 48 /* wait for sndbuf data being transmitted */ 49 static void smc_close_stream_wait(struct smc_sock *smc, long timeout) 50 { 51 DEFINE_WAIT_FUNC(wait, woken_wake_function); 52 struct sock *sk = &smc->sk; 53 54 if (!timeout) 55 return; 56 57 if (!smc_tx_prepared_sends(&smc->conn)) 58 return; 59 60 smc->wait_close_tx_prepared = 1; 61 add_wait_queue(sk_sleep(sk), &wait); 62 while (!signal_pending(current) && timeout) { 63 int rc; 64 65 rc = sk_wait_event(sk, &timeout, 66 !smc_tx_prepared_sends(&smc->conn) || 67 sk->sk_err == ECONNABORTED || 68 sk->sk_err == ECONNRESET || 69 smc->conn.killed, 70 &wait); 71 if (rc) 72 break; 73 } 74 remove_wait_queue(sk_sleep(sk), &wait); 75 smc->wait_close_tx_prepared = 0; 76 } 77 78 void smc_close_wake_tx_prepared(struct smc_sock *smc) 79 { 80 if (smc->wait_close_tx_prepared) 81 /* wake up socket closing */ 82 smc->sk.sk_state_change(&smc->sk); 83 } 84 85 static int smc_close_wr(struct smc_connection *conn) 86 { 87 conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1; 88 89 return smc_cdc_get_slot_and_msg_send(conn); 90 } 91 92 static int smc_close_final(struct smc_connection *conn) 93 { 94 if (atomic_read(&conn->bytes_to_rcv)) 95 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; 96 else 97 conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1; 98 if (conn->killed) 99 return -EPIPE; 100 101 return smc_cdc_get_slot_and_msg_send(conn); 102 } 103 104 int smc_close_abort(struct smc_connection *conn) 105 { 106 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; 107 108 return smc_cdc_get_slot_and_msg_send(conn); 109 } 110 111 static void smc_close_cancel_work(struct smc_sock *smc) 112 { 113 struct sock *sk = &smc->sk; 114 115 release_sock(sk); 116 cancel_work_sync(&smc->conn.close_work); 117 cancel_delayed_work_sync(&smc->conn.tx_work); 118 lock_sock(sk); 119 } 120 121 /* terminate smc socket abnormally - active abort 122 * link group is terminated, i.e. RDMA communication no longer possible 123 */ 124 void smc_close_active_abort(struct smc_sock *smc) 125 { 126 struct sock *sk = &smc->sk; 127 bool release_clcsock = false; 128 129 if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) { 130 sk->sk_err = ECONNABORTED; 131 if (smc->clcsock && smc->clcsock->sk) 132 tcp_abort(smc->clcsock->sk, ECONNABORTED); 133 } 134 switch (sk->sk_state) { 135 case SMC_ACTIVE: 136 case SMC_APPCLOSEWAIT1: 137 case SMC_APPCLOSEWAIT2: 138 sk->sk_state = SMC_PEERABORTWAIT; 139 smc_close_cancel_work(smc); 140 if (sk->sk_state != SMC_PEERABORTWAIT) 141 break; 142 sk->sk_state = SMC_CLOSED; 143 sock_put(sk); /* (postponed) passive closing */ 144 break; 145 case SMC_PEERCLOSEWAIT1: 146 case SMC_PEERCLOSEWAIT2: 147 case SMC_PEERFINCLOSEWAIT: 148 sk->sk_state = SMC_PEERABORTWAIT; 149 smc_close_cancel_work(smc); 150 if (sk->sk_state != SMC_PEERABORTWAIT) 151 break; 152 sk->sk_state = SMC_CLOSED; 153 smc_conn_free(&smc->conn); 154 release_clcsock = true; 155 sock_put(sk); /* passive closing */ 156 break; 157 case SMC_PROCESSABORT: 158 case SMC_APPFINCLOSEWAIT: 159 sk->sk_state = SMC_PEERABORTWAIT; 160 smc_close_cancel_work(smc); 161 if (sk->sk_state != SMC_PEERABORTWAIT) 162 break; 163 sk->sk_state = SMC_CLOSED; 164 smc_conn_free(&smc->conn); 165 release_clcsock = true; 166 break; 167 case SMC_INIT: 168 case SMC_PEERABORTWAIT: 169 case SMC_CLOSED: 170 break; 171 } 172 173 sock_set_flag(sk, SOCK_DEAD); 174 sk->sk_state_change(sk); 175 176 if (release_clcsock) { 177 release_sock(sk); 178 smc_clcsock_release(smc); 179 lock_sock(sk); 180 } 181 } 182 183 static inline bool smc_close_sent_any_close(struct smc_connection *conn) 184 { 185 return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort || 186 conn->local_tx_ctrl.conn_state_flags.peer_conn_closed; 187 } 188 189 int smc_close_active(struct smc_sock *smc) 190 { 191 struct smc_cdc_conn_state_flags *txflags = 192 &smc->conn.local_tx_ctrl.conn_state_flags; 193 struct smc_connection *conn = &smc->conn; 194 struct sock *sk = &smc->sk; 195 int old_state; 196 long timeout; 197 int rc = 0; 198 199 timeout = current->flags & PF_EXITING ? 200 0 : sock_flag(sk, SOCK_LINGER) ? 201 sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; 202 203 old_state = sk->sk_state; 204 again: 205 switch (sk->sk_state) { 206 case SMC_INIT: 207 sk->sk_state = SMC_CLOSED; 208 break; 209 case SMC_LISTEN: 210 sk->sk_state = SMC_CLOSED; 211 sk->sk_state_change(sk); /* wake up accept */ 212 if (smc->clcsock && smc->clcsock->sk) { 213 smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready; 214 smc->clcsock->sk->sk_user_data = NULL; 215 rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); 216 } 217 smc_close_cleanup_listen(sk); 218 release_sock(sk); 219 flush_work(&smc->tcp_listen_work); 220 lock_sock(sk); 221 break; 222 case SMC_ACTIVE: 223 smc_close_stream_wait(smc, timeout); 224 release_sock(sk); 225 cancel_delayed_work_sync(&conn->tx_work); 226 lock_sock(sk); 227 if (sk->sk_state == SMC_ACTIVE) { 228 /* send close request */ 229 rc = smc_close_final(conn); 230 sk->sk_state = SMC_PEERCLOSEWAIT1; 231 } else { 232 /* peer event has changed the state */ 233 goto again; 234 } 235 break; 236 case SMC_APPFINCLOSEWAIT: 237 /* socket already shutdown wr or both (active close) */ 238 if (txflags->peer_done_writing && 239 !smc_close_sent_any_close(conn)) { 240 /* just shutdown wr done, send close request */ 241 rc = smc_close_final(conn); 242 } 243 sk->sk_state = SMC_CLOSED; 244 break; 245 case SMC_APPCLOSEWAIT1: 246 case SMC_APPCLOSEWAIT2: 247 if (!smc_cdc_rxed_any_close(conn)) 248 smc_close_stream_wait(smc, timeout); 249 release_sock(sk); 250 cancel_delayed_work_sync(&conn->tx_work); 251 lock_sock(sk); 252 if (sk->sk_state != SMC_APPCLOSEWAIT1 && 253 sk->sk_state != SMC_APPCLOSEWAIT2) 254 goto again; 255 /* confirm close from peer */ 256 rc = smc_close_final(conn); 257 if (smc_cdc_rxed_any_close(conn)) { 258 /* peer has closed the socket already */ 259 sk->sk_state = SMC_CLOSED; 260 sock_put(sk); /* postponed passive closing */ 261 } else { 262 /* peer has just issued a shutdown write */ 263 sk->sk_state = SMC_PEERFINCLOSEWAIT; 264 } 265 break; 266 case SMC_PEERCLOSEWAIT1: 267 case SMC_PEERCLOSEWAIT2: 268 if (txflags->peer_done_writing && 269 !smc_close_sent_any_close(conn)) { 270 /* just shutdown wr done, send close request */ 271 rc = smc_close_final(conn); 272 } 273 /* peer sending PeerConnectionClosed will cause transition */ 274 break; 275 case SMC_PEERFINCLOSEWAIT: 276 /* peer sending PeerConnectionClosed will cause transition */ 277 break; 278 case SMC_PROCESSABORT: 279 rc = smc_close_abort(conn); 280 sk->sk_state = SMC_CLOSED; 281 break; 282 case SMC_PEERABORTWAIT: 283 sk->sk_state = SMC_CLOSED; 284 break; 285 case SMC_CLOSED: 286 /* nothing to do, add tracing in future patch */ 287 break; 288 } 289 290 if (old_state != sk->sk_state) 291 sk->sk_state_change(sk); 292 return rc; 293 } 294 295 static void smc_close_passive_abort_received(struct smc_sock *smc) 296 { 297 struct smc_cdc_conn_state_flags *txflags = 298 &smc->conn.local_tx_ctrl.conn_state_flags; 299 struct sock *sk = &smc->sk; 300 301 switch (sk->sk_state) { 302 case SMC_INIT: 303 case SMC_ACTIVE: 304 case SMC_APPCLOSEWAIT1: 305 sk->sk_state = SMC_PROCESSABORT; 306 sock_put(sk); /* passive closing */ 307 break; 308 case SMC_APPFINCLOSEWAIT: 309 sk->sk_state = SMC_PROCESSABORT; 310 break; 311 case SMC_PEERCLOSEWAIT1: 312 case SMC_PEERCLOSEWAIT2: 313 if (txflags->peer_done_writing && 314 !smc_close_sent_any_close(&smc->conn)) 315 /* just shutdown, but not yet closed locally */ 316 sk->sk_state = SMC_PROCESSABORT; 317 else 318 sk->sk_state = SMC_CLOSED; 319 sock_put(sk); /* passive closing */ 320 break; 321 case SMC_APPCLOSEWAIT2: 322 case SMC_PEERFINCLOSEWAIT: 323 sk->sk_state = SMC_CLOSED; 324 sock_put(sk); /* passive closing */ 325 break; 326 case SMC_PEERABORTWAIT: 327 sk->sk_state = SMC_CLOSED; 328 break; 329 case SMC_PROCESSABORT: 330 /* nothing to do, add tracing in future patch */ 331 break; 332 } 333 } 334 335 /* Either some kind of closing has been received: peer_conn_closed, 336 * peer_conn_abort, or peer_done_writing 337 * or the link group of the connection terminates abnormally. 338 */ 339 static void smc_close_passive_work(struct work_struct *work) 340 { 341 struct smc_connection *conn = container_of(work, 342 struct smc_connection, 343 close_work); 344 struct smc_sock *smc = container_of(conn, struct smc_sock, conn); 345 struct smc_cdc_conn_state_flags *rxflags; 346 bool release_clcsock = false; 347 struct sock *sk = &smc->sk; 348 int old_state; 349 350 lock_sock(sk); 351 old_state = sk->sk_state; 352 353 rxflags = &conn->local_rx_ctrl.conn_state_flags; 354 if (rxflags->peer_conn_abort) { 355 /* peer has not received all data */ 356 smc_close_passive_abort_received(smc); 357 release_sock(&smc->sk); 358 cancel_delayed_work_sync(&conn->tx_work); 359 lock_sock(&smc->sk); 360 goto wakeup; 361 } 362 363 switch (sk->sk_state) { 364 case SMC_INIT: 365 sk->sk_state = SMC_APPCLOSEWAIT1; 366 break; 367 case SMC_ACTIVE: 368 sk->sk_state = SMC_APPCLOSEWAIT1; 369 /* postpone sock_put() for passive closing to cover 370 * received SEND_SHUTDOWN as well 371 */ 372 break; 373 case SMC_PEERCLOSEWAIT1: 374 if (rxflags->peer_done_writing) 375 sk->sk_state = SMC_PEERCLOSEWAIT2; 376 fallthrough; 377 /* to check for closing */ 378 case SMC_PEERCLOSEWAIT2: 379 if (!smc_cdc_rxed_any_close(conn)) 380 break; 381 if (sock_flag(sk, SOCK_DEAD) && 382 smc_close_sent_any_close(conn)) { 383 /* smc_release has already been called locally */ 384 sk->sk_state = SMC_CLOSED; 385 } else { 386 /* just shutdown, but not yet closed locally */ 387 sk->sk_state = SMC_APPFINCLOSEWAIT; 388 } 389 sock_put(sk); /* passive closing */ 390 break; 391 case SMC_PEERFINCLOSEWAIT: 392 if (smc_cdc_rxed_any_close(conn)) { 393 sk->sk_state = SMC_CLOSED; 394 sock_put(sk); /* passive closing */ 395 } 396 break; 397 case SMC_APPCLOSEWAIT1: 398 case SMC_APPCLOSEWAIT2: 399 /* postpone sock_put() for passive closing to cover 400 * received SEND_SHUTDOWN as well 401 */ 402 break; 403 case SMC_APPFINCLOSEWAIT: 404 case SMC_PEERABORTWAIT: 405 case SMC_PROCESSABORT: 406 case SMC_CLOSED: 407 /* nothing to do, add tracing in future patch */ 408 break; 409 } 410 411 wakeup: 412 sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */ 413 sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */ 414 415 if (old_state != sk->sk_state) { 416 sk->sk_state_change(sk); 417 if ((sk->sk_state == SMC_CLOSED) && 418 (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { 419 smc_conn_free(conn); 420 if (smc->clcsock) 421 release_clcsock = true; 422 } 423 } 424 release_sock(sk); 425 if (release_clcsock) 426 smc_clcsock_release(smc); 427 sock_put(sk); /* sock_hold done by schedulers of close_work */ 428 } 429 430 int smc_close_shutdown_write(struct smc_sock *smc) 431 { 432 struct smc_connection *conn = &smc->conn; 433 struct sock *sk = &smc->sk; 434 int old_state; 435 long timeout; 436 int rc = 0; 437 438 timeout = current->flags & PF_EXITING ? 439 0 : sock_flag(sk, SOCK_LINGER) ? 440 sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; 441 442 old_state = sk->sk_state; 443 again: 444 switch (sk->sk_state) { 445 case SMC_ACTIVE: 446 smc_close_stream_wait(smc, timeout); 447 release_sock(sk); 448 cancel_delayed_work_sync(&conn->tx_work); 449 lock_sock(sk); 450 if (sk->sk_state != SMC_ACTIVE) 451 goto again; 452 /* send close wr request */ 453 rc = smc_close_wr(conn); 454 sk->sk_state = SMC_PEERCLOSEWAIT1; 455 break; 456 case SMC_APPCLOSEWAIT1: 457 /* passive close */ 458 if (!smc_cdc_rxed_any_close(conn)) 459 smc_close_stream_wait(smc, timeout); 460 release_sock(sk); 461 cancel_delayed_work_sync(&conn->tx_work); 462 lock_sock(sk); 463 if (sk->sk_state != SMC_APPCLOSEWAIT1) 464 goto again; 465 /* confirm close from peer */ 466 rc = smc_close_wr(conn); 467 sk->sk_state = SMC_APPCLOSEWAIT2; 468 break; 469 case SMC_APPCLOSEWAIT2: 470 case SMC_PEERFINCLOSEWAIT: 471 case SMC_PEERCLOSEWAIT1: 472 case SMC_PEERCLOSEWAIT2: 473 case SMC_APPFINCLOSEWAIT: 474 case SMC_PROCESSABORT: 475 case SMC_PEERABORTWAIT: 476 /* nothing to do, add tracing in future patch */ 477 break; 478 } 479 480 if (old_state != sk->sk_state) 481 sk->sk_state_change(sk); 482 return rc; 483 } 484 485 /* Initialize close properties on connection establishment. */ 486 void smc_close_init(struct smc_sock *smc) 487 { 488 INIT_WORK(&smc->conn.close_work, smc_close_passive_work); 489 } 490