// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Socket Closing - normal and abnormal
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/workqueue.h>
#include <linux/sched/signal.h>

#include <net/sock.h>

#include "smc.h"
#include "smc_tx.h"
#include "smc_cdc.h"
#include "smc_close.h"

#define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME	(5 * HZ)

/* release the clcsock that is assigned to the smc_sock */
void smc_clcsock_release(struct smc_sock *smc)
{
	struct socket *tcp;

	if (smc->listen_smc && current_work() != &smc->smc_listen_work)
		cancel_work_sync(&smc->smc_listen_work);
	mutex_lock(&smc->clcsock_release_lock);
	if (smc->clcsock) {
		tcp = smc->clcsock;
		smc->clcsock = NULL;
		sock_release(tcp);
	}
	mutex_unlock(&smc->clcsock_release_lock);
}

static void smc_close_cleanup_listen(struct sock *parent)
{
	struct sock *sk;

	/* Close non-accepted connections */
	while ((sk = smc_accept_dequeue(parent, NULL)))
		smc_close_non_accepted(sk);
}

/* wait for sndbuf data being transmitted */
static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;

	if (!timeout)
		return;

	if (!smc_tx_prepared_sends(&smc->conn))
		return;

	smc->wait_close_tx_prepared = 1;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_tx_prepared_sends(&smc->conn) ||
				   sk->sk_err == ECONNABORTED ||
				   sk->sk_err == ECONNRESET ||
				   smc->conn.killed,
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
	smc->wait_close_tx_prepared = 0;
}

void smc_close_wake_tx_prepared(struct smc_sock *smc)
{
	if (smc->wait_close_tx_prepared)
		/* wake up socket closing */
		smc->sk.sk_state_change(&smc->sk);
}
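
/* announce the write shutdown (peer_done_writing) to the peer via CDC */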
static int smc_close_wr(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

/* announce the final close to the peer: an abort if unread data is still
 * pending locally, a normal close otherwise
 */
static int smc_close_final(struct smc_connection *conn)
{
	if (atomic_read(&conn->bytes_to_rcv))
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;
	if (conn->killed)
		return -EPIPE;

	return smc_cdc_get_slot_and_msg_send(conn);
}

/* announce an abnormal termination (abort) to the peer */
static int smc_close_abort(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

/* terminate smc socket abnormally - active abort
 * link group is terminated, i.e. RDMA communication no longer possible
 */
static void smc_close_active_abort(struct smc_sock *smc)
{
	struct sock *sk = &smc->sk;

	if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) {
		sk->sk_err = ECONNABORTED;
		if (smc->clcsock && smc->clcsock->sk) {
			smc->clcsock->sk->sk_err = ECONNABORTED;
			smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
		}
	}
	switch (sk->sk_state) {
	case SMC_ACTIVE:
		sk->sk_state = SMC_PEERABORTWAIT;
		release_sock(sk);
		cancel_delayed_work_sync(&smc->conn.tx_work);
		lock_sock(sk);
		sk->sk_state = SMC_CLOSED;
		sock_put(sk); /* passive closing */
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		release_sock(sk);
		cancel_delayed_work_sync(&smc->conn.tx_work);
		lock_sock(sk);
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		sk->sk_state = SMC_CLOSED;
		sock_put(sk); /* passive closing */
		break;
	case SMC_PROCESSABORT:
	case SMC_APPFINCLOSEWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_INIT:
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		break;
	}

	sock_set_flag(sk, SOCK_DEAD);
	sk->sk_state_change(sk);
}

/* a close indication (abort or closed) was already set towards the peer */
static inline bool smc_close_sent_any_close(struct smc_connection *conn)
{
	return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort ||
	       conn->local_tx_ctrl.conn_state_flags.peer_conn_closed;
}
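
/* drive the local (active) close, depending on the current socket state */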
int smc_close_active(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

	old_state = sk->sk_state;
again:
	switch (sk->sk_state) {
	case SMC_INIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_LISTEN:
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk); /* wake up accept */
		if (smc->clcsock && smc->clcsock->sk) {
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
			/* wake up kernel_accept of smc_tcp_listen_worker */
			smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
		}
		smc_close_cleanup_listen(sk);
		release_sock(sk);
		flush_work(&smc->tcp_listen_work);
		lock_sock(sk);
		break;
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state == SMC_ACTIVE) {
			/* send close request */
			rc = smc_close_final(conn);
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		} else {
			/* peer event has changed the state */
			goto again;
		}
		break;
	case SMC_APPFINCLOSEWAIT:
		/* socket already shutdown wr or both (active close) */
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state != SMC_APPCLOSEWAIT1 &&
		    sk->sk_state != SMC_APPCLOSEWAIT2)
			goto again;
		/* confirm close from peer */
		rc = smc_close_final(conn);
		if (smc_cdc_rxed_any_close(conn)) {
			/* peer has closed the socket already */
			sk->sk_state = SMC_CLOSED;
			sock_put(sk); /* postponed passive closing */
		} else {
			/* peer has just issued a shutdown write */
			sk->sk_state = SMC_PEERFINCLOSEWAIT;
		}
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PEERFINCLOSEWAIT:
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PROCESSABORT:
		rc = smc_close_abort(conn);
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	return rc;
}
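
/* the peer has aborted the connection; adjust the local socket state */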
static void smc_close_passive_abort_received(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct sock *sk = &smc->sk;

	switch (sk->sk_state) {
	case SMC_INIT:
	case SMC_ACTIVE:
	case SMC_APPCLOSEWAIT1:
		sk->sk_state = SMC_PROCESSABORT;
		sock_put(sk); /* passive closing */
		break;
	case SMC_APPFINCLOSEWAIT:
		sk->sk_state = SMC_PROCESSABORT;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(&smc->conn))
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_PROCESSABORT;
		else
			sk->sk_state = SMC_CLOSED;
		sock_put(sk); /* passive closing */
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		sk->sk_state = SMC_CLOSED;
		sock_put(sk); /* passive closing */
		break;
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PROCESSABORT:
		/* nothing to do, add tracing in future patch */
		break;
	}
}

/* Either some kind of closing has been received: peer_conn_closed,
 * peer_conn_abort, or peer_done_writing
 * or the link group of the connection terminates abnormally.
 */
static void smc_close_passive_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   close_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_cdc_conn_state_flags *rxflags;
	bool release_clcsock = false;
	struct sock *sk = &smc->sk;
	int old_state;

	lock_sock(sk);
	old_state = sk->sk_state;

	if (conn->killed) {
		/* abnormal termination */
		smc_close_active_abort(smc);
		goto wakeup;
	}

	rxflags = &conn->local_rx_ctrl.conn_state_flags;
	if (rxflags->peer_conn_abort) {
		/* peer has not received all data */
		smc_close_passive_abort_received(smc);
		release_sock(&smc->sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(&smc->sk);
		goto wakeup;
	}

	switch (sk->sk_state) {
	case SMC_INIT:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		break;
	case SMC_ACTIVE:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		/* postpone sock_put() for passive closing to cover
		 * received SEND_SHUTDOWN as well
		 */
		break;
	case SMC_PEERCLOSEWAIT1:
		if (rxflags->peer_done_writing)
			sk->sk_state = SMC_PEERCLOSEWAIT2;
		/* fall through */
		/* to check for closing */
	case SMC_PEERCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			break;
		if (sock_flag(sk, SOCK_DEAD) &&
		    smc_close_sent_any_close(conn)) {
			/* smc_release has already been called locally */
			sk->sk_state = SMC_CLOSED;
		} else {
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_APPFINCLOSEWAIT;
		}
		sock_put(sk); /* passive closing */
		break;
	case SMC_PEERFINCLOSEWAIT:
		if (smc_cdc_rxed_any_close(conn)) {
			sk->sk_state = SMC_CLOSED;
			sock_put(sk); /* passive closing */
		}
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		/* postpone sock_put() for passive closing to cover
		 * received SEND_SHUTDOWN as well
		 */
		break;
	case SMC_APPFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_PROCESSABORT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

wakeup:
	sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
	sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */

	if (old_state != sk->sk_state) {
		sk->sk_state_change(sk);
		if ((sk->sk_state == SMC_CLOSED) &&
		    (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
			smc_conn_free(conn);
			if (smc->clcsock)
				release_clcsock = true;
		}
	}
	release_sock(sk);
	if (release_clcsock)
		smc_clcsock_release(smc);
	sock_put(sk); /* sock_hold done by schedulers of close_work */
}
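
/* announce a write shutdown to the peer, depending on the socket state */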
int smc_close_shutdown_write(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

	old_state = sk->sk_state;
again:
	switch (sk->sk_state) {
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state != SMC_ACTIVE)
			goto again;
		/* send close wr request */
		rc = smc_close_wr(conn);
		sk->sk_state = SMC_PEERCLOSEWAIT1;
		break;
	case SMC_APPCLOSEWAIT1:
		/* passive close */
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state != SMC_APPCLOSEWAIT1)
			goto again;
		/* confirm close from peer */
		rc = smc_close_wr(conn);
		sk->sk_state = SMC_APPCLOSEWAIT2;
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PROCESSABORT:
	case SMC_PEERABORTWAIT:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	return rc;
}

/* Initialize close properties on connection establishment. */
void smc_close_init(struct smc_sock *smc)
{
	INIT_WORK(&smc->conn.close_work, smc_close_passive_work);
}