// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Socket Closing - normal and abnormal
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/workqueue.h>
#include <linux/sched/signal.h>

#include <net/sock.h>

#include "smc.h"
#include "smc_tx.h"
#include "smc_cdc.h"
#include "smc_close.h"

#define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME	(5 * HZ)

static void smc_close_cleanup_listen(struct sock *parent)
{
	struct sock *sk;

	/* Close non-accepted connections */
	while ((sk = smc_accept_dequeue(parent, NULL)))
		smc_close_non_accepted(sk);
}

/* wait for sndbuf data being transmitted */
static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;

	if (!timeout)
		return;

	if (!smc_tx_prepared_sends(&smc->conn))
		return;

	smc->wait_close_tx_prepared = 1;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_tx_prepared_sends(&smc->conn) ||
				   (sk->sk_err == ECONNABORTED) ||
				   (sk->sk_err == ECONNRESET),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
	smc->wait_close_tx_prepared = 0;
}

void smc_close_wake_tx_prepared(struct smc_sock *smc)
{
	if (smc->wait_close_tx_prepared)
		/* wake up socket closing */
		smc->sk.sk_state_change(&smc->sk);
}
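
/* The next three helpers tell the peer about a local close event by
 * setting one of the conn_state_flags in the local CDC control message
 * and sending it: peer_done_writing announces a write shutdown,
 * peer_conn_closed a regular close (smc_close_final() downgrades this
 * to an abort indication when locally unread data would be discarded),
 * and peer_conn_abort an abnormal termination.
 */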
static int smc_close_wr(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

static int smc_close_final(struct smc_connection *conn)
{
	if (atomic_read(&conn->bytes_to_rcv))
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

static int smc_close_abort(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

/* terminate smc socket abnormally - active abort
 * link group is terminated, i.e. RDMA communication no longer possible
 */
static void smc_close_active_abort(struct smc_sock *smc)
{
	struct sock *sk = &smc->sk;

	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;

	sk->sk_err = ECONNABORTED;
	if (smc->clcsock && smc->clcsock->sk) {
		smc->clcsock->sk->sk_err = ECONNABORTED;
		smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
	}
	switch (sk->sk_state) {
	case SMC_INIT:
		sk->sk_state = SMC_PEERABORTWAIT;
		break;
	case SMC_ACTIVE:
		sk->sk_state = SMC_PEERABORTWAIT;
		release_sock(sk);
		cancel_delayed_work_sync(&smc->conn.tx_work);
		lock_sock(sk);
		sock_put(sk); /* passive closing */
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(&smc->conn))
			sk->sk_state = SMC_PEERABORTWAIT;
		else
			sk->sk_state = SMC_CLOSED;
		release_sock(sk);
		cancel_delayed_work_sync(&smc->conn.tx_work);
		lock_sock(sk);
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (!txflags->peer_conn_closed) {
			/* just SHUTDOWN_SEND done */
			sk->sk_state = SMC_PEERABORTWAIT;
		} else {
			sk->sk_state = SMC_CLOSED;
		}
		sock_put(sk); /* passive closing */
		break;
	case SMC_PROCESSABORT:
	case SMC_APPFINCLOSEWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PEERFINCLOSEWAIT:
		sock_put(sk); /* passive closing */
		break;
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		break;
	}

	sock_set_flag(sk, SOCK_DEAD);
	sk->sk_state_change(sk);
}

static inline bool smc_close_sent_any_close(struct smc_connection *conn)
{
	return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort ||
	       conn->local_tx_ctrl.conn_state_flags.peer_conn_closed;
}
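
/* Active close processing: wait for pending sends within the linger
 * time, send the close indication the current state requires, and
 * advance sk_state toward SMC_CLOSED.
 */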
int smc_close_active(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

	old_state = sk->sk_state;
again:
	switch (sk->sk_state) {
	case SMC_INIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_LISTEN:
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk); /* wake up accept */
		if (smc->clcsock && smc->clcsock->sk) {
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
			/* wake up kernel_accept of smc_tcp_listen_worker */
			smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
		}
		smc_close_cleanup_listen(sk);
		release_sock(sk);
		flush_work(&smc->tcp_listen_work);
		lock_sock(sk);
		break;
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state == SMC_ACTIVE) {
			/* send close request */
			rc = smc_close_final(conn);
			if (rc)
				break;
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		} else {
			/* peer event has changed the state */
			goto again;
		}
		break;
	case SMC_APPFINCLOSEWAIT:
		/* socket already shutdown wr or both (active close) */
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
			if (rc)
				break;
		}
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state != SMC_APPCLOSEWAIT1 &&
		    sk->sk_state != SMC_APPCLOSEWAIT2)
			goto again;
		/* confirm close from peer */
		rc = smc_close_final(conn);
		if (rc)
			break;
		if (smc_cdc_rxed_any_close(conn)) {
			/* peer has closed the socket already */
			sk->sk_state = SMC_CLOSED;
			sock_put(sk); /* postponed passive closing */
		} else {
			/* peer has just issued a shutdown write */
			sk->sk_state = SMC_PEERFINCLOSEWAIT;
		}
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
			if (rc)
				break;
		}
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PEERFINCLOSEWAIT:
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PROCESSABORT:
		smc_close_abort(conn);
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	return rc;
}
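
/* A peer_conn_abort indication was received: depending on how far the
 * local close has progressed, enter SMC_PROCESSABORT (abort still to
 * be processed locally) or go directly to SMC_CLOSED.
 */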
static void smc_close_passive_abort_received(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct sock *sk = &smc->sk;

	switch (sk->sk_state) {
	case SMC_INIT:
	case SMC_ACTIVE:
	case SMC_APPCLOSEWAIT1:
		sk->sk_state = SMC_PROCESSABORT;
		sock_put(sk); /* passive closing */
		break;
	case SMC_APPFINCLOSEWAIT:
		sk->sk_state = SMC_PROCESSABORT;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(&smc->conn))
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_PROCESSABORT;
		else
			sk->sk_state = SMC_CLOSED;
		sock_put(sk); /* passive closing */
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		sk->sk_state = SMC_CLOSED;
		sock_put(sk); /* passive closing */
		break;
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PROCESSABORT:
		/* nothing to do, add tracing in future patch */
		break;
	}
}

/* Either some kind of closing has been received: peer_conn_closed,
 * peer_conn_abort, or peer_done_writing
 * or the link group of the connection terminates abnormally.
 */
static void smc_close_passive_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   close_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_cdc_conn_state_flags *rxflags;
	struct sock *sk = &smc->sk;
	int old_state;

	lock_sock(sk);
	old_state = sk->sk_state;

	if (!conn->alert_token_local) {
		/* abnormal termination */
		smc_close_active_abort(smc);
		goto wakeup;
	}

	rxflags = &conn->local_rx_ctrl.conn_state_flags;
	if (rxflags->peer_conn_abort) {
		/* peer has not received all data */
		smc_close_passive_abort_received(smc);
		release_sock(&smc->sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(&smc->sk);
		goto wakeup;
	}

	switch (sk->sk_state) {
	case SMC_INIT:
		if (atomic_read(&conn->bytes_to_rcv) ||
		    (rxflags->peer_done_writing &&
		     !smc_cdc_rxed_any_close(conn))) {
			sk->sk_state = SMC_APPCLOSEWAIT1;
		} else {
			sk->sk_state = SMC_CLOSED;
			sock_put(sk); /* passive closing */
		}
		break;
	case SMC_ACTIVE:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		/* postpone sock_put() for passive closing to cover
		 * received SEND_SHUTDOWN as well
		 */
		break;
	case SMC_PEERCLOSEWAIT1:
		if (rxflags->peer_done_writing)
			sk->sk_state = SMC_PEERCLOSEWAIT2;
		/* fall through */
		/* to check for closing */
	case SMC_PEERCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			break;
		if (sock_flag(sk, SOCK_DEAD) &&
		    smc_close_sent_any_close(conn)) {
			/* smc_release has already been called locally */
			sk->sk_state = SMC_CLOSED;
		} else {
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_APPFINCLOSEWAIT;
		}
		sock_put(sk); /* passive closing */
		break;
	case SMC_PEERFINCLOSEWAIT:
		if (smc_cdc_rxed_any_close(conn)) {
			sk->sk_state = SMC_CLOSED;
			sock_put(sk); /* passive closing */
		}
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		/* postpone sock_put() for passive closing to cover
		 * received SEND_SHUTDOWN as well
		 */
		break;
	case SMC_APPFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_PROCESSABORT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

wakeup:
	sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
	sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */

	if (old_state != sk->sk_state) {
		sk->sk_state_change(sk);
		if ((sk->sk_state == SMC_CLOSED) &&
		    (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket))
			smc_conn_free(conn);
	}
	release_sock(sk);
	sock_put(sk); /* sock_hold done by schedulers of close_work */
}
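
/* Active part of a write shutdown: flush pending sends within the
 * linger time and announce "done writing" to the peer; the receive
 * direction of the socket stays usable.
 */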
int smc_close_shutdown_write(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

	old_state = sk->sk_state;
again:
	switch (sk->sk_state) {
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state != SMC_ACTIVE)
			goto again;
		/* send close wr request */
		rc = smc_close_wr(conn);
		if (rc)
			break;
		sk->sk_state = SMC_PEERCLOSEWAIT1;
		break;
	case SMC_APPCLOSEWAIT1:
		/* passive close */
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state != SMC_APPCLOSEWAIT1)
			goto again;
		/* confirm close from peer */
		rc = smc_close_wr(conn);
		if (rc)
			break;
		sk->sk_state = SMC_APPCLOSEWAIT2;
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PROCESSABORT:
	case SMC_PEERABORTWAIT:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	return rc;
}

/* Initialize close properties on connection establishment. */
void smc_close_init(struct smc_sock *smc)
{
	INIT_WORK(&smc->conn.close_work, smc_close_passive_work);
}