// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Socket Closing - normal and abnormal
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/workqueue.h>
#include <linux/sched/signal.h>

#include <net/sock.h>

#include "smc.h"
#include "smc_tx.h"
#include "smc_cdc.h"
#include "smc_close.h"

/* Tear down all child sockets queued on a listen socket that were never
 * accepted by user space.
 */
static void smc_close_cleanup_listen(struct sock *parent)
{
	struct sock *sk;

	/* Close non-accepted connections */
	while ((sk = smc_accept_dequeue(parent, NULL)))
		smc_close_non_accepted(sk);
}

/* wait for sndbuf data being transmitted */
static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;

	if (!timeout)
		return;

	/* nothing pending in the send buffer - no need to wait */
	if (!smc_tx_prepared_sends(&smc->conn))
		return;

	/* flag makes smc_close_wake_tx_prepared() wake us via sk_state_change */
	smc->wait_close_tx_prepared = 1;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		/* wake up when all prepared sends left the sndbuf, on socket
		 * error, or when the timeout elapses
		 */
		rc = sk_wait_event(sk, &timeout,
				   !smc_tx_prepared_sends(&smc->conn) ||
				   (sk->sk_err == ECONNABORTED) ||
				   (sk->sk_err == ECONNRESET),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
	smc->wait_close_tx_prepared = 0;
}

/* Wake up a closer blocked in smc_close_stream_wait() once transmitted
 * data has drained.
 */
void smc_close_wake_tx_prepared(struct smc_sock *smc)
{
	if (smc->wait_close_tx_prepared)
		/* wake up socket closing */
		smc->sk.sk_state_change(&smc->sk);
}

/* Announce "done writing" (shutdown write) to the peer via a CDC message. */
static int smc_close_wr(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

/* Announce final close to the peer. If unreceived data is still queued
 * locally, signal an abortive close instead of an orderly one.
 */
static int smc_close_final(struct smc_connection *conn)
{
	if (atomic_read(&conn->bytes_to_rcv))
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

/* Announce an abortive close to the peer via a CDC message. */
static int smc_close_abort(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

/* terminate smc socket abnormally - active abort
 * link group is terminated, i.e. RDMA communication no longer possible
 */
static void smc_close_active_abort(struct smc_sock *smc)
{
	struct sock *sk = &smc->sk;

	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;

	sk->sk_err = ECONNABORTED;
	/* propagate the error to the internal TCP (CLC) socket, if any */
	if (smc->clcsock && smc->clcsock->sk) {
		smc->clcsock->sk->sk_err = ECONNABORTED;
		smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
	}
	switch (sk->sk_state) {
	case SMC_INIT:
	case SMC_ACTIVE:
		sk->sk_state = SMC_PEERABORTWAIT;
		/* NOTE(review): lock is dropped around the cancel - presumably
		 * because tx_work takes the sock lock itself; confirm against
		 * smc_tx_work
		 */
		release_sock(sk);
		cancel_delayed_work_sync(&smc->conn.tx_work);
		lock_sock(sk);
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		sock_release(smc->clcsock);
		if (!smc_cdc_rxed_any_close(&smc->conn))
			sk->sk_state = SMC_PEERABORTWAIT;
		else
			sk->sk_state = SMC_CLOSED;
		release_sock(sk);
		cancel_delayed_work_sync(&smc->conn.tx_work);
		lock_sock(sk);
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		/* only release the CLC socket if we did not yet send our own
		 * close/abort indication to the peer
		 */
		if (!txflags->peer_conn_closed) {
			sk->sk_state = SMC_PEERABORTWAIT;
			sock_release(smc->clcsock);
		} else {
			sk->sk_state = SMC_CLOSED;
		}
		break;
	case SMC_PROCESSABORT:
	case SMC_APPFINCLOSEWAIT:
		if (!txflags->peer_conn_closed)
			sock_release(smc->clcsock);
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		break;
	}

	sock_set_flag(sk, SOCK_DEAD);
	sk->sk_state_change(sk);
}

/* Return true if we already sent any kind of close indication
 * (orderly close or abort) to the peer.
 */
static inline bool smc_close_sent_any_close(struct smc_connection *conn)
{
	return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort ||
	       conn->local_tx_ctrl.conn_state_flags.peer_conn_closed;
}

/* Active close of the socket (e.g. from smc_release()): drive the SMC
 * state machine forward from the current state, notifying the peer as
 * needed. Called with the sock lock held; may temporarily drop it.
 * Returns 0 or a negative error from sending the CDC close message.
 */
int smc_close_active(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	/* no linger wait for exiting tasks; otherwise honor SO_LINGER */
	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

	old_state = sk->sk_state;
again:
	switch (sk->sk_state) {
	case SMC_INIT:
		sk->sk_state = SMC_CLOSED;
		if (smc->smc_listen_work.func)
			cancel_work_sync(&smc->smc_listen_work);
		break;
	case SMC_LISTEN:
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk); /* wake up accept */
		if (smc->clcsock && smc->clcsock->sk) {
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
			/* wake up kernel_accept of smc_tcp_listen_worker */
			smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
		}
		release_sock(sk);
		smc_close_cleanup_listen(sk);
		cancel_work_sync(&smc->smc_listen_work);
		lock_sock(sk);
		break;
	case SMC_ACTIVE:
		/* drain pending sends before announcing the close */
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state == SMC_ACTIVE) {
			/* send close request */
			rc = smc_close_final(conn);
			if (rc)
				break;
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		} else {
			/* peer event has changed the state */
			goto again;
		}
		break;
	case SMC_APPFINCLOSEWAIT:
		/* socket already shutdown wr or both (active close) */
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
			if (rc)
				break;
		}
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* state may have changed while the lock was dropped */
		if (sk->sk_state != SMC_APPCLOSEWAIT1 &&
		    sk->sk_state != SMC_APPCLOSEWAIT2)
			goto again;
		/* confirm close from peer */
		rc = smc_close_final(conn);
		if (rc)
			break;
		if (smc_cdc_rxed_any_close(conn))
			/* peer has closed the socket already */
			sk->sk_state = SMC_CLOSED;
		else
			/* peer has just issued a shutdown write */
			sk->sk_state = SMC_PEERFINCLOSEWAIT;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
			if (rc)
				break;
		}
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PEERFINCLOSEWAIT:
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PROCESSABORT:
		smc_close_abort(conn);
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	return rc;
}

/* Handle reception of a peer_conn_abort indication: move the socket
 * towards SMC_PROCESSABORT or SMC_CLOSED depending on how far our own
 * close already progressed.
 */
static void smc_close_passive_abort_received(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct sock *sk = &smc->sk;

	switch (sk->sk_state) {
	case SMC_ACTIVE:
	case SMC_APPFINCLOSEWAIT:
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		sk->sk_state = SMC_PROCESSABORT;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(&smc->conn)) {
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_PROCESSABORT;
		} else {
			sk->sk_state = SMC_CLOSED;
		}
		break;
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_INIT:
	case SMC_PROCESSABORT:
		/* nothing to do, add tracing in future patch */
		break;
	}
}

/* Either some kind of closing has been received: peer_conn_closed,
 * peer_conn_abort, or peer_done_writing
 * or the link group of the connection terminates abnormally.
 */
static void smc_close_passive_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   close_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_cdc_conn_state_flags *rxflags;
	struct sock *sk = &smc->sk;
	int old_state;

	lock_sock(sk);
	old_state = sk->sk_state;

	if (!conn->alert_token_local) {
		/* abnormal termination */
		smc_close_active_abort(smc);
		goto wakeup;
	}

	rxflags = &conn->local_rx_ctrl.conn_state_flags;
	if (rxflags->peer_conn_abort) {
		/* peer has not received all data */
		smc_close_passive_abort_received(smc);
		release_sock(&smc->sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(&smc->sk);
		goto wakeup;
	}

	switch (sk->sk_state) {
	case SMC_INIT:
		/* peer close raced with connection setup: if data or a
		 * shutdown-write is pending, let the application read it first
		 */
		if (atomic_read(&conn->bytes_to_rcv) ||
		    (rxflags->peer_done_writing &&
		     !smc_cdc_rxed_any_close(conn)))
			sk->sk_state = SMC_APPCLOSEWAIT1;
		else
			sk->sk_state = SMC_CLOSED;
		break;
	case SMC_ACTIVE:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		break;
	case SMC_PEERCLOSEWAIT1:
		if (rxflags->peer_done_writing)
			sk->sk_state = SMC_PEERCLOSEWAIT2;
		/* fall through */
		/* to check for closing */
	case SMC_PEERCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			break;
		if (sock_flag(sk, SOCK_DEAD) &&
		    smc_close_sent_any_close(conn)) {
			/* smc_release has already been called locally */
			sk->sk_state = SMC_CLOSED;
		} else {
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_APPFINCLOSEWAIT;
		}
		break;
	case SMC_PEERFINCLOSEWAIT:
		if (smc_cdc_rxed_any_close(conn))
			sk->sk_state = SMC_CLOSED;
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_PROCESSABORT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

wakeup:
	sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
	sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */

	if (old_state != sk->sk_state) {
		sk->sk_state_change(sk);
		/* once fully closed and no longer referenced by user space,
		 * free the connection and schedule the final sock_put
		 */
		if ((sk->sk_state == SMC_CLOSED) &&
		    (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
			smc_conn_free(conn);
			schedule_delayed_work(&smc->sock_put_work,
					      SMC_CLOSE_SOCK_PUT_DELAY);
		}
	}
	release_sock(sk);
}

/* Delayed final cleanup: unhash the socket and drop the reference taken
 * for the close handling.
 */
void smc_close_sock_put_work(struct work_struct *work)
{
	struct smc_sock *smc = container_of(to_delayed_work(work),
					    struct smc_sock,
					    sock_put_work);

	smc->sk.sk_prot->unhash(&smc->sk);
	sock_put(&smc->sk);
}

/* shutdown(SHUT_WR) handling: announce "done writing" to the peer and
 * advance the state machine. Called with the sock lock held; may
 * temporarily drop it. Returns 0 or a negative error from the CDC send.
 */
int smc_close_shutdown_write(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	/* no linger wait for exiting tasks; otherwise honor SO_LINGER */
	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

	old_state = sk->sk_state;
again:
	switch (sk->sk_state) {
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* state may have changed while the lock was dropped */
		if (sk->sk_state != SMC_ACTIVE)
			goto again;
		/* send close wr request */
		rc = smc_close_wr(conn);
		if (rc)
			break;
		sk->sk_state = SMC_PEERCLOSEWAIT1;
		break;
	case SMC_APPCLOSEWAIT1:
		/* passive close */
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state != SMC_APPCLOSEWAIT1)
			goto again;
		/* confirm close from peer */
		rc = smc_close_wr(conn);
		if (rc)
			break;
		sk->sk_state = SMC_APPCLOSEWAIT2;
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PROCESSABORT:
	case SMC_PEERABORTWAIT:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	return rc;
}

/* Initialize close properties on connection establishment. */
void smc_close_init(struct smc_sock *smc)
{
	INIT_WORK(&smc->conn.close_work, smc_close_passive_work);
}