1 /* 2 * Shared Memory Communications over RDMA (SMC-R) and RoCE 3 * 4 * Socket Closing - normal and abnormal 5 * 6 * Copyright IBM Corp. 2016 7 * 8 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> 9 */ 10 11 #include <linux/workqueue.h> 12 #include <linux/sched/signal.h> 13 14 #include <net/sock.h> 15 16 #include "smc.h" 17 #include "smc_tx.h" 18 #include "smc_cdc.h" 19 #include "smc_close.h" 20 21 #define SMC_CLOSE_WAIT_TX_PENDS_TIME (5 * HZ) 22 23 static void smc_close_cleanup_listen(struct sock *parent) 24 { 25 struct sock *sk; 26 27 /* Close non-accepted connections */ 28 while ((sk = smc_accept_dequeue(parent, NULL))) 29 smc_close_non_accepted(sk); 30 } 31 32 static void smc_close_wait_tx_pends(struct smc_sock *smc) 33 { 34 DEFINE_WAIT_FUNC(wait, woken_wake_function); 35 struct sock *sk = &smc->sk; 36 signed long timeout; 37 38 timeout = SMC_CLOSE_WAIT_TX_PENDS_TIME; 39 add_wait_queue(sk_sleep(sk), &wait); 40 while (!signal_pending(current) && timeout) { 41 int rc; 42 43 rc = sk_wait_event(sk, &timeout, 44 !smc_cdc_tx_has_pending(&smc->conn), 45 &wait); 46 if (rc) 47 break; 48 } 49 remove_wait_queue(sk_sleep(sk), &wait); 50 } 51 52 /* wait for sndbuf data being transmitted */ 53 static void smc_close_stream_wait(struct smc_sock *smc, long timeout) 54 { 55 DEFINE_WAIT_FUNC(wait, woken_wake_function); 56 struct sock *sk = &smc->sk; 57 58 if (!timeout) 59 return; 60 61 if (!smc_tx_prepared_sends(&smc->conn)) 62 return; 63 64 smc->wait_close_tx_prepared = 1; 65 add_wait_queue(sk_sleep(sk), &wait); 66 while (!signal_pending(current) && timeout) { 67 int rc; 68 69 rc = sk_wait_event(sk, &timeout, 70 !smc_tx_prepared_sends(&smc->conn) || 71 (sk->sk_err == ECONNABORTED) || 72 (sk->sk_err == ECONNRESET), 73 &wait); 74 if (rc) 75 break; 76 } 77 remove_wait_queue(sk_sleep(sk), &wait); 78 smc->wait_close_tx_prepared = 0; 79 } 80 81 void smc_close_wake_tx_prepared(struct smc_sock *smc) 82 { 83 if (smc->wait_close_tx_prepared) 84 /* wake up socket closing */ 85 smc->sk.sk_state_change(&smc->sk); 86 } 87 88 static int smc_close_wr(struct smc_connection *conn) 89 { 90 conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1; 91 92 return smc_cdc_get_slot_and_msg_send(conn); 93 } 94 95 static int smc_close_final(struct smc_connection *conn) 96 { 97 if (atomic_read(&conn->bytes_to_rcv)) 98 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; 99 else 100 conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1; 101 102 return smc_cdc_get_slot_and_msg_send(conn); 103 } 104 105 static int smc_close_abort(struct smc_connection *conn) 106 { 107 conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; 108 109 return smc_cdc_get_slot_and_msg_send(conn); 110 } 111 112 /* terminate smc socket abnormally - active abort 113 * RDMA communication no longer possible 114 */ 115 void smc_close_active_abort(struct smc_sock *smc) 116 { 117 struct smc_cdc_conn_state_flags *txflags = 118 &smc->conn.local_tx_ctrl.conn_state_flags; 119 120 smc->sk.sk_err = ECONNABORTED; 121 if (smc->clcsock && smc->clcsock->sk) { 122 smc->clcsock->sk->sk_err = ECONNABORTED; 123 smc->clcsock->sk->sk_state_change(smc->clcsock->sk); 124 } 125 switch (smc->sk.sk_state) { 126 case SMC_INIT: 127 case SMC_ACTIVE: 128 smc->sk.sk_state = SMC_PEERABORTWAIT; 129 break; 130 case SMC_APPCLOSEWAIT1: 131 case SMC_APPCLOSEWAIT2: 132 txflags->peer_conn_abort = 1; 133 sock_release(smc->clcsock); 134 if (!smc_cdc_rxed_any_close(&smc->conn)) 135 smc->sk.sk_state = SMC_PEERABORTWAIT; 136 else 137 smc->sk.sk_state = SMC_CLOSED; 138 break; 139 case SMC_PEERCLOSEWAIT1: 140 case SMC_PEERCLOSEWAIT2: 141 if (!txflags->peer_conn_closed) { 142 smc->sk.sk_state = SMC_PEERABORTWAIT; 143 txflags->peer_conn_abort = 1; 144 sock_release(smc->clcsock); 145 } else { 146 smc->sk.sk_state = SMC_CLOSED; 147 } 148 break; 149 case SMC_PROCESSABORT: 150 case SMC_APPFINCLOSEWAIT: 151 if (!txflags->peer_conn_closed) { 152 txflags->peer_conn_abort = 1; 153 sock_release(smc->clcsock); 154 } 155 smc->sk.sk_state = SMC_CLOSED; 156 break; 157 case SMC_PEERFINCLOSEWAIT: 158 case SMC_PEERABORTWAIT: 159 case SMC_CLOSED: 160 break; 161 } 162 163 sock_set_flag(&smc->sk, SOCK_DEAD); 164 smc->sk.sk_state_change(&smc->sk); 165 } 166 167 int smc_close_active(struct smc_sock *smc) 168 { 169 struct smc_cdc_conn_state_flags *txflags = 170 &smc->conn.local_tx_ctrl.conn_state_flags; 171 long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT; 172 struct smc_connection *conn = &smc->conn; 173 struct sock *sk = &smc->sk; 174 int old_state; 175 int rc = 0; 176 177 if (sock_flag(sk, SOCK_LINGER) && 178 !(current->flags & PF_EXITING)) 179 timeout = sk->sk_lingertime; 180 181 again: 182 old_state = sk->sk_state; 183 switch (old_state) { 184 case SMC_INIT: 185 sk->sk_state = SMC_CLOSED; 186 if (smc->smc_listen_work.func) 187 cancel_work_sync(&smc->smc_listen_work); 188 sock_put(sk); 189 break; 190 case SMC_LISTEN: 191 sk->sk_state = SMC_CLOSED; 192 sk->sk_state_change(sk); /* wake up accept */ 193 if (smc->clcsock && smc->clcsock->sk) { 194 rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); 195 /* wake up kernel_accept of smc_tcp_listen_worker */ 196 smc->clcsock->sk->sk_data_ready(smc->clcsock->sk); 197 } 198 release_sock(sk); 199 smc_close_cleanup_listen(sk); 200 cancel_work_sync(&smc->smc_listen_work); 201 lock_sock(sk); 202 break; 203 case SMC_ACTIVE: 204 smc_close_stream_wait(smc, timeout); 205 release_sock(sk); 206 cancel_work_sync(&conn->tx_work); 207 lock_sock(sk); 208 if (sk->sk_state == SMC_ACTIVE) { 209 /* send close request */ 210 rc = smc_close_final(conn); 211 sk->sk_state = SMC_PEERCLOSEWAIT1; 212 } else { 213 /* peer event has changed the state */ 214 goto again; 215 } 216 break; 217 case SMC_APPFINCLOSEWAIT: 218 /* socket already shutdown wr or both (active close) */ 219 if (txflags->peer_done_writing && 220 !txflags->peer_conn_closed) { 221 /* just shutdown wr done, send close request */ 222 rc = smc_close_final(conn); 223 } 224 sk->sk_state = SMC_CLOSED; 225 smc_close_wait_tx_pends(smc); 226 break; 227 case SMC_APPCLOSEWAIT1: 228 case SMC_APPCLOSEWAIT2: 229 if (!smc_cdc_rxed_any_close(conn)) 230 smc_close_stream_wait(smc, timeout); 231 release_sock(sk); 232 cancel_work_sync(&conn->tx_work); 233 lock_sock(sk); 234 if (sk->sk_err != ECONNABORTED) { 235 /* confirm close from peer */ 236 rc = smc_close_final(conn); 237 if (rc) 238 break; 239 } 240 if (smc_cdc_rxed_any_close(conn)) 241 /* peer has closed the socket already */ 242 sk->sk_state = SMC_CLOSED; 243 else 244 /* peer has just issued a shutdown write */ 245 sk->sk_state = SMC_PEERFINCLOSEWAIT; 246 smc_close_wait_tx_pends(smc); 247 break; 248 case SMC_PEERCLOSEWAIT1: 249 case SMC_PEERCLOSEWAIT2: 250 case SMC_PEERFINCLOSEWAIT: 251 /* peer sending PeerConnectionClosed will cause transition */ 252 break; 253 case SMC_PROCESSABORT: 254 cancel_work_sync(&conn->tx_work); 255 smc_close_abort(conn); 256 sk->sk_state = SMC_CLOSED; 257 smc_close_wait_tx_pends(smc); 258 break; 259 case SMC_PEERABORTWAIT: 260 case SMC_CLOSED: 261 /* nothing to do, add tracing in future patch */ 262 break; 263 } 264 265 if (old_state != sk->sk_state) 266 sk->sk_state_change(&smc->sk); 267 return rc; 268 } 269 270 static void smc_close_passive_abort_received(struct smc_sock *smc) 271 { 272 struct smc_cdc_conn_state_flags *txflags = 273 &smc->conn.local_tx_ctrl.conn_state_flags; 274 struct sock *sk = &smc->sk; 275 276 switch (sk->sk_state) { 277 case SMC_ACTIVE: 278 case SMC_APPFINCLOSEWAIT: 279 case SMC_APPCLOSEWAIT1: 280 case SMC_APPCLOSEWAIT2: 281 smc_close_abort(&smc->conn); 282 sk->sk_state = SMC_PROCESSABORT; 283 break; 284 case SMC_PEERCLOSEWAIT1: 285 case SMC_PEERCLOSEWAIT2: 286 if (txflags->peer_done_writing && 287 !txflags->peer_conn_closed) { 288 /* just shutdown, but not yet closed locally */ 289 smc_close_abort(&smc->conn); 290 sk->sk_state = SMC_PROCESSABORT; 291 } else { 292 sk->sk_state = SMC_CLOSED; 293 } 294 break; 295 case SMC_PEERFINCLOSEWAIT: 296 case SMC_PEERABORTWAIT: 297 sk->sk_state = SMC_CLOSED; 298 break; 299 case SMC_INIT: 300 case SMC_PROCESSABORT: 301 /* nothing to do, add tracing in future patch */ 302 break; 303 } 304 } 305 306 /* Some kind of closing has been received: peer_conn_closed, peer_conn_abort, 307 * or peer_done_writing. 308 */ 309 static void smc_close_passive_work(struct work_struct *work) 310 { 311 struct smc_connection *conn = container_of(work, 312 struct smc_connection, 313 close_work); 314 struct smc_sock *smc = container_of(conn, struct smc_sock, conn); 315 struct smc_cdc_conn_state_flags *rxflags; 316 struct sock *sk = &smc->sk; 317 int old_state; 318 319 lock_sock(&smc->sk); 320 old_state = sk->sk_state; 321 322 if (!conn->alert_token_local) { 323 /* abnormal termination */ 324 smc_close_active_abort(smc); 325 goto wakeup; 326 } 327 328 rxflags = &smc->conn.local_rx_ctrl.conn_state_flags; 329 if (rxflags->peer_conn_abort) { 330 smc_close_passive_abort_received(smc); 331 goto wakeup; 332 } 333 334 switch (sk->sk_state) { 335 case SMC_INIT: 336 if (atomic_read(&smc->conn.bytes_to_rcv) || 337 (rxflags->peer_done_writing && 338 !rxflags->peer_conn_closed)) 339 sk->sk_state = SMC_APPCLOSEWAIT1; 340 else 341 sk->sk_state = SMC_CLOSED; 342 break; 343 case SMC_ACTIVE: 344 sk->sk_state = SMC_APPCLOSEWAIT1; 345 break; 346 case SMC_PEERCLOSEWAIT1: 347 if (rxflags->peer_done_writing) 348 sk->sk_state = SMC_PEERCLOSEWAIT2; 349 /* fall through to check for closing */ 350 case SMC_PEERCLOSEWAIT2: 351 case SMC_PEERFINCLOSEWAIT: 352 if (!smc_cdc_rxed_any_close(&smc->conn)) 353 break; 354 if (sock_flag(sk, SOCK_DEAD) && 355 (sk->sk_shutdown == SHUTDOWN_MASK)) { 356 /* smc_release has already been called locally */ 357 sk->sk_state = SMC_CLOSED; 358 } else { 359 /* just shutdown, but not yet closed locally */ 360 sk->sk_state = SMC_APPFINCLOSEWAIT; 361 } 362 break; 363 case SMC_APPCLOSEWAIT1: 364 case SMC_APPCLOSEWAIT2: 365 case SMC_APPFINCLOSEWAIT: 366 case SMC_PEERABORTWAIT: 367 case SMC_PROCESSABORT: 368 case SMC_CLOSED: 369 /* nothing to do, add tracing in future patch */ 370 break; 371 } 372 373 wakeup: 374 if (old_state != sk->sk_state) 375 sk->sk_state_change(sk); 376 sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */ 377 sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */ 378 379 if ((sk->sk_state == SMC_CLOSED) && 380 (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { 381 smc_conn_free(&smc->conn); 382 schedule_delayed_work(&smc->sock_put_work, 383 SMC_CLOSE_SOCK_PUT_DELAY); 384 } 385 release_sock(&smc->sk); 386 } 387 388 void smc_close_sock_put_work(struct work_struct *work) 389 { 390 struct smc_sock *smc = container_of(to_delayed_work(work), 391 struct smc_sock, 392 sock_put_work); 393 394 smc->sk.sk_prot->unhash(&smc->sk); 395 sock_put(&smc->sk); 396 } 397 398 int smc_close_shutdown_write(struct smc_sock *smc) 399 { 400 struct smc_connection *conn = &smc->conn; 401 long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT; 402 struct sock *sk = &smc->sk; 403 int old_state; 404 int rc = 0; 405 406 if (sock_flag(sk, SOCK_LINGER)) 407 timeout = sk->sk_lingertime; 408 409 again: 410 old_state = sk->sk_state; 411 switch (old_state) { 412 case SMC_ACTIVE: 413 smc_close_stream_wait(smc, timeout); 414 release_sock(sk); 415 cancel_work_sync(&conn->tx_work); 416 lock_sock(sk); 417 /* send close wr request */ 418 rc = smc_close_wr(conn); 419 if (sk->sk_state == SMC_ACTIVE) 420 sk->sk_state = SMC_PEERCLOSEWAIT1; 421 else 422 goto again; 423 break; 424 case SMC_APPCLOSEWAIT1: 425 /* passive close */ 426 if (!smc_cdc_rxed_any_close(conn)) 427 smc_close_stream_wait(smc, timeout); 428 release_sock(sk); 429 cancel_work_sync(&conn->tx_work); 430 lock_sock(sk); 431 /* confirm close from peer */ 432 rc = smc_close_wr(conn); 433 sk->sk_state = SMC_APPCLOSEWAIT2; 434 break; 435 case SMC_APPCLOSEWAIT2: 436 case SMC_PEERFINCLOSEWAIT: 437 case SMC_PEERCLOSEWAIT1: 438 case SMC_PEERCLOSEWAIT2: 439 case SMC_APPFINCLOSEWAIT: 440 case SMC_PROCESSABORT: 441 case SMC_PEERABORTWAIT: 442 /* nothing to do, add tracing in future patch */ 443 break; 444 } 445 446 if (old_state != sk->sk_state) 447 sk->sk_state_change(&smc->sk); 448 return rc; 449 } 450 451 /* Initialize close properties on connection establishment. */ 452 void smc_close_init(struct smc_sock *smc) 453 { 454 INIT_WORK(&smc->conn.close_work, smc_close_passive_work); 455 } 456