/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Socket Closing - normal and abnormal
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/workqueue.h>
#include <linux/sched/signal.h>

#include <net/sock.h>

#include "smc.h"
#include "smc_tx.h"
#include "smc_cdc.h"
#include "smc_close.h"

#define SMC_CLOSE_WAIT_TX_PENDS_TIME	(5 * HZ)

static void smc_close_cleanup_listen(struct sock *parent)
{
	struct sock *sk;

	/* Close non-accepted connections */
	while ((sk = smc_accept_dequeue(parent, NULL)))
		smc_close_non_accepted(sk);
}

static void smc_close_wait_tx_pends(struct smc_sock *smc)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;
	signed long timeout;

	timeout = SMC_CLOSE_WAIT_TX_PENDS_TIME;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_cdc_tx_has_pending(&smc->conn),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
}

/* wait for sndbuf data being transmitted */
static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;

	if (!timeout)
		return;

	if (!smc_tx_prepared_sends(&smc->conn))
		return;

	smc->wait_close_tx_prepared = 1;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_tx_prepared_sends(&smc->conn) ||
				   (sk->sk_err == ECONNABORTED) ||
				   (sk->sk_err == ECONNRESET),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
	smc->wait_close_tx_prepared = 0;
}

void smc_close_wake_tx_prepared(struct smc_sock *smc)
{
	if (smc->wait_close_tx_prepared)
		/* wake up socket closing */
		smc->sk.sk_state_change(&smc->sk);
}

static int smc_close_wr(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

static int smc_close_final(struct smc_connection *conn)
{
	if (atomic_read(&conn->bytes_to_rcv))
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

static int smc_close_abort(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}
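
/* The three helpers above cover every close indication this side can send.
 * Each raises one flag in the local CDC control area and pushes it to the
 * peer via smc_cdc_get_slot_and_msg_send():
 *
 *   smc_close_wr()    - peer_done_writing: no more data will be sent
 *                       (half close, shutdown(SHUT_WR) semantics)
 *   smc_close_final() - peer_conn_closed: orderly close; downgraded to
 *                       peer_conn_abort when unread receive data would
 *                       otherwise be thrown away
 *   smc_close_abort() - peer_conn_abort: unconditional abnormal close
 */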

/* terminate smc socket abnormally - active abort
 * RDMA communication no longer possible
 */
void smc_close_active_abort(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;

	bh_lock_sock(&smc->sk);
	smc->sk.sk_err = ECONNABORTED;
	if (smc->clcsock && smc->clcsock->sk) {
		smc->clcsock->sk->sk_err = ECONNABORTED;
		smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
	}
	switch (smc->sk.sk_state) {
	case SMC_INIT:
		smc->sk.sk_state = SMC_PEERABORTWAIT;
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		txflags->peer_conn_abort = 1;
		sock_release(smc->clcsock);
		if (!smc_cdc_rxed_any_close(&smc->conn))
			smc->sk.sk_state = SMC_PEERABORTWAIT;
		else
			smc->sk.sk_state = SMC_CLOSED;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (!txflags->peer_conn_closed) {
			smc->sk.sk_state = SMC_PEERABORTWAIT;
			txflags->peer_conn_abort = 1;
			sock_release(smc->clcsock);
		} else {
			smc->sk.sk_state = SMC_CLOSED;
		}
		break;
	case SMC_PROCESSABORT:
	case SMC_APPFINCLOSEWAIT:
		if (!txflags->peer_conn_closed) {
			txflags->peer_conn_abort = 1;
			sock_release(smc->clcsock);
		}
		smc->sk.sk_state = SMC_CLOSED;
		break;
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		break;
	}

	sock_set_flag(&smc->sk, SOCK_DEAD);
	bh_unlock_sock(&smc->sk);
	smc->sk.sk_state_change(&smc->sk);
}

int smc_close_active(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	int rc = 0;

	if (sock_flag(sk, SOCK_LINGER) &&
	    !(current->flags & PF_EXITING))
		timeout = sk->sk_lingertime;

again:
	old_state = sk->sk_state;
	switch (old_state) {
	case SMC_INIT:
		sk->sk_state = SMC_CLOSED;
		if (smc->smc_listen_work.func)
			flush_work(&smc->smc_listen_work);
		sock_put(sk);
		break;
	case SMC_LISTEN:
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk); /* wake up accept */
		if (smc->clcsock && smc->clcsock->sk) {
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
			/* wake up kernel_accept of smc_tcp_listen_worker */
			smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
		}
		release_sock(sk);
		smc_close_cleanup_listen(sk);
		flush_work(&smc->tcp_listen_work);
		lock_sock(sk);
		break;
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state == SMC_ACTIVE) {
			/* send close request */
			rc = smc_close_final(conn);
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		} else {
			/* peer event has changed the state */
			goto again;
		}
		break;
	case SMC_APPFINCLOSEWAIT:
		/* socket already shutdown wr or both (active close) */
		if (txflags->peer_done_writing &&
		    !txflags->peer_conn_closed) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		sk->sk_state = SMC_CLOSED;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_err != ECONNABORTED) {
			/* confirm close from peer */
			rc = smc_close_final(conn);
			if (rc)
				break;
		}
		if (smc_cdc_rxed_any_close(conn))
			/* peer has closed the socket already */
			sk->sk_state = SMC_CLOSED;
		else
			/* peer has just issued a shutdown write */
			sk->sk_state = SMC_PEERFINCLOSEWAIT;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PROCESSABORT:
		cancel_work_sync(&conn->tx_work);
		smc_close_abort(conn);
		sk->sk_state = SMC_CLOSED;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(&smc->sk);
	return rc;
}
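
/* Active-close state transitions driven by the switch above:
 *
 *   SMC_INIT, SMC_LISTEN   -> SMC_CLOSED
 *   SMC_ACTIVE             -> SMC_PEERCLOSEWAIT1 (close request sent)
 *   SMC_APPFINCLOSEWAIT    -> SMC_CLOSED
 *   SMC_APPCLOSEWAIT1/2    -> SMC_CLOSED if the peer already sent a close
 *                             indication, else SMC_PEERFINCLOSEWAIT
 *   SMC_PROCESSABORT       -> SMC_CLOSED (abort sent)
 *
 * SMC_PEERCLOSEWAIT1/2 and SMC_PEERFINCLOSEWAIT are left only when the
 * peer's PeerConnectionClosed indication arrives; see
 * smc_close_passive_received() below.
 */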

static void smc_close_passive_abort_received(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct sock *sk = &smc->sk;

	switch (sk->sk_state) {
	case SMC_ACTIVE:
	case SMC_APPFINCLOSEWAIT:
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		smc_close_abort(&smc->conn);
		sk->sk_state = SMC_PROCESSABORT;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !txflags->peer_conn_closed) {
			/* just shutdown, but not yet closed locally */
			smc_close_abort(&smc->conn);
			sk->sk_state = SMC_PROCESSABORT;
		} else {
			sk->sk_state = SMC_CLOSED;
		}
		break;
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_INIT:
	case SMC_PROCESSABORT:
		/* nothing to do, add tracing in future patch */
		break;
	}
}

/* Some kind of closing has been received: peer_conn_closed, peer_conn_abort,
 * or peer_done_writing.
 * Called under tasklet context.
 */
void smc_close_passive_received(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *rxflags =
		&smc->conn.local_rx_ctrl.conn_state_flags;
	struct sock *sk = &smc->sk;
	int old_state;

	sk->sk_shutdown |= RCV_SHUTDOWN;
	if (smc->clcsock && smc->clcsock->sk)
		smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
	sock_set_flag(&smc->sk, SOCK_DONE);

	old_state = sk->sk_state;

	if (rxflags->peer_conn_abort) {
		smc_close_passive_abort_received(smc);
		goto wakeup;
	}

	switch (sk->sk_state) {
	case SMC_INIT:
		if (atomic_read(&smc->conn.bytes_to_rcv) ||
		    (rxflags->peer_done_writing &&
		     !rxflags->peer_conn_closed))
			sk->sk_state = SMC_APPCLOSEWAIT1;
		else
			sk->sk_state = SMC_CLOSED;
		break;
	case SMC_ACTIVE:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		break;
	case SMC_PEERCLOSEWAIT1:
		if (rxflags->peer_done_writing)
			sk->sk_state = SMC_PEERCLOSEWAIT2;
		/* fall through to check for closing */
	case SMC_PEERCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		if (!smc_cdc_rxed_any_close(&smc->conn))
			break;
		if (sock_flag(sk, SOCK_DEAD) &&
		    (sk->sk_shutdown == SHUTDOWN_MASK)) {
			/* smc_release has already been called locally */
			sk->sk_state = SMC_CLOSED;
		} else {
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_APPFINCLOSEWAIT;
		}
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_PROCESSABORT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

wakeup:
	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
	sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */

	if ((sk->sk_state == SMC_CLOSED) &&
	    (sock_flag(sk, SOCK_DEAD) || (old_state == SMC_INIT))) {
		smc_conn_free(&smc->conn);
		schedule_delayed_work(&smc->sock_put_work,
				      SMC_CLOSE_SOCK_PUT_DELAY);
	}
}

void smc_close_sock_put_work(struct work_struct *work)
{
	struct smc_sock *smc = container_of(to_delayed_work(work),
					    struct smc_sock,
					    sock_put_work);

	smc->sk.sk_prot->unhash(&smc->sk);
	sock_put(&smc->sk);
}
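
/* smc_close_passive_received() runs in the CDC receive tasklet.  A sketch
 * of the expected call site in smc_cdc.c (an assumption based on the
 * "Called under tasklet context" comment above, not part of this file):
 *
 *	if (smc_cdc_rxed_any_close_or_senddone(conn))
 *		smc_close_passive_received(smc);
 *
 * The final unhash/sock_put is deferred through sock_put_work by
 * SMC_CLOSE_SOCK_PUT_DELAY, presumably to let concurrent tasklet and
 * worker users of the sock drop their references first.
 */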

int smc_close_shutdown_write(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;
	long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
	struct sock *sk = &smc->sk;
	int old_state;
	int rc = 0;

	if (sock_flag(sk, SOCK_LINGER))
		timeout = sk->sk_lingertime;

again:
	old_state = sk->sk_state;
	switch (old_state) {
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* send close wr request */
		rc = smc_close_wr(conn);
		if (sk->sk_state == SMC_ACTIVE)
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		else
			goto again;
		break;
	case SMC_APPCLOSEWAIT1:
		/* passive close */
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* confirm close from peer */
		rc = smc_close_wr(conn);
		sk->sk_state = SMC_APPCLOSEWAIT2;
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PROCESSABORT:
	case SMC_PEERABORTWAIT:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(&smc->sk);
	return rc;
}
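
/* User-space view of the two main entry points of this file (a sketch;
 * AF_SMC is 43 in <linux/socket.h> and may need defining by hand with
 * older libc headers):
 *
 *	int fd = socket(AF_SMC, SOCK_STREAM, 0);
 *	connect(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	write(fd, buf, len);
 *	shutdown(fd, SHUT_WR);	  half close: reaches
 *				  smc_close_shutdown_write(); reads can
 *				  still drain data sent by the peer
 *	close(fd);		  full close: reaches smc_close_active()
 */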