/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Socket Closing - normal and abnormal
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/workqueue.h>
#include <net/sock.h>

#include "smc.h"
#include "smc_tx.h"
#include "smc_cdc.h"
#include "smc_close.h"

#define SMC_CLOSE_WAIT_TX_PENDS_TIME	(5 * HZ)

static void smc_close_cleanup_listen(struct sock *parent)
{
	struct sock *sk;

	/* Close non-accepted connections */
	while ((sk = smc_accept_dequeue(parent, NULL)))
		smc_close_non_accepted(sk);
}

/* wait (with timeout) until pending CDC transmissions are confirmed */
static void smc_close_wait_tx_pends(struct smc_sock *smc)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;
	signed long timeout;

	timeout = SMC_CLOSE_WAIT_TX_PENDS_TIME;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_cdc_tx_has_pending(&smc->conn),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
}

/* wait for sndbuf data being transmitted */
static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;

	if (!timeout)
		return;

	if (!smc_tx_prepared_sends(&smc->conn))
		return;

	smc->wait_close_tx_prepared = 1;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_tx_prepared_sends(&smc->conn) ||
				   (sk->sk_err == ECONNABORTED) ||
				   (sk->sk_err == ECONNRESET),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
	smc->wait_close_tx_prepared = 0;
}

void smc_close_wake_tx_prepared(struct smc_sock *smc)
{
	if (smc->wait_close_tx_prepared)
		/* wake up socket closing */
		smc->sk.sk_state_change(&smc->sk);
}

/* announce a shutdown(write): this side is done writing */
static int smc_close_wr(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

/* announce the regular close; degrade to an abort while unread data is
 * still pending
 */
static int smc_close_final(struct smc_connection *conn)
{
	if (atomic_read(&conn->bytes_to_rcv))
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

/* announce an abnormal termination of the connection */
static int smc_close_abort(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}
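/* Added commentary on the three senders above: each sets exactly one
 * connection state flag in the local CDC control area and pushes a CDC
 * message to the peer:
 *   peer_done_writing - half close, this side will not send more data
 *   peer_conn_closed  - regular full close
 *   peer_conn_abort   - abnormal close, pending data may be dropped
 */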
/* terminate smc socket abnormally - active abort
 * RDMA communication no longer possible
 */
void smc_close_active_abort(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;

	bh_lock_sock(&smc->sk);
	smc->sk.sk_err = ECONNABORTED;
	if (smc->clcsock && smc->clcsock->sk) {
		smc->clcsock->sk->sk_err = ECONNABORTED;
		smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
	}
	switch (smc->sk.sk_state) {
	case SMC_INIT:
		smc->sk.sk_state = SMC_PEERABORTWAIT;
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		txflags->peer_conn_abort = 1;
		sock_release(smc->clcsock);
		if (!smc_cdc_rxed_any_close(&smc->conn))
			smc->sk.sk_state = SMC_PEERABORTWAIT;
		else
			smc->sk.sk_state = SMC_CLOSED;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (!txflags->peer_conn_closed) {
			smc->sk.sk_state = SMC_PEERABORTWAIT;
			txflags->peer_conn_abort = 1;
			sock_release(smc->clcsock);
		} else {
			smc->sk.sk_state = SMC_CLOSED;
		}
		break;
	case SMC_PROCESSABORT:
	case SMC_APPFINCLOSEWAIT:
		if (!txflags->peer_conn_closed) {
			txflags->peer_conn_abort = 1;
			sock_release(smc->clcsock);
		}
		smc->sk.sk_state = SMC_CLOSED;
		break;
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		break;
	}

	sock_set_flag(&smc->sk, SOCK_DEAD);
	bh_unlock_sock(&smc->sk);
	smc->sk.sk_state_change(&smc->sk);
}

int smc_close_active(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	int rc = 0;

	if (sock_flag(sk, SOCK_LINGER) &&
	    !(current->flags & PF_EXITING))
		timeout = sk->sk_lingertime;

again:
	old_state = sk->sk_state;
	switch (old_state) {
	case SMC_INIT:
		sk->sk_state = SMC_CLOSED;
		if (smc->smc_listen_work.func)
			flush_work(&smc->smc_listen_work);
		sock_put(sk);
		break;
	case SMC_LISTEN:
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk); /* wake up accept */
		if (smc->clcsock && smc->clcsock->sk) {
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
			/* wake up kernel_accept of smc_tcp_listen_worker */
			smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
		}
		release_sock(sk);
		smc_close_cleanup_listen(sk);
		flush_work(&smc->tcp_listen_work);
		lock_sock(sk);
		break;
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state == SMC_ACTIVE) {
			/* send close request */
			rc = smc_close_final(conn);
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		} else {
			/* peer event has changed the state */
			goto again;
		}
		break;
	case SMC_APPFINCLOSEWAIT:
		/* socket already shutdown wr or both (active close) */
		if (txflags->peer_done_writing &&
		    !txflags->peer_conn_closed) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		sk->sk_state = SMC_CLOSED;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_err != ECONNABORTED) {
			/* confirm close from peer */
			rc = smc_close_final(conn);
			if (rc)
				break;
		}
		if (smc_cdc_rxed_any_close(conn))
			/* peer has closed the socket already */
			sk->sk_state = SMC_CLOSED;
		else
			/* peer has just issued a shutdown write */
			sk->sk_state = SMC_PEERFINCLOSEWAIT;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PROCESSABORT:
		cancel_work_sync(&conn->tx_work);
		smc_close_abort(conn);
		sk->sk_state = SMC_CLOSED;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(&smc->sk);
	return rc;
}
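/* For orientation (added commentary, derived from smc_close_active() above
 * and the passive-close handlers below): a graceful close walks roughly
 *   active closer:  SMC_ACTIVE -> SMC_PEERCLOSEWAIT1 -> SMC_PEERCLOSEWAIT2
 *                   -> SMC_CLOSED
 *   passive closer: SMC_ACTIVE -> SMC_APPCLOSEWAIT1 [-> SMC_APPCLOSEWAIT2]
 *                   -> SMC_CLOSED
 * while abort handling funnels through SMC_PROCESSABORT respectively
 * SMC_PEERABORTWAIT.
 */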
static void smc_close_passive_abort_received(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct sock *sk = &smc->sk;

	switch (sk->sk_state) {
	case SMC_ACTIVE:
	case SMC_APPFINCLOSEWAIT:
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		smc_close_abort(&smc->conn);
		sk->sk_state = SMC_PROCESSABORT;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !txflags->peer_conn_closed) {
			/* just shutdown, but not yet closed locally */
			smc_close_abort(&smc->conn);
			sk->sk_state = SMC_PROCESSABORT;
		} else {
			sk->sk_state = SMC_CLOSED;
		}
		break;
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_INIT:
	case SMC_PROCESSABORT:
		/* nothing to do, add tracing in future patch */
		break;
	}
}

/* Some kind of closing has been received: peer_conn_closed, peer_conn_abort,
 * or peer_done_writing.
 * Called under tasklet context.
 */
void smc_close_passive_received(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *rxflags =
		&smc->conn.local_rx_ctrl.conn_state_flags;
	struct sock *sk = &smc->sk;
	int old_state;

	sk->sk_shutdown |= RCV_SHUTDOWN;
	if (smc->clcsock && smc->clcsock->sk)
		smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
	sock_set_flag(&smc->sk, SOCK_DONE);

	old_state = sk->sk_state;

	if (rxflags->peer_conn_abort) {
		smc_close_passive_abort_received(smc);
		goto wakeup;
	}

	switch (sk->sk_state) {
	case SMC_INIT:
		if (atomic_read(&smc->conn.bytes_to_rcv) ||
		    (rxflags->peer_done_writing &&
		     !rxflags->peer_conn_closed))
			sk->sk_state = SMC_APPCLOSEWAIT1;
		else
			sk->sk_state = SMC_CLOSED;
		break;
	case SMC_ACTIVE:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		break;
	case SMC_PEERCLOSEWAIT1:
		if (rxflags->peer_done_writing)
			sk->sk_state = SMC_PEERCLOSEWAIT2;
		/* fall through to check for closing */
	case SMC_PEERCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		if (!smc_cdc_rxed_any_close(&smc->conn))
			break;
		if (sock_flag(sk, SOCK_DEAD) &&
		    (sk->sk_shutdown == SHUTDOWN_MASK)) {
			/* smc_release has already been called locally */
			sk->sk_state = SMC_CLOSED;
		} else {
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_APPFINCLOSEWAIT;
		}
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_PROCESSABORT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

wakeup:
	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
	sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */

	if ((sk->sk_state == SMC_CLOSED) &&
	    (sock_flag(sk, SOCK_DEAD) || (old_state == SMC_INIT))) {
		smc_conn_free(&smc->conn);
		schedule_delayed_work(&smc->sock_put_work,
				      SMC_CLOSE_SOCK_PUT_DELAY);
	}
}

/* delayed worker: unhash the socket and drop the final reference */
void smc_close_sock_put_work(struct work_struct *work)
{
	struct smc_sock *smc = container_of(to_delayed_work(work),
					    struct smc_sock,
					    sock_put_work);

	smc->sk.sk_prot->unhash(&smc->sk);
	sock_put(&smc->sk);
}
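/* Half close requested by the application via shutdown(SHUT_WR): only
 * peer_done_writing is announced through smc_close_wr(), so the connection
 * stays usable for receiving (comment added for clarity; compare
 * smc_close_final() used on a full close).
 */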
int smc_close_shutdown_write(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;
	long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
	struct sock *sk = &smc->sk;
	int old_state;
	int rc = 0;

	if (sock_flag(sk, SOCK_LINGER))
		timeout = sk->sk_lingertime;

again:
	old_state = sk->sk_state;
	switch (old_state) {
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* send close wr request */
		rc = smc_close_wr(conn);
		if (sk->sk_state == SMC_ACTIVE)
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		else
			goto again;
		break;
	case SMC_APPCLOSEWAIT1:
		/* passive close */
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* confirm close from peer */
		rc = smc_close_wr(conn);
		sk->sk_state = SMC_APPCLOSEWAIT2;
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PROCESSABORT:
	case SMC_PEERABORTWAIT:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(&smc->sk);
	return rc;
}
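/* Presumed call paths into this file (illustrative mapping only, assuming
 * the usual af_smc.c wiring; not part of this source):
 *   close(fd)             -> smc_release()  -> smc_close_active()
 *   shutdown(fd, SHUT_WR) -> smc_shutdown() -> smc_close_shutdown_write()
 *   CDC message received  -> rx tasklet     -> smc_close_passive_received()
 *   fatal transport error                   -> smc_close_active_abort()
 */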