// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause

/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
/*          Fredy Neeser */
/*          Greg Joyce <greg@opengridcomputing.com> */
/* Copyright (c) 2008-2019, IBM Corporation */
/* Copyright (c) 2017, Open Grid Computing, Inc. */

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/net.h>
#include <linux/inetdevice.h>
#include <net/addrconf.h>
#include <linux/workqueue.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <linux/inet.h>
#include <linux/tcp.h>

#include <rdma/iw_cm.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>

#include "siw.h"
#include "siw_cm.h"

/*
 * Set to any combination of
 * MPA_V2_RDMA_NO_RTR, MPA_V2_RDMA_READ_RTR, MPA_V2_RDMA_WRITE_RTR
 */
static __be16 rtr_type = MPA_V2_RDMA_READ_RTR | MPA_V2_RDMA_WRITE_RTR;
static const bool relaxed_ird_negotiation = true;

static void siw_cm_llp_state_change(struct sock *s);
static void siw_cm_llp_data_ready(struct sock *s);
static void siw_cm_llp_write_space(struct sock *s);
static void siw_cm_llp_error_report(struct sock *s);
static int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type reason,
			 int status);

static void siw_sk_assign_cm_upcalls(struct sock *sk)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = siw_cm_llp_state_change;
	sk->sk_data_ready = siw_cm_llp_data_ready;
	sk->sk_write_space = siw_cm_llp_write_space;
	sk->sk_error_report = siw_cm_llp_error_report;
	write_unlock_bh(&sk->sk_callback_lock);
}

static void siw_sk_save_upcalls(struct sock *sk)
{
	struct siw_cep *cep = sk_to_cep(sk);

	write_lock_bh(&sk->sk_callback_lock);
	cep->sk_state_change = sk->sk_state_change;
	cep->sk_data_ready = sk->sk_data_ready;
	cep->sk_write_space = sk->sk_write_space;
	cep->sk_error_report = sk->sk_error_report;
	write_unlock_bh(&sk->sk_callback_lock);
}

static void siw_sk_restore_upcalls(struct sock *sk, struct siw_cep *cep)
{
	sk->sk_state_change = cep->sk_state_change;
	sk->sk_data_ready = cep->sk_data_ready;
	sk->sk_write_space = cep->sk_write_space;
	sk->sk_error_report = cep->sk_error_report;
	sk->sk_user_data = NULL;
}

static void siw_qp_socket_assoc(struct siw_cep *cep, struct siw_qp *qp)
{
	struct socket *s = cep->sock;
	struct sock *sk = s->sk;

	write_lock_bh(&sk->sk_callback_lock);

	qp->attrs.sk = s;
	sk->sk_data_ready = siw_qp_llp_data_ready;
	sk->sk_write_space = siw_qp_llp_write_space;

	write_unlock_bh(&sk->sk_callback_lock);
}

static void siw_socket_disassoc(struct socket *s)
{
	struct sock *sk = s->sk;
	struct siw_cep *cep;

	if (sk) {
		write_lock_bh(&sk->sk_callback_lock);
		cep = sk_to_cep(sk);
		if (cep) {
			siw_sk_restore_upcalls(sk, cep);
			siw_cep_put(cep);
		} else {
			pr_warn("siw: cannot restore sk callbacks: no ep\n");
		}
		write_unlock_bh(&sk->sk_callback_lock);
	} else {
		pr_warn("siw: cannot restore sk callbacks: no sk\n");
	}
}

static void siw_rtr_data_ready(struct sock *sk)
{
	struct siw_cep *cep;
	struct siw_qp *qp = NULL;
	read_descriptor_t rd_desc;

	read_lock(&sk->sk_callback_lock);

	cep = sk_to_cep(sk);
	if (!cep) {
		WARN(1, "No connection endpoint\n");
		goto out;
	}
	qp = sk_to_qp(sk);

	memset(&rd_desc, 0, sizeof(rd_desc));
	rd_desc.arg.data = qp;
	rd_desc.count = 1;

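	/*
	 * Process data already queued on the socket. This is expected to
	 * be the peer's zero length RTR frame; siw_tcp_rx_data() feeds it
	 * into the regular siw receive path.
	 */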
	tcp_read_sock(sk, &rd_desc, siw_tcp_rx_data);
	/*
	 * Check if first frame was successfully processed.
	 * Signal connection full establishment if yes.
	 * Failed data processing would have already scheduled
	 * connection drop.
	 */
	if (!qp->rx_stream.rx_suspend)
		siw_cm_upcall(cep, IW_CM_EVENT_ESTABLISHED, 0);
out:
	read_unlock(&sk->sk_callback_lock);
	if (qp)
		siw_qp_socket_assoc(cep, qp);
}

static void siw_sk_assign_rtr_upcalls(struct siw_cep *cep)
{
	struct sock *sk = cep->sock->sk;

	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_data_ready = siw_rtr_data_ready;
	sk->sk_write_space = siw_qp_llp_write_space;
	write_unlock_bh(&sk->sk_callback_lock);
}

static void siw_cep_socket_assoc(struct siw_cep *cep, struct socket *s)
{
	cep->sock = s;
	siw_cep_get(cep);
	s->sk->sk_user_data = cep;

	siw_sk_save_upcalls(s->sk);
	siw_sk_assign_cm_upcalls(s->sk);
}

static struct siw_cep *siw_cep_alloc(struct siw_device *sdev)
{
	struct siw_cep *cep = kzalloc(sizeof(*cep), GFP_KERNEL);
	unsigned long flags;

	if (!cep)
		return NULL;

	INIT_LIST_HEAD(&cep->listenq);
	INIT_LIST_HEAD(&cep->devq);
	INIT_LIST_HEAD(&cep->work_freelist);

	kref_init(&cep->ref);
	cep->state = SIW_EPSTATE_IDLE;
	init_waitqueue_head(&cep->waitq);
	spin_lock_init(&cep->lock);
	cep->sdev = sdev;
	cep->enhanced_rdma_conn_est = false;

	spin_lock_irqsave(&sdev->lock, flags);
	list_add_tail(&cep->devq, &sdev->cep_list);
	spin_unlock_irqrestore(&sdev->lock, flags);

	siw_dbg_cep(cep, "new endpoint\n");
	return cep;
}

static void siw_cm_free_work(struct siw_cep *cep)
{
	struct list_head *w, *tmp;
	struct siw_cm_work *work;

	list_for_each_safe(w, tmp, &cep->work_freelist) {
		work = list_entry(w, struct siw_cm_work, list);
		list_del(&work->list);
		kfree(work);
	}
}

static void siw_cancel_mpatimer(struct siw_cep *cep)
{
	spin_lock_bh(&cep->lock);
	if (cep->mpa_timer) {
		if (cancel_delayed_work(&cep->mpa_timer->work)) {
			siw_cep_put(cep);
			kfree(cep->mpa_timer); /* not needed again */
		}
		cep->mpa_timer = NULL;
	}
	spin_unlock_bh(&cep->lock);
}

static void siw_put_work(struct siw_cm_work *work)
{
	INIT_LIST_HEAD(&work->list);
	spin_lock_bh(&work->cep->lock);
	list_add(&work->list, &work->cep->work_freelist);
	spin_unlock_bh(&work->cep->lock);
}

static void siw_cep_set_inuse(struct siw_cep *cep)
{
	unsigned long flags;
	int rv;
retry:
	spin_lock_irqsave(&cep->lock, flags);

	if (cep->in_use) {
		spin_unlock_irqrestore(&cep->lock, flags);
		rv = wait_event_interruptible(cep->waitq, !cep->in_use);
		if (signal_pending(current))
			flush_signals(current);
		goto retry;
	} else {
		cep->in_use = 1;
		spin_unlock_irqrestore(&cep->lock, flags);
	}
}

static void siw_cep_set_free(struct siw_cep *cep)
{
	unsigned long flags;

	spin_lock_irqsave(&cep->lock, flags);
	cep->in_use = 0;
	spin_unlock_irqrestore(&cep->lock, flags);

	wake_up(&cep->waitq);
}

static void __siw_cep_dealloc(struct kref *ref)
{
	struct siw_cep *cep = container_of(ref, struct siw_cep, ref);
	struct siw_device *sdev = cep->sdev;
	unsigned long flags;

	WARN_ON(cep->listen_cep);

	/* kfree(NULL) is safe */
	kfree(cep->mpa.pdata);
	spin_lock_bh(&cep->lock);
	if (!list_empty(&cep->work_freelist))
		siw_cm_free_work(cep);
	spin_unlock_bh(&cep->lock);

	spin_lock_irqsave(&sdev->lock, flags);
	list_del(&cep->devq);
	spin_unlock_irqrestore(&sdev->lock, flags);

	siw_dbg_cep(cep, "free endpoint\n");
	kfree(cep);
}

static struct siw_cm_work *siw_get_work(struct siw_cep *cep)
{
	struct siw_cm_work *work = NULL;

	spin_lock_bh(&cep->lock);
	if (!list_empty(&cep->work_freelist)) {
		work = list_entry(cep->work_freelist.next, struct siw_cm_work,
				  list);
		list_del_init(&work->list);
	}
	spin_unlock_bh(&cep->lock);
	return work;
}

static int siw_cm_alloc_work(struct siw_cep *cep, int num)
{
	struct siw_cm_work *work;

	while (num--) {
		work = kmalloc(sizeof(*work), GFP_KERNEL);
		if (!work) {
			if (!(list_empty(&cep->work_freelist)))
				siw_cm_free_work(cep);
			return -ENOMEM;
		}
		work->cep = cep;
		INIT_LIST_HEAD(&work->list);
		list_add(&work->list, &cep->work_freelist);
	}
	return 0;
}

/*
 * siw_cm_upcall()
 *
 * Upcall to IWCM to inform about async connection events
 */
static int siw_cm_upcall(struct siw_cep *cep, enum iw_cm_event_type reason,
			 int status)
{
	struct iw_cm_event event;
	struct iw_cm_id *id;

	memset(&event, 0, sizeof(event));
	event.status = status;
	event.event = reason;

	if (reason == IW_CM_EVENT_CONNECT_REQUEST) {
		event.provider_data = cep;
		id = cep->listen_cep->cm_id;
	} else {
		id = cep->cm_id;
	}
	/* Signal IRD and ORD */
	if (reason == IW_CM_EVENT_ESTABLISHED ||
	    reason == IW_CM_EVENT_CONNECT_REPLY) {
		/* Signal negotiated IRD/ORD values we will use */
		event.ird = cep->ird;
		event.ord = cep->ord;
	} else if (reason == IW_CM_EVENT_CONNECT_REQUEST) {
		event.ird = cep->ord;
		event.ord = cep->ird;
	}
	/* Signal private data and address information */
	if (reason == IW_CM_EVENT_CONNECT_REQUEST ||
	    reason == IW_CM_EVENT_CONNECT_REPLY) {
		u16 pd_len = be16_to_cpu(cep->mpa.hdr.params.pd_len);

		if (pd_len) {
			/*
			 * hand over MPA private data
			 */
			event.private_data_len = pd_len;
			event.private_data = cep->mpa.pdata;

			/* Hide MPA V2 IRD/ORD control */
			if (cep->enhanced_rdma_conn_est) {
				event.private_data_len -=
					sizeof(struct mpa_v2_data);
				event.private_data +=
					sizeof(struct mpa_v2_data);
			}
		}
		getname_local(cep->sock, &event.local_addr);
		getname_peer(cep->sock, &event.remote_addr);
	}
	siw_dbg_cep(cep, "[QP %u]: id 0x%p, reason=%d, status=%d\n",
		    cep->qp ? qp_id(cep->qp) : -1, id, reason, status);

	return id->event_handler(id, &event);
}

/*
 * siw_qp_cm_drop()
 *
 * Drops established LLP connection if present and not already
 * scheduled for dropping. Called from user context, SQ workqueue
 * or receive IRQ. Caller signals if socket can be immediately
 * closed (basically, if not in IRQ).
 */
void siw_qp_cm_drop(struct siw_qp *qp, int schedule)
{
	struct siw_cep *cep = qp->cep;

	qp->rx_stream.rx_suspend = 1;
	qp->tx_ctx.tx_suspend = 1;

	if (!qp->cep)
		return;

	if (schedule) {
		siw_cm_queue_work(cep, SIW_CM_WORK_CLOSE_LLP);
	} else {
		siw_cep_set_inuse(cep);

		if (cep->state == SIW_EPSTATE_CLOSED) {
			siw_dbg_cep(cep, "already closed\n");
			goto out;
		}
		siw_dbg_cep(cep, "immediate close, state %d\n", cep->state);

		if (qp->term_info.valid)
			siw_send_terminate(qp);

		if (cep->cm_id) {
			switch (cep->state) {
			case SIW_EPSTATE_AWAIT_MPAREP:
				siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
					      -EINVAL);
				break;

			case SIW_EPSTATE_RDMA_MODE:
				siw_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
				break;

			case SIW_EPSTATE_IDLE:
			case SIW_EPSTATE_LISTENING:
			case SIW_EPSTATE_CONNECTING:
			case SIW_EPSTATE_AWAIT_MPAREQ:
			case SIW_EPSTATE_RECVD_MPAREQ:
			case SIW_EPSTATE_CLOSED:
			default:
				break;
			}
			cep->cm_id->rem_ref(cep->cm_id);
			cep->cm_id = NULL;
			siw_cep_put(cep);
		}
		cep->state = SIW_EPSTATE_CLOSED;

		if (cep->sock) {
			siw_socket_disassoc(cep->sock);
			/*
			 * Immediately close socket
			 */
			sock_release(cep->sock);
			cep->sock = NULL;
		}
		if (cep->qp) {
			cep->qp = NULL;
			siw_qp_put(qp);
		}
out:
		siw_cep_set_free(cep);
	}
}

void siw_cep_put(struct siw_cep *cep)
{
	WARN_ON(kref_read(&cep->ref) < 1);
	kref_put(&cep->ref, __siw_cep_dealloc);
}

void siw_cep_get(struct siw_cep *cep)
{
	kref_get(&cep->ref);
}

/*
 * Expects params->pd_len in host byte order
 */
static int siw_send_mpareqrep(struct siw_cep *cep, const void *pdata, u8 pd_len)
{
	struct socket *s = cep->sock;
	struct mpa_rr *rr = &cep->mpa.hdr;
	struct kvec iov[3];
	struct msghdr msg;
	int rv;
	int iovec_num = 0;
	int mpa_len;

	memset(&msg, 0, sizeof(msg));

	iov[iovec_num].iov_base = rr;
	iov[iovec_num].iov_len = sizeof(*rr);
	mpa_len = sizeof(*rr);

	if (cep->enhanced_rdma_conn_est) {
		iovec_num++;
		iov[iovec_num].iov_base = &cep->mpa.v2_ctrl;
		iov[iovec_num].iov_len = sizeof(cep->mpa.v2_ctrl);
		mpa_len += sizeof(cep->mpa.v2_ctrl);
	}
	if (pd_len) {
		iovec_num++;
		iov[iovec_num].iov_base = (char *)pdata;
		iov[iovec_num].iov_len = pd_len;
		mpa_len += pd_len;
	}
	if (cep->enhanced_rdma_conn_est)
		pd_len += sizeof(cep->mpa.v2_ctrl);

	rr->params.pd_len = cpu_to_be16(pd_len);

	rv = kernel_sendmsg(s, &msg, iov, iovec_num + 1, mpa_len);

	return rv < 0 ? rv : 0;
}

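/*
 * siw_send_mpareqrep() puts the MPA Request/Reply header (16 byte key
 * plus parameter word) on the wire, optionally followed by the MPA v2
 * IRD/ORD control word for enhanced connection setup, and finally any
 * ULP private data. The advertised pd_len covers the v2 control word
 * plus the private data.
 */
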
/*
 * Receive MPA Request/Reply header.
 *
 * Returns 0 if complete MPA Request/Reply header including
 * any private data was received. Returns -EAGAIN if
 * header was partially received or negative error code otherwise.
 *
 * Context: May be called in process context only
 */
static int siw_recv_mpa_rr(struct siw_cep *cep)
{
	struct mpa_rr *hdr = &cep->mpa.hdr;
	struct socket *s = cep->sock;
	u16 pd_len;
	int rcvd, to_rcv;

	if (cep->mpa.bytes_rcvd < sizeof(struct mpa_rr)) {
		rcvd = ksock_recv(s, (char *)hdr + cep->mpa.bytes_rcvd,
				  sizeof(struct mpa_rr) - cep->mpa.bytes_rcvd,
				  0);
		if (rcvd <= 0)
			return -ECONNABORTED;

		cep->mpa.bytes_rcvd += rcvd;

		if (cep->mpa.bytes_rcvd < sizeof(struct mpa_rr))
			return -EAGAIN;

		if (be16_to_cpu(hdr->params.pd_len) > MPA_MAX_PRIVDATA)
			return -EPROTO;
	}
	pd_len = be16_to_cpu(hdr->params.pd_len);

	/*
	 * At least the MPA Request/Reply header (frame not including
	 * private data) has been received.
	 * Receive (or continue receiving) any private data.
	 */
	to_rcv = pd_len - (cep->mpa.bytes_rcvd - sizeof(struct mpa_rr));

	if (!to_rcv) {
		/*
		 * We must have hdr->params.pd_len == 0 and thus received a
		 * complete MPA Request/Reply frame.
		 * Check against peer protocol violation.
		 */
		u32 word;

		rcvd = ksock_recv(s, (char *)&word, sizeof(word), MSG_DONTWAIT);
		if (rcvd == -EAGAIN)
			return 0;

		if (rcvd == 0) {
			siw_dbg_cep(cep, "peer EOF\n");
			return -EPIPE;
		}
		if (rcvd < 0) {
			siw_dbg_cep(cep, "error: %d\n", rcvd);
			return rcvd;
		}
		siw_dbg_cep(cep, "peer sent extra data: %d\n", rcvd);

		return -EPROTO;
	}

	/*
	 * At this point, we must have hdr->params.pd_len != 0.
	 * A private data buffer gets allocated if hdr->params.pd_len != 0.
	 */
	if (!cep->mpa.pdata) {
		cep->mpa.pdata = kmalloc(pd_len + 4, GFP_KERNEL);
		if (!cep->mpa.pdata)
			return -ENOMEM;
	}
	rcvd = ksock_recv(
		s, cep->mpa.pdata + cep->mpa.bytes_rcvd - sizeof(struct mpa_rr),
		to_rcv + 4, MSG_DONTWAIT);

	if (rcvd < 0)
		return rcvd;

	if (rcvd > to_rcv)
		return -EPROTO;

	cep->mpa.bytes_rcvd += rcvd;

	if (to_rcv == rcvd) {
		siw_dbg_cep(cep, "%d bytes private data received\n", pd_len);
		return 0;
	}
	return -EAGAIN;
}

/*
 * siw_proc_mpareq()
 *
 * Read MPA Request from socket and, on success, signal the new
 * connection to the IWCM. Caller must hold lock on corresponding
 * listening CEP.
 */
static int siw_proc_mpareq(struct siw_cep *cep)
{
	struct mpa_rr *req;
	int version, rv;
	u16 pd_len;

	rv = siw_recv_mpa_rr(cep);
	if (rv)
		return rv;

	req = &cep->mpa.hdr;

	version = __mpa_rr_revision(req->params.bits);
	pd_len = be16_to_cpu(req->params.pd_len);

	if (version > MPA_REVISION_2)
		/* allow for 0, 1, and 2 only */
		return -EPROTO;

	if (memcmp(req->key, MPA_KEY_REQ, 16))
		return -EPROTO;

	/* Prepare for sending MPA reply */
	memcpy(req->key, MPA_KEY_REP, 16);

	if (version == MPA_REVISION_2 &&
	    (req->params.bits & MPA_RR_FLAG_ENHANCED)) {
		/*
		 * MPA version 2 must signal IRD/ORD values and P2P mode
		 * in private data if header flag MPA_RR_FLAG_ENHANCED
		 * is set.
		 */
		if (pd_len < sizeof(struct mpa_v2_data))
			goto reject_conn;

		cep->enhanced_rdma_conn_est = true;
	}

	/* MPA Markers: currently not supported. Marker TX to be added. */
	if (req->params.bits & MPA_RR_FLAG_MARKERS)
		goto reject_conn;

	if (req->params.bits & MPA_RR_FLAG_CRC) {
		/*
		 * RFC 5044, page 27: CRC MUST be used if peer requests it.
		 * siw specific: reject the connection if the peer requests
		 * CRC, local CRC is switched off, and the 'mpa_crc_strict'
		 * module parameter is set.
		 */
		if (!mpa_crc_required && mpa_crc_strict)
			goto reject_conn;

		/* Enable CRC if requested by module parameter */
		if (mpa_crc_required)
			req->params.bits |= MPA_RR_FLAG_CRC;
	}
	if (cep->enhanced_rdma_conn_est) {
		struct mpa_v2_data *v2 = (struct mpa_v2_data *)cep->mpa.pdata;

		/*
		 * Peer requested ORD becomes requested local IRD,
		 * peer requested IRD becomes requested local ORD.
		 * IRD and ORD get limited by global maximum values.
		 */
		cep->ord = ntohs(v2->ird) & MPA_IRD_ORD_MASK;
		cep->ord = min(cep->ord, SIW_MAX_ORD_QP);
		cep->ird = ntohs(v2->ord) & MPA_IRD_ORD_MASK;
		cep->ird = min(cep->ird, SIW_MAX_IRD_QP);

		/* May get overwritten by locally negotiated values */
		cep->mpa.v2_ctrl.ird = htons(cep->ird);
		cep->mpa.v2_ctrl.ord = htons(cep->ord);

		/*
		 * Support for peer sent zero length Write or Read to
		 * let local side enter RTS. Writes are preferred.
		 * Sends would require pre-posting a Receive and are
		 * not supported.
		 * Propose zero length Write if neither Read nor Write
		 * is indicated.
		 */
		if (v2->ird & MPA_V2_PEER_TO_PEER) {
			cep->mpa.v2_ctrl.ird |= MPA_V2_PEER_TO_PEER;

			if (v2->ord & MPA_V2_RDMA_WRITE_RTR)
				cep->mpa.v2_ctrl.ord |= MPA_V2_RDMA_WRITE_RTR;
			else if (v2->ord & MPA_V2_RDMA_READ_RTR)
				cep->mpa.v2_ctrl.ord |= MPA_V2_RDMA_READ_RTR;
			else
				cep->mpa.v2_ctrl.ord |= MPA_V2_RDMA_WRITE_RTR;
		}
	}

	cep->state = SIW_EPSTATE_RECVD_MPAREQ;

	/* Keep reference until IWCM accepts/rejects */
	siw_cep_get(cep);
	rv = siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REQUEST, 0);
	if (rv)
		siw_cep_put(cep);

	return rv;

reject_conn:
	siw_dbg_cep(cep, "reject: crc %d:%d:%d, m %d:%d\n",
		    req->params.bits & MPA_RR_FLAG_CRC ? 1 : 0,
		    mpa_crc_required, mpa_crc_strict,
		    req->params.bits & MPA_RR_FLAG_MARKERS ? 1 : 0, 0);

	req->params.bits &= ~MPA_RR_FLAG_MARKERS;
	req->params.bits |= MPA_RR_FLAG_REJECT;

	if (!mpa_crc_required && mpa_crc_strict)
		req->params.bits &= ~MPA_RR_FLAG_CRC;

	if (pd_len)
		kfree(cep->mpa.pdata);

	cep->mpa.pdata = NULL;

	siw_send_mpareqrep(cep, NULL, 0);

	return -EOPNOTSUPP;
}

static int siw_proc_mpareply(struct siw_cep *cep)
{
	struct siw_qp_attrs qp_attrs;
	enum siw_qp_attr_mask qp_attr_mask;
	struct siw_qp *qp = cep->qp;
	struct mpa_rr *rep;
	int rv;
	u16 rep_ord;
	u16 rep_ird;
	bool ird_insufficient = false;
	enum mpa_v2_ctrl mpa_p2p_mode = MPA_V2_RDMA_NO_RTR;

	rv = siw_recv_mpa_rr(cep);
	if (rv != -EAGAIN)
		siw_cancel_mpatimer(cep);
	if (rv)
		goto out_err;

	rep = &cep->mpa.hdr;

	if (__mpa_rr_revision(rep->params.bits) > MPA_REVISION_2) {
		/* allow for 0, 1, and 2 only */
		rv = -EPROTO;
		goto out_err;
	}
	if (memcmp(rep->key, MPA_KEY_REP, 16)) {
		siw_init_terminate(qp, TERM_ERROR_LAYER_LLP, LLP_ETYPE_MPA,
				   LLP_ECODE_INVALID_REQ_RESP, 0);
		siw_send_terminate(qp);
		rv = -EPROTO;
		goto out_err;
	}
	if (rep->params.bits & MPA_RR_FLAG_REJECT) {
		siw_dbg_cep(cep, "got mpa reject\n");
		siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNRESET);

		return -ECONNRESET;
	}
	if (try_gso && rep->params.bits & MPA_RR_FLAG_GSO_EXP) {
		siw_dbg_cep(cep, "peer allows GSO on TX\n");
		qp->tx_ctx.gso_seg_limit = 0;
	}
	if ((rep->params.bits & MPA_RR_FLAG_MARKERS) ||
	    (mpa_crc_required && !(rep->params.bits & MPA_RR_FLAG_CRC)) ||
	    (mpa_crc_strict && !mpa_crc_required &&
	     (rep->params.bits & MPA_RR_FLAG_CRC))) {
		siw_dbg_cep(cep, "reply unsupp: crc %d:%d:%d, m %d:%d\n",
			    rep->params.bits & MPA_RR_FLAG_CRC ? 1 : 0,
			    mpa_crc_required, mpa_crc_strict,
			    rep->params.bits & MPA_RR_FLAG_MARKERS ? 1 : 0, 0);

		siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -ECONNREFUSED);

		return -EINVAL;
	}
	if (cep->enhanced_rdma_conn_est) {
		struct mpa_v2_data *v2;

		if (__mpa_rr_revision(rep->params.bits) < MPA_REVISION_2 ||
		    !(rep->params.bits & MPA_RR_FLAG_ENHANCED)) {
			/*
			 * Protocol failure: The responder MUST reply with
			 * MPA version 2 and MUST set MPA_RR_FLAG_ENHANCED.
			 */
			siw_dbg_cep(cep, "mpa reply error: vers %d, enhcd %d\n",
				    __mpa_rr_revision(rep->params.bits),
				    rep->params.bits & MPA_RR_FLAG_ENHANCED ?
				    1 : 0);

			siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
				      -ECONNRESET);
			return -EINVAL;
		}
		v2 = (struct mpa_v2_data *)cep->mpa.pdata;
		rep_ird = ntohs(v2->ird) & MPA_IRD_ORD_MASK;
		rep_ord = ntohs(v2->ord) & MPA_IRD_ORD_MASK;

		if (cep->ird < rep_ord &&
		    (relaxed_ird_negotiation == false ||
		     rep_ord > cep->sdev->attrs.max_ird)) {
			siw_dbg_cep(cep, "ird %d, rep_ord %d, max_ord %d\n",
				    cep->ird, rep_ord,
				    cep->sdev->attrs.max_ord);
			ird_insufficient = true;
		}
		if (cep->ord > rep_ird && relaxed_ird_negotiation == false) {
			siw_dbg_cep(cep, "ord %d, rep_ird %d\n", cep->ord,
				    rep_ird);
			ird_insufficient = true;
		}
		/*
		 * Always report negotiated peer values to user,
		 * even if IRD/ORD negotiation failed
		 */
		cep->ird = rep_ord;
		cep->ord = rep_ird;

		if (ird_insufficient) {
			/*
			 * If the initiator IRD is insufficient for the
			 * responder ORD, send a TERM.
			 */
			siw_init_terminate(qp, TERM_ERROR_LAYER_LLP,
					   LLP_ETYPE_MPA,
					   LLP_ECODE_INSUFFICIENT_IRD, 0);
			siw_send_terminate(qp);
			rv = -ENOMEM;
			goto out_err;
		}
		if (cep->mpa.v2_ctrl_req.ird & MPA_V2_PEER_TO_PEER)
			mpa_p2p_mode =
				cep->mpa.v2_ctrl_req.ord &
				(MPA_V2_RDMA_WRITE_RTR | MPA_V2_RDMA_READ_RTR);

		/*
		 * Check if we requested P2P mode, and if peer agrees
		 */
		if (mpa_p2p_mode != MPA_V2_RDMA_NO_RTR) {
			if ((mpa_p2p_mode & v2->ord) == 0) {
				/*
				 * We requested RTR mode(s), but the peer
				 * did not pick any mode we support.
				 */
				siw_dbg_cep(cep,
					    "rtr mode: req %2x, got %2x\n",
					    mpa_p2p_mode,
					    v2->ord & (MPA_V2_RDMA_WRITE_RTR |
						       MPA_V2_RDMA_READ_RTR));

				siw_init_terminate(qp, TERM_ERROR_LAYER_LLP,
						   LLP_ETYPE_MPA,
						   LLP_ECODE_NO_MATCHING_RTR,
						   0);
				siw_send_terminate(qp);
				rv = -EPROTO;
				goto out_err;
			}
			mpa_p2p_mode = v2->ord & (MPA_V2_RDMA_WRITE_RTR |
						  MPA_V2_RDMA_READ_RTR);
		}
	}
	memset(&qp_attrs, 0, sizeof(qp_attrs));

	if (rep->params.bits & MPA_RR_FLAG_CRC)
		qp_attrs.flags = SIW_MPA_CRC;

	qp_attrs.irq_size = cep->ird;
	qp_attrs.orq_size = cep->ord;
	qp_attrs.sk = cep->sock;
	qp_attrs.state = SIW_QP_STATE_RTS;

	qp_attr_mask = SIW_QP_ATTR_STATE | SIW_QP_ATTR_LLP_HANDLE |
		       SIW_QP_ATTR_ORD | SIW_QP_ATTR_IRD | SIW_QP_ATTR_MPA;

	/* Move socket RX/TX under QP control */
	down_write(&qp->state_lock);
	if (qp->attrs.state > SIW_QP_STATE_RTR) {
		rv = -EINVAL;
		up_write(&qp->state_lock);
		goto out_err;
	}
	rv = siw_qp_modify(qp, &qp_attrs, qp_attr_mask);

	siw_qp_socket_assoc(cep, qp);

	up_write(&qp->state_lock);

	/* Send extra RDMA frame to trigger peer RTS if negotiated */
	if (mpa_p2p_mode != MPA_V2_RDMA_NO_RTR) {
		rv = siw_qp_mpa_rts(qp, mpa_p2p_mode);
		if (rv)
			goto out_err;
	}
	if (!rv) {
		rv = siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, 0);
		if (!rv)
			cep->state = SIW_EPSTATE_RDMA_MODE;

		return 0;
	}

out_err:
	siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL);

	return rv;
}

/*
 * siw_accept_newconn - accept an incoming pending connection
 *
 */
static void siw_accept_newconn(struct siw_cep *cep)
{
	struct socket *s = cep->sock;
	struct socket *new_s = NULL;
	struct siw_cep *new_cep = NULL;
	int rv = 0; /* debug only. should disappear */

	if (cep->state != SIW_EPSTATE_LISTENING)
		goto error;

	new_cep = siw_cep_alloc(cep->sdev);
	if (!new_cep)
		goto error;

	/*
	 * 4: Allocate a sufficient number of work elements
	 * to allow concurrent handling of local + peer close
	 * events, MPA header processing + MPA timeout.
	 */
	if (siw_cm_alloc_work(new_cep, 4) != 0)
		goto error;

	/*
	 * Copy saved socket callbacks from listening CEP
	 * and assign new socket with new CEP
	 */
	new_cep->sk_state_change = cep->sk_state_change;
	new_cep->sk_data_ready = cep->sk_data_ready;
	new_cep->sk_write_space = cep->sk_write_space;
	new_cep->sk_error_report = cep->sk_error_report;

	rv = kernel_accept(s, &new_s, O_NONBLOCK);
	if (rv != 0) {
		/*
		 * Connection already aborted by peer..?
		 */
		siw_dbg_cep(cep, "kernel_accept() error: %d\n", rv);
		goto error;
	}
	new_cep->sock = new_s;
	siw_cep_get(new_cep);
	new_s->sk->sk_user_data = new_cep;

	siw_dbg_cep(cep, "listen socket 0x%p, new 0x%p\n", s, new_s);

	if (siw_tcp_nagle == false) {
		int val = 1;

		rv = kernel_setsockopt(new_s, SOL_TCP, TCP_NODELAY,
				       (char *)&val, sizeof(val));
		if (rv) {
			siw_dbg_cep(cep, "setsockopt NODELAY error: %d\n", rv);
			goto error;
		}
	}
	new_cep->state = SIW_EPSTATE_AWAIT_MPAREQ;

	rv = siw_cm_queue_work(new_cep, SIW_CM_WORK_MPATIMEOUT);
	if (rv)
		goto error;
	/*
	 * See siw_proc_mpareq() etc. for the use of new_cep->listen_cep.
	 */
	new_cep->listen_cep = cep;
	siw_cep_get(cep);

	if (atomic_read(&new_s->sk->sk_rmem_alloc)) {
		/*
		 * MPA REQ already queued
		 */
		siw_dbg_cep(cep, "immediate mpa request\n");

		siw_cep_set_inuse(new_cep);
		rv = siw_proc_mpareq(new_cep);
		siw_cep_set_free(new_cep);

		if (rv != -EAGAIN) {
			siw_cep_put(cep);
			new_cep->listen_cep = NULL;
			if (rv)
				goto error;
		}
	}
	return;

error:
	if (new_s) {
		siw_socket_disassoc(new_s);
		sock_release(new_s);
		if (new_cep)
			new_cep->sock = NULL;
	}
	if (new_cep)
		siw_cep_put(new_cep);

	siw_dbg_cep(cep, "error %d\n", rv);
}

static void siw_cm_work_handler(struct work_struct *w)
{
	struct siw_cm_work *work;
	struct siw_cep *cep;
	int release_cep = 0, rv = 0;

	work = container_of(w, struct siw_cm_work, work.work);
	cep = work->cep;

	siw_dbg_cep(cep, "[QP %u]: work type: %d, state %d\n",
		    cep->qp ? qp_id(cep->qp) : -1, work->type, cep->state);

	siw_cep_set_inuse(cep);

	switch (work->type) {
	case SIW_CM_WORK_ACCEPT:
		siw_accept_newconn(cep);
		break;

	case SIW_CM_WORK_READ_MPAHDR:
		if (cep->state == SIW_EPSTATE_AWAIT_MPAREQ) {
			if (cep->listen_cep) {
				siw_cep_set_inuse(cep->listen_cep);

				if (cep->listen_cep->state ==
				    SIW_EPSTATE_LISTENING)
					rv = siw_proc_mpareq(cep);
				else
					rv = -EFAULT;

				siw_cep_set_free(cep->listen_cep);

				if (rv != -EAGAIN) {
					siw_cep_put(cep->listen_cep);
					cep->listen_cep = NULL;
					if (rv)
						siw_cep_put(cep);
				}
			}
		} else if (cep->state == SIW_EPSTATE_AWAIT_MPAREP) {
			rv = siw_proc_mpareply(cep);
		} else {
			/*
			 * CEP already moved out of MPA handshake.
			 * Any connection management is already done.
			 * Silently ignore the MPA packet.
			 */
			if (cep->state == SIW_EPSTATE_RDMA_MODE) {
				cep->sock->sk->sk_data_ready(cep->sock->sk);
				siw_dbg_cep(cep, "already in RDMA mode");
			} else {
				siw_dbg_cep(cep, "out of state: %d\n",
					    cep->state);
			}
		}
		if (rv && rv != -EAGAIN)
			release_cep = 1;
		break;

	case SIW_CM_WORK_CLOSE_LLP:
		/*
		 * QP scheduled LLP close
		 */
		if (cep->qp && cep->qp->term_info.valid)
			siw_send_terminate(cep->qp);

		if (cep->cm_id)
			siw_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);

		release_cep = 1;
		break;

	case SIW_CM_WORK_PEER_CLOSE:
		if (cep->cm_id) {
			if (cep->state == SIW_EPSTATE_AWAIT_MPAREP) {
				/*
				 * MPA reply not received, but connection drop
				 */
				siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
					      -ECONNRESET);
			} else if (cep->state == SIW_EPSTATE_RDMA_MODE) {
				/*
				 * NOTE: IW_CM_EVENT_DISCONNECT is given just
				 * to transition IWCM into CLOSING.
				 */
				siw_cm_upcall(cep, IW_CM_EVENT_DISCONNECT, 0);
				siw_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0);
			}
			/*
			 * for other states there is no connection
			 * known to the IWCM.
			 */
		} else {
			if (cep->state == SIW_EPSTATE_RECVD_MPAREQ) {
				/*
				 * Wait for the ulp/CM to call accept/reject
				 */
				siw_dbg_cep(cep,
					    "mpa req recvd, wait for ULP\n");
			} else if (cep->state == SIW_EPSTATE_AWAIT_MPAREQ) {
				/*
				 * Socket close before MPA request received.
				 */
				siw_dbg_cep(cep, "no mpareq: drop listener\n");
				siw_cep_put(cep->listen_cep);
				cep->listen_cep = NULL;
			}
		}
		release_cep = 1;
		break;

	case SIW_CM_WORK_MPATIMEOUT:
		cep->mpa_timer = NULL;

		if (cep->state == SIW_EPSTATE_AWAIT_MPAREP) {
			/*
			 * MPA request timed out:
			 * Hide any partially received private data and signal
			 * timeout
			 */
			cep->mpa.hdr.params.pd_len = 0;

			if (cep->cm_id)
				siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY,
					      -ETIMEDOUT);
			release_cep = 1;

		} else if (cep->state == SIW_EPSTATE_AWAIT_MPAREQ) {
			/*
			 * No MPA request received after peer TCP stream setup.
			 */
			if (cep->listen_cep) {
				siw_cep_put(cep->listen_cep);
				cep->listen_cep = NULL;
			}
			release_cep = 1;
		}
		break;

	default:
		WARN(1, "Undefined CM work type: %d\n", work->type);
	}
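	/*
	 * Common endpoint teardown: cancel a pending MPA timer, detach
	 * and close an associated QP, disassociate and release the
	 * socket, and drop the reference held on behalf of the IWCM id.
	 */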
	if (release_cep) {
		siw_dbg_cep(cep,
			    "release: timer=%s, QP[%u], id 0x%p\n",
			    cep->mpa_timer ? "y" : "n",
			    cep->qp ? qp_id(cep->qp) : -1, cep->cm_id);

		siw_cancel_mpatimer(cep);

		cep->state = SIW_EPSTATE_CLOSED;

		if (cep->qp) {
			struct siw_qp *qp = cep->qp;
			/*
			 * Serialize a potential race with application
			 * closing the QP and calling siw_qp_cm_drop()
			 */
			siw_qp_get(qp);
			siw_cep_set_free(cep);

			siw_qp_llp_close(qp);
			siw_qp_put(qp);

			siw_cep_set_inuse(cep);
			cep->qp = NULL;
			siw_qp_put(qp);
		}
		if (cep->sock) {
			siw_socket_disassoc(cep->sock);
			sock_release(cep->sock);
			cep->sock = NULL;
		}
		if (cep->cm_id) {
			cep->cm_id->rem_ref(cep->cm_id);
			cep->cm_id = NULL;
			siw_cep_put(cep);
		}
	}
	siw_cep_set_free(cep);
	siw_put_work(work);
	siw_cep_put(cep);
}

static struct workqueue_struct *siw_cm_wq;

int siw_cm_queue_work(struct siw_cep *cep, enum siw_work_type type)
{
	struct siw_cm_work *work = siw_get_work(cep);
	unsigned long delay = 0;

	if (!work) {
		siw_dbg_cep(cep, "failed with no work available\n");
		return -ENOMEM;
	}
	work->type = type;
	work->cep = cep;

	siw_cep_get(cep);

	INIT_DELAYED_WORK(&work->work, siw_cm_work_handler);

	if (type == SIW_CM_WORK_MPATIMEOUT) {
		cep->mpa_timer = work;

		if (cep->state == SIW_EPSTATE_AWAIT_MPAREP)
			delay = MPAREQ_TIMEOUT;
		else
			delay = MPAREP_TIMEOUT;
	}
	siw_dbg_cep(cep, "[QP %u]: work type: %d, work 0x%p, timeout %lu\n",
		    cep->qp ? qp_id(cep->qp) : -1, type, work, delay);

	queue_delayed_work(siw_cm_wq, &work->work, delay);

	return 0;
}

static void siw_cm_llp_data_ready(struct sock *sk)
{
	struct siw_cep *cep;

	read_lock(&sk->sk_callback_lock);

	cep = sk_to_cep(sk);
	if (!cep) {
		WARN_ON(1);
		goto out;
	}
	siw_dbg_cep(cep, "state: %d\n", cep->state);

	switch (cep->state) {
	case SIW_EPSTATE_RDMA_MODE:
		/* fall through */
	case SIW_EPSTATE_LISTENING:
		break;

	case SIW_EPSTATE_AWAIT_MPAREQ:
		/* fall through */
	case SIW_EPSTATE_AWAIT_MPAREP:
		siw_cm_queue_work(cep, SIW_CM_WORK_READ_MPAHDR);
		break;

	default:
		siw_dbg_cep(cep, "unexpected data, state %d\n", cep->state);
		break;
	}
out:
	read_unlock(&sk->sk_callback_lock);
}

static void siw_cm_llp_write_space(struct sock *sk)
{
	struct siw_cep *cep = sk_to_cep(sk);

	if (cep)
		siw_dbg_cep(cep, "state: %d\n", cep->state);
}

static void siw_cm_llp_error_report(struct sock *sk)
{
	struct siw_cep *cep = sk_to_cep(sk);

	if (cep) {
		siw_dbg_cep(cep, "error %d, socket state: %d, cep state: %d\n",
			    sk->sk_err, sk->sk_state, cep->state);
		cep->sk_error_report(sk);
	}
}

static void siw_cm_llp_state_change(struct sock *sk)
{
	struct siw_cep *cep;
	void (*orig_state_change)(struct sock *s);

	read_lock(&sk->sk_callback_lock);

	cep = sk_to_cep(sk);
	if (!cep) {
		/* endpoint already disassociated */
		read_unlock(&sk->sk_callback_lock);
		return;
	}
	orig_state_change = cep->sk_state_change;

	siw_dbg_cep(cep, "state: %d\n", cep->state);

	switch (sk->sk_state) {
	case TCP_ESTABLISHED:
		/*
		 * handle accepting socket as special case where only
		 * new connection is possible
		 */
		siw_cm_queue_work(cep, SIW_CM_WORK_ACCEPT);
		break;

	case TCP_CLOSE:
	case TCP_CLOSE_WAIT:
		if (cep->qp)
			cep->qp->tx_ctx.tx_suspend = 1;
		siw_cm_queue_work(cep, SIW_CM_WORK_PEER_CLOSE);
		break;

	default:
		siw_dbg_cep(cep, "unexpected socket state %d\n", sk->sk_state);
	}
	read_unlock(&sk->sk_callback_lock);
	orig_state_change(sk);
}

static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr,
			      struct sockaddr *raddr)
{
	int rv, flags = 0, s_val = 1;
	size_t size = laddr->sa_family == AF_INET ?
		sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6);

	/*
	 * Make address available again asap.
	 */
	rv = kernel_setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&s_val,
			       sizeof(s_val));
	if (rv < 0)
		return rv;

	rv = s->ops->bind(s, laddr, size);
	if (rv < 0)
		return rv;

	rv = s->ops->connect(s, raddr, size, flags);

	return rv < 0 ? rv : 0;
}

int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params)
{
	struct siw_device *sdev = to_siw_dev(id->device);
	struct siw_qp *qp;
	struct siw_cep *cep = NULL;
	struct socket *s = NULL;
	struct sockaddr *laddr = (struct sockaddr *)&id->local_addr,
			*raddr = (struct sockaddr *)&id->remote_addr;
	bool p2p_mode = peer_to_peer, v4 = true;
	u16 pd_len = params->private_data_len;
	int version = mpa_version, rv;

	if (pd_len > MPA_MAX_PRIVDATA)
		return -EINVAL;

	if (params->ird > sdev->attrs.max_ird ||
	    params->ord > sdev->attrs.max_ord)
		return -ENOMEM;

	if (laddr->sa_family == AF_INET6)
		v4 = false;
	else if (laddr->sa_family != AF_INET)
		return -EAFNOSUPPORT;

	/*
	 * Respect any iwarp port mapping: Use mapped remote address
	 * if valid. Local address must not be mapped, since siw
	 * uses kernel TCP stack.
	 */
	if ((v4 && to_sockaddr_in(id->remote_addr).sin_port != 0) ||
	    to_sockaddr_in6(id->remote_addr).sin6_port != 0)
		raddr = (struct sockaddr *)&id->m_remote_addr;

	qp = siw_qp_id2obj(sdev, params->qpn);
	if (!qp) {
		WARN(1, "[QP %u] does not exist\n", params->qpn);
		rv = -EINVAL;
		goto error;
	}
	if (v4)
		siw_dbg_qp(qp,
			   "id 0x%p, pd_len %d, laddr %pI4 %d, raddr %pI4 %d\n",
			   id, pd_len,
			   &((struct sockaddr_in *)(laddr))->sin_addr,
			   ntohs(((struct sockaddr_in *)(laddr))->sin_port),
			   &((struct sockaddr_in *)(raddr))->sin_addr,
			   ntohs(((struct sockaddr_in *)(raddr))->sin_port));
	else
		siw_dbg_qp(qp,
			   "id 0x%p, pd_len %d, laddr %pI6 %d, raddr %pI6 %d\n",
			   id, pd_len,
			   &((struct sockaddr_in6 *)(laddr))->sin6_addr,
			   ntohs(((struct sockaddr_in6 *)(laddr))->sin6_port),
			   &((struct sockaddr_in6 *)(raddr))->sin6_addr,
			   ntohs(((struct sockaddr_in6 *)(raddr))->sin6_port));

	rv = sock_create(v4 ? AF_INET : AF_INET6, SOCK_STREAM, IPPROTO_TCP, &s);
	if (rv < 0)
		goto error;

	/*
	 * NOTE: For simplification, connect() is called in blocking
	 * mode. Might be reconsidered for async connection setup at
	 * TCP level.
	 */
	rv = kernel_bindconnect(s, laddr, raddr);
	if (rv != 0) {
		siw_dbg_qp(qp, "kernel_bindconnect: error %d\n", rv);
		goto error;
	}
	if (siw_tcp_nagle == false) {
		int val = 1;

		rv = kernel_setsockopt(s, SOL_TCP, TCP_NODELAY, (char *)&val,
				       sizeof(val));
		if (rv) {
			siw_dbg_qp(qp, "setsockopt NODELAY error: %d\n", rv);
			goto error;
		}
	}
	cep = siw_cep_alloc(sdev);
	if (!cep) {
		rv = -ENOMEM;
		goto error;
	}
	siw_cep_set_inuse(cep);

	/* Associate QP with CEP */
	siw_cep_get(cep);
	qp->cep = cep;

	/* siw_qp_get(qp) already done by QP lookup */
	cep->qp = qp;

	id->add_ref(id);
	cep->cm_id = id;

	/*
	 * 4: Allocate a sufficient number of work elements
	 * to allow concurrent handling of local + peer close
	 * events, MPA header processing + MPA timeout.
	 */
	rv = siw_cm_alloc_work(cep, 4);
	if (rv != 0) {
		rv = -ENOMEM;
		goto error;
	}
	cep->ird = params->ird;
	cep->ord = params->ord;

	if (p2p_mode && cep->ord == 0)
		cep->ord = 1;

	cep->state = SIW_EPSTATE_CONNECTING;

	/*
	 * Associate CEP with socket
	 */
	siw_cep_socket_assoc(cep, s);

	cep->state = SIW_EPSTATE_AWAIT_MPAREP;

	/*
	 * Set MPA Request bits: CRC if required, no MPA Markers,
	 * MPA Rev. according to module parameter 'mpa_version', Key 'Request'.
	 */
	cep->mpa.hdr.params.bits = 0;
	if (version > MPA_REVISION_2) {
		pr_warn("Setting MPA version to %u\n", MPA_REVISION_2);
		version = MPA_REVISION_2;
		/* Adjust also module parameter */
		mpa_version = MPA_REVISION_2;
	}
	__mpa_rr_set_revision(&cep->mpa.hdr.params.bits, version);

	if (try_gso)
		cep->mpa.hdr.params.bits |= MPA_RR_FLAG_GSO_EXP;

	if (mpa_crc_required)
		cep->mpa.hdr.params.bits |= MPA_RR_FLAG_CRC;

	/*
	 * If MPA version == 2:
	 * o Include ORD and IRD.
	 * o Indicate peer-to-peer mode, if required by module
	 *   parameter 'peer_to_peer'.
	 */
	if (version == MPA_REVISION_2) {
		cep->enhanced_rdma_conn_est = true;
		cep->mpa.hdr.params.bits |= MPA_RR_FLAG_ENHANCED;

		cep->mpa.v2_ctrl.ird = htons(cep->ird);
		cep->mpa.v2_ctrl.ord = htons(cep->ord);

		if (p2p_mode) {
			cep->mpa.v2_ctrl.ird |= MPA_V2_PEER_TO_PEER;
			cep->mpa.v2_ctrl.ord |= rtr_type;
		}
		/* Remember own P2P mode requested */
		cep->mpa.v2_ctrl_req.ird = cep->mpa.v2_ctrl.ird;
		cep->mpa.v2_ctrl_req.ord = cep->mpa.v2_ctrl.ord;
	}
	memcpy(cep->mpa.hdr.key, MPA_KEY_REQ, 16);

	rv = siw_send_mpareqrep(cep, params->private_data, pd_len);
	/*
	 * Reset private data.
	 */
	cep->mpa.hdr.params.pd_len = 0;

	if (rv >= 0) {
		rv = siw_cm_queue_work(cep, SIW_CM_WORK_MPATIMEOUT);
		if (!rv) {
			siw_dbg_cep(cep, "id 0x%p, [QP %u]: exit\n", id,
				    qp_id(qp));
			siw_cep_set_free(cep);
			return 0;
		}
	}
error:
	siw_dbg_qp(qp, "failed: %d\n", rv);

	if (cep) {
		siw_socket_disassoc(s);
		sock_release(s);
		cep->sock = NULL;

		cep->qp = NULL;

		cep->cm_id = NULL;
		id->rem_ref(id);
		siw_cep_put(cep);

		qp->cep = NULL;
		siw_cep_put(cep);

		cep->state = SIW_EPSTATE_CLOSED;

		siw_cep_set_free(cep);

		siw_cep_put(cep);

	} else if (s) {
		sock_release(s);
	}
	siw_qp_put(qp);

	return rv;
}

/*
 * siw_accept - Let SoftiWARP accept an RDMA connection request
 *
 * @id:		New connection management id to be used for accepted
 *		connection request
 * @params:	Connection parameters provided by ULP for accepting connection
 *
 * Transition QP to RTS state, associate new CM id @id with accepted CEP
 * and get prepared for TCP input by installing socket callbacks.
 * Then send MPA Reply and generate the "connection established" event.
 * Socket callbacks must be installed before sending MPA Reply, because
 * the latter may cause a first RDMA message to arrive from the RDMA Initiator
 * side very quickly, at which time the socket callbacks must be ready.
 */
int siw_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
{
	struct siw_device *sdev = to_siw_dev(id->device);
	struct siw_cep *cep = (struct siw_cep *)id->provider_data;
	struct siw_qp *qp;
	struct siw_qp_attrs qp_attrs;
	int rv, max_priv_data = MPA_MAX_PRIVDATA;
	bool wait_for_peer_rts = false;

	siw_cep_set_inuse(cep);
	siw_cep_put(cep);

	/* Free lingering inbound private data */
	if (cep->mpa.hdr.params.pd_len) {
		cep->mpa.hdr.params.pd_len = 0;
		kfree(cep->mpa.pdata);
		cep->mpa.pdata = NULL;
	}
	siw_cancel_mpatimer(cep);

	if (cep->state != SIW_EPSTATE_RECVD_MPAREQ) {
		siw_dbg_cep(cep, "id 0x%p: out of state\n", id);

		siw_cep_set_free(cep);
		siw_cep_put(cep);

		return -ECONNRESET;
	}
	qp = siw_qp_id2obj(sdev, params->qpn);
	if (!qp) {
		WARN(1, "[QP %d] does not exist\n", params->qpn);
		siw_cep_set_free(cep);
		siw_cep_put(cep);

		return -EINVAL;
	}
	down_write(&qp->state_lock);
	if (qp->attrs.state > SIW_QP_STATE_RTR) {
		rv = -EINVAL;
		up_write(&qp->state_lock);
		goto error;
	}
	siw_dbg_cep(cep, "id 0x%p\n", id);

	if (try_gso && cep->mpa.hdr.params.bits & MPA_RR_FLAG_GSO_EXP) {
		siw_dbg_cep(cep, "peer allows GSO on TX\n");
		qp->tx_ctx.gso_seg_limit = 0;
	}
	if (params->ord > sdev->attrs.max_ord ||
	    params->ird > sdev->attrs.max_ird) {
		siw_dbg_cep(
			cep,
			"id 0x%p, [QP %u]: ord %d (max %d), ird %d (max %d)\n",
			id, qp_id(qp), params->ord, sdev->attrs.max_ord,
			params->ird, sdev->attrs.max_ird);
		rv = -EINVAL;
		up_write(&qp->state_lock);
		goto error;
	}
	if (cep->enhanced_rdma_conn_est)
		max_priv_data -= sizeof(struct mpa_v2_data);

	if (params->private_data_len > max_priv_data) {
		siw_dbg_cep(
			cep,
			"id 0x%p, [QP %u]: private data length: %d (max %d)\n",
			id, qp_id(qp), params->private_data_len, max_priv_data);
		rv = -EINVAL;
		up_write(&qp->state_lock);
		goto error;
	}
	if (cep->enhanced_rdma_conn_est) {
		if (params->ord > cep->ord) {
			if (relaxed_ird_negotiation) {
				params->ord = cep->ord;
			} else {
				cep->ird = params->ird;
				cep->ord = params->ord;
				rv = -EINVAL;
				up_write(&qp->state_lock);
				goto error;
			}
		}
		if (params->ird < cep->ird) {
			if (relaxed_ird_negotiation &&
			    cep->ird <= sdev->attrs.max_ird)
				params->ird = cep->ird;
			else {
				rv = -ENOMEM;
				up_write(&qp->state_lock);
				goto error;
			}
		}
		if (cep->mpa.v2_ctrl.ord &
		    (MPA_V2_RDMA_WRITE_RTR | MPA_V2_RDMA_READ_RTR))
			wait_for_peer_rts = true;
		/*
		 * Signal back negotiated IRD and ORD values
		 */
		cep->mpa.v2_ctrl.ord =
			htons(params->ord & MPA_IRD_ORD_MASK) |
			(cep->mpa.v2_ctrl.ord & ~MPA_V2_MASK_IRD_ORD);
		cep->mpa.v2_ctrl.ird =
			htons(params->ird & MPA_IRD_ORD_MASK) |
			(cep->mpa.v2_ctrl.ird & ~MPA_V2_MASK_IRD_ORD);
	}
	cep->ird = params->ird;
	cep->ord = params->ord;

	cep->cm_id = id;
	id->add_ref(id);

	memset(&qp_attrs, 0, sizeof(qp_attrs));
	qp_attrs.orq_size = cep->ord;
	qp_attrs.irq_size = cep->ird;
	qp_attrs.sk = cep->sock;
	if (cep->mpa.hdr.params.bits & MPA_RR_FLAG_CRC)
		qp_attrs.flags = SIW_MPA_CRC;
	qp_attrs.state = SIW_QP_STATE_RTS;

	siw_dbg_cep(cep, "id 0x%p, [QP%u]: moving to rts\n", id, qp_id(qp));

	/* Associate QP with CEP */
	siw_cep_get(cep);
	qp->cep = cep;

	/* siw_qp_get(qp) already done by QP lookup */
	cep->qp = qp;

	cep->state = SIW_EPSTATE_RDMA_MODE;

	/* Move socket RX/TX under QP control */
	rv = siw_qp_modify(qp, &qp_attrs,
			   SIW_QP_ATTR_STATE | SIW_QP_ATTR_LLP_HANDLE |
				   SIW_QP_ATTR_ORD | SIW_QP_ATTR_IRD |
				   SIW_QP_ATTR_MPA);
	up_write(&qp->state_lock);

	if (rv)
		goto error;

	siw_dbg_cep(cep, "id 0x%p, [QP %u]: send mpa reply, %d byte pdata\n",
		    id, qp_id(qp), params->private_data_len);

	rv = siw_send_mpareqrep(cep, params->private_data,
				params->private_data_len);
	if (rv != 0)
		goto error;

	if (wait_for_peer_rts) {
		siw_sk_assign_rtr_upcalls(cep);
	} else {
		siw_qp_socket_assoc(cep, qp);
		rv = siw_cm_upcall(cep, IW_CM_EVENT_ESTABLISHED, 0);
		if (rv)
			goto error;
	}
	siw_cep_set_free(cep);

	return 0;
error:
	siw_socket_disassoc(cep->sock);
	sock_release(cep->sock);
	cep->sock = NULL;

	cep->state = SIW_EPSTATE_CLOSED;

	if (cep->cm_id) {
		cep->cm_id->rem_ref(id);
		cep->cm_id = NULL;
	}
	if (qp->cep) {
		siw_cep_put(cep);
		qp->cep = NULL;
	}
	cep->qp = NULL;
	siw_qp_put(qp);

	siw_cep_set_free(cep);
	siw_cep_put(cep);

	return rv;
}

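/*
 * Note on siw_accept() event ordering: if a peer-to-peer RTR frame was
 * negotiated, the ESTABLISHED event is deferred to siw_rtr_data_ready(),
 * i.e. until the peer's zero length RTR frame has been received and
 * processed. Otherwise the event is generated right after the MPA Reply
 * was sent.
 */
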
/*
 * siw_reject()
 *
 * Local connection reject case. Send private data back to peer,
 * close connection and dereference connection id.
 */
int siw_reject(struct iw_cm_id *id, const void *pdata, u8 pd_len)
{
	struct siw_cep *cep = (struct siw_cep *)id->provider_data;

	siw_cep_set_inuse(cep);
	siw_cep_put(cep);

	siw_cancel_mpatimer(cep);

	if (cep->state != SIW_EPSTATE_RECVD_MPAREQ) {
		siw_dbg_cep(cep, "id 0x%p: out of state\n", id);

		siw_cep_set_free(cep);
		siw_cep_put(cep); /* put last reference */

		return -ECONNRESET;
	}
	siw_dbg_cep(cep, "id 0x%p, cep->state %d, pd_len %d\n", id, cep->state,
		    pd_len);

	if (__mpa_rr_revision(cep->mpa.hdr.params.bits) >= MPA_REVISION_1) {
		cep->mpa.hdr.params.bits |= MPA_RR_FLAG_REJECT; /* reject */
		siw_send_mpareqrep(cep, pdata, pd_len);
	}
	siw_socket_disassoc(cep->sock);
	sock_release(cep->sock);
	cep->sock = NULL;

	cep->state = SIW_EPSTATE_CLOSED;

	siw_cep_set_free(cep);
	siw_cep_put(cep);

	return 0;
}

static int siw_listen_address(struct iw_cm_id *id, int backlog,
			      struct sockaddr *laddr, int addr_family)
{
	struct socket *s;
	struct siw_cep *cep = NULL;
	struct siw_device *sdev = to_siw_dev(id->device);
	int rv = 0, s_val;

	rv = sock_create(addr_family, SOCK_STREAM, IPPROTO_TCP, &s);
	if (rv < 0)
		return rv;

	/*
	 * Allow binding local port when still in TIME_WAIT from last close.
	 */
	s_val = 1;
	rv = kernel_setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&s_val,
			       sizeof(s_val));
	if (rv) {
		siw_dbg(id->device, "id 0x%p: setsockopt error: %d\n", id, rv);
		goto error;
	}
	rv = s->ops->bind(s, laddr, addr_family == AF_INET ?
				    sizeof(struct sockaddr_in) :
				    sizeof(struct sockaddr_in6));
	if (rv) {
		siw_dbg(id->device, "id 0x%p: socket bind error: %d\n", id, rv);
		goto error;
	}
	cep = siw_cep_alloc(sdev);
	if (!cep) {
		rv = -ENOMEM;
		goto error;
	}
	siw_cep_socket_assoc(cep, s);

	rv = siw_cm_alloc_work(cep, backlog);
	if (rv) {
		siw_dbg(id->device,
			"id 0x%p: alloc_work error %d, backlog %d\n", id,
			rv, backlog);
		goto error;
	}
	rv = s->ops->listen(s, backlog);
	if (rv) {
		siw_dbg(id->device, "id 0x%p: listen error %d\n", id, rv);
		goto error;
	}
	cep->cm_id = id;
	id->add_ref(id);

	/*
	 * In case of a wildcard rdma_listen on a multi-homed device,
	 * a listener's IWCM id is associated with more than one listening CEP.
	 *
	 * We currently use id->provider_data in three different ways:
	 *
	 * o For a listener's IWCM id, id->provider_data points to
	 *   the list_head of the list of listening CEPs.
	 *   Uses: siw_create_listen(), siw_destroy_listen()
	 *
	 * o For each accepted passive-side IWCM id, id->provider_data
	 *   points to the CEP itself. This is a consequence of
	 *   - siw_cm_upcall() setting event.provider_data = cep and
	 *   - the IWCM's cm_conn_req_handler() setting provider_data of the
	 *     new passive-side IWCM id equal to event.provider_data
	 *   Uses: siw_accept(), siw_reject()
	 *
	 * o For an active-side IWCM id, id->provider_data is not used at all.
	 *
	 */
	if (!id->provider_data) {
		id->provider_data =
			kmalloc(sizeof(struct list_head), GFP_KERNEL);
		if (!id->provider_data) {
			rv = -ENOMEM;
			goto error;
		}
		INIT_LIST_HEAD((struct list_head *)id->provider_data);
	}
	list_add_tail(&cep->listenq, (struct list_head *)id->provider_data);
	cep->state = SIW_EPSTATE_LISTENING;

	if (addr_family == AF_INET)
		siw_dbg(id->device, "Listen at laddr %pI4 %u\n",
			&(((struct sockaddr_in *)laddr)->sin_addr),
			((struct sockaddr_in *)laddr)->sin_port);
	else
		siw_dbg(id->device, "Listen at laddr %pI6 %u\n",
			&(((struct sockaddr_in6 *)laddr)->sin6_addr),
			((struct sockaddr_in6 *)laddr)->sin6_port);

	return 0;

error:
	siw_dbg(id->device, "failed: %d\n", rv);

	if (cep) {
		siw_cep_set_inuse(cep);

		if (cep->cm_id) {
			cep->cm_id->rem_ref(cep->cm_id);
			cep->cm_id = NULL;
		}
		cep->sock = NULL;
		siw_socket_disassoc(s);
		cep->state = SIW_EPSTATE_CLOSED;

		siw_cep_set_free(cep);
		siw_cep_put(cep);
	}
	sock_release(s);

	return rv;
}

static void siw_drop_listeners(struct iw_cm_id *id)
{
	struct list_head *p, *tmp;

	/*
	 * In case of a wildcard rdma_listen on a multi-homed device,
	 * a listener's IWCM id is associated with more than one listening CEP.
	 */
	list_for_each_safe(p, tmp, (struct list_head *)id->provider_data) {
		struct siw_cep *cep = list_entry(p, struct siw_cep, listenq);

		list_del(p);

		siw_dbg_cep(cep, "id 0x%p: drop cep, state %d\n", id,
			    cep->state);

		siw_cep_set_inuse(cep);

		if (cep->cm_id) {
			cep->cm_id->rem_ref(cep->cm_id);
			cep->cm_id = NULL;
		}
		if (cep->sock) {
			siw_socket_disassoc(cep->sock);
			sock_release(cep->sock);
			cep->sock = NULL;
		}
		cep->state = SIW_EPSTATE_CLOSED;
		siw_cep_set_free(cep);
		siw_cep_put(cep);
	}
}

/*
 * siw_create_listen - Create resources for a listener's IWCM ID @id
 *
 * Listens on the socket addresses id->local_addr and id->remote_addr.
 *
 * If the listener's @id provides a specific local IP address, at most one
 * listening socket is created and associated with @id.
 *
 * If the listener's @id provides the wildcard (zero) local IP address,
 * a separate listen is performed for each local IP address of the device
 * by creating a listening socket and binding to that local IP address.
 *
 */
int siw_create_listen(struct iw_cm_id *id, int backlog)
{
	struct net_device *dev = to_siw_dev(id->device)->netdev;
	int rv = 0, listeners = 0;

	siw_dbg(id->device, "id 0x%p: backlog %d\n", id, backlog);

	/*
	 * For each attached address of the interface, create a
	 * listening socket, if id->local_addr is the wildcard
	 * IP address or matches the IP address.
	 */
	if (id->local_addr.ss_family == AF_INET) {
		struct in_device *in_dev = in_dev_get(dev);
		struct sockaddr_in s_laddr, *s_raddr;
		const struct in_ifaddr *ifa;

		memcpy(&s_laddr, &id->local_addr, sizeof(s_laddr));
		s_raddr = (struct sockaddr_in *)&id->remote_addr;

		siw_dbg(id->device,
			"id 0x%p: laddr %pI4:%d, raddr %pI4:%d\n",
			id, &s_laddr.sin_addr, ntohs(s_laddr.sin_port),
			&s_raddr->sin_addr, ntohs(s_raddr->sin_port));

		rtnl_lock();
		in_dev_for_each_ifa_rtnl(ifa, in_dev) {
			if (ipv4_is_zeronet(s_laddr.sin_addr.s_addr) ||
			    s_laddr.sin_addr.s_addr == ifa->ifa_address) {
				s_laddr.sin_addr.s_addr = ifa->ifa_address;

				rv = siw_listen_address(id, backlog,
						(struct sockaddr *)&s_laddr,
						AF_INET);
				if (!rv)
					listeners++;
			}
		}
		rtnl_unlock();
		in_dev_put(in_dev);
	} else if (id->local_addr.ss_family == AF_INET6) {
		struct inet6_dev *in6_dev = in6_dev_get(dev);
		struct inet6_ifaddr *ifp;
		struct sockaddr_in6 *s_laddr = &to_sockaddr_in6(id->local_addr),
			*s_raddr = &to_sockaddr_in6(id->remote_addr);

		siw_dbg(id->device,
			"id 0x%p: laddr %pI6:%d, raddr %pI6:%d\n",
			id, &s_laddr->sin6_addr, ntohs(s_laddr->sin6_port),
			&s_raddr->sin6_addr, ntohs(s_raddr->sin6_port));

		read_lock_bh(&in6_dev->lock);
		list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
			struct sockaddr_in6 bind_addr;

			if (ipv6_addr_any(&s_laddr->sin6_addr) ||
			    ipv6_addr_equal(&s_laddr->sin6_addr, &ifp->addr)) {
				bind_addr.sin6_family = AF_INET6;
				bind_addr.sin6_port = s_laddr->sin6_port;
				bind_addr.sin6_flowinfo = 0;
				bind_addr.sin6_addr = ifp->addr;
				bind_addr.sin6_scope_id = dev->ifindex;

				rv = siw_listen_address(id, backlog,
						(struct sockaddr *)&bind_addr,
						AF_INET6);
				if (!rv)
					listeners++;
			}
		}
		read_unlock_bh(&in6_dev->lock);

		in6_dev_put(in6_dev);
	} else {
		return -EAFNOSUPPORT;
	}
	if (listeners)
		rv = 0;
	else if (!rv)
		rv = -EINVAL;

	siw_dbg(id->device, "id 0x%p: %s\n", id, rv ? "FAIL" : "OK");

	return rv;
}

int siw_destroy_listen(struct iw_cm_id *id)
{
	siw_dbg(id->device, "id 0x%p\n", id);

	if (!id->provider_data) {
		siw_dbg(id->device, "id 0x%p: no cep(s)\n", id);
		return 0;
	}
	siw_drop_listeners(id);
	kfree(id->provider_data);
	id->provider_data = NULL;

	return 0;
}

int siw_cm_init(void)
{
	/*
	 * Use a single threaded workqueue to enforce strict ordering
	 * of connection management work.
	 */
	siw_cm_wq = create_singlethread_workqueue("siw_cm_wq");
	if (!siw_cm_wq)
		return -ENOMEM;

	return 0;
}

void siw_cm_exit(void)
{
	if (siw_cm_wq) {
		flush_workqueue(siw_cm_wq);
		destroy_workqueue(siw_cm_wq);
	}
}