1 /* 2 * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. 3 * Copyright (c) 2004 Topspin Corporation. All rights reserved. 4 * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. 5 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 6 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. 7 * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. 8 * 9 * This software is available to you under a choice of one of two 10 * licenses. You may choose to be licensed under the terms of the GNU 11 * General Public License (GPL) Version 2, available from the file 12 * COPYING in the main directory of this source tree, or the 13 * OpenIB.org BSD license below: 14 * 15 * Redistribution and use in source and binary forms, with or 16 * without modification, are permitted provided that the following 17 * conditions are met: 18 * 19 * - Redistributions of source code must retain the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer. 22 * 23 * - Redistributions in binary form must reproduce the above 24 * copyright notice, this list of conditions and the following 25 * disclaimer in the documentation and/or other materials 26 * provided with the distribution. 27 * 28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 29 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 30 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 31 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 32 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 33 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 34 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 35 * SOFTWARE. 36 * 37 */ 38 #include <linux/dma-mapping.h> 39 #include <linux/err.h> 40 #include <linux/idr.h> 41 #include <linux/interrupt.h> 42 #include <linux/rbtree.h> 43 #include <linux/sched.h> 44 #include <linux/spinlock.h> 45 #include <linux/workqueue.h> 46 #include <linux/completion.h> 47 #include <linux/slab.h> 48 #include <linux/module.h> 49 #include <linux/sysctl.h> 50 51 #include <rdma/iw_cm.h> 52 #include <rdma/ib_addr.h> 53 #include <rdma/iw_portmap.h> 54 #include <rdma/rdma_netlink.h> 55 56 #include "iwcm.h" 57 58 MODULE_AUTHOR("Tom Tucker"); 59 MODULE_DESCRIPTION("iWARP CM"); 60 MODULE_LICENSE("Dual BSD/GPL"); 61 62 static const char * const iwcm_rej_reason_strs[] = { 63 [ECONNRESET] = "reset by remote host", 64 [ECONNREFUSED] = "refused by remote application", 65 [ETIMEDOUT] = "setup timeout", 66 }; 67 68 const char *__attribute_const__ iwcm_reject_msg(int reason) 69 { 70 size_t index; 71 72 /* iWARP uses negative errnos */ 73 index = -reason; 74 75 if (index < ARRAY_SIZE(iwcm_rej_reason_strs) && 76 iwcm_rej_reason_strs[index]) 77 return iwcm_rej_reason_strs[index]; 78 else 79 return "unrecognized reason"; 80 } 81 EXPORT_SYMBOL(iwcm_reject_msg); 82 83 static struct rdma_nl_cbs iwcm_nl_cb_table[RDMA_NL_IWPM_NUM_OPS] = { 84 [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, 85 [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, 86 [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, 87 [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb}, 88 [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb}, 89 [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb}, 90 [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}, 91 [RDMA_NL_IWPM_HELLO] = {.dump = iwpm_hello_cb} 92 }; 93 94 static struct workqueue_struct *iwcm_wq; 95 struct iwcm_work { 96 struct work_struct work; 97 struct iwcm_id_private *cm_id; 98 struct list_head list; 99 struct iw_cm_event event; 100 struct list_head free_list; 101 }; 102 103 static unsigned int default_backlog = 256; 104 105 static struct ctl_table_header *iwcm_ctl_table_hdr; 106 static struct ctl_table iwcm_ctl_table[] = { 107 { 108 .procname = "default_backlog", 109 .data = &default_backlog, 110 .maxlen = sizeof(default_backlog), 111 .mode = 0644, 112 .proc_handler = proc_dointvec, 113 }, 114 { } 115 }; 116 117 /* 118 * The following services provide a mechanism for pre-allocating iwcm_work 119 * elements. The design pre-allocates them based on the cm_id type: 120 * LISTENING IDS: Get enough elements preallocated to handle the 121 * listen backlog. 122 * ACTIVE IDS: 4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE 123 * PASSIVE IDS: 3: ESTABLISHED, DISCONNECT, CLOSE 124 * 125 * Allocating them in connect and listen avoids having to deal 126 * with allocation failures on the event upcall from the provider (which 127 * is called in the interrupt context). 128 * 129 * One exception is when creating the cm_id for incoming connection requests. 130 * There are two cases: 131 * 1) in the event upcall, cm_event_handler(), for a listening cm_id. If 132 * the backlog is exceeded, then no more connection request events will 133 * be processed. cm_event_handler() returns -ENOMEM in this case. Its up 134 * to the provider to reject the connection request. 135 * 2) in the connection request workqueue handler, cm_conn_req_handler(). 136 * If work elements cannot be allocated for the new connect request cm_id, 137 * then IWCM will call the provider reject method. This is ok since 138 * cm_conn_req_handler() runs in the workqueue thread context. 139 */ 140 141 static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv) 142 { 143 struct iwcm_work *work; 144 145 if (list_empty(&cm_id_priv->work_free_list)) 146 return NULL; 147 work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work, 148 free_list); 149 list_del_init(&work->free_list); 150 return work; 151 } 152 153 static void put_work(struct iwcm_work *work) 154 { 155 list_add(&work->free_list, &work->cm_id->work_free_list); 156 } 157 158 static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) 159 { 160 struct list_head *e, *tmp; 161 162 list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) { 163 list_del(e); 164 kfree(list_entry(e, struct iwcm_work, free_list)); 165 } 166 } 167 168 static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) 169 { 170 struct iwcm_work *work; 171 172 BUG_ON(!list_empty(&cm_id_priv->work_free_list)); 173 while (count--) { 174 work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL); 175 if (!work) { 176 dealloc_work_entries(cm_id_priv); 177 return -ENOMEM; 178 } 179 work->cm_id = cm_id_priv; 180 INIT_LIST_HEAD(&work->list); 181 put_work(work); 182 } 183 return 0; 184 } 185 186 /* 187 * Save private data from incoming connection requests to 188 * iw_cm_event, so the low level driver doesn't have to. Adjust 189 * the event ptr to point to the local copy. 190 */ 191 static int copy_private_data(struct iw_cm_event *event) 192 { 193 void *p; 194 195 p = kmemdup(event->private_data, event->private_data_len, GFP_ATOMIC); 196 if (!p) 197 return -ENOMEM; 198 event->private_data = p; 199 return 0; 200 } 201 202 static void free_cm_id(struct iwcm_id_private *cm_id_priv) 203 { 204 dealloc_work_entries(cm_id_priv); 205 kfree(cm_id_priv); 206 } 207 208 /* 209 * Release a reference on cm_id. If the last reference is being 210 * released, free the cm_id and return 1. 211 */ 212 static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv) 213 { 214 if (refcount_dec_and_test(&cm_id_priv->refcount)) { 215 BUG_ON(!list_empty(&cm_id_priv->work_list)); 216 free_cm_id(cm_id_priv); 217 return 1; 218 } 219 220 return 0; 221 } 222 223 static void add_ref(struct iw_cm_id *cm_id) 224 { 225 struct iwcm_id_private *cm_id_priv; 226 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 227 refcount_inc(&cm_id_priv->refcount); 228 } 229 230 static void rem_ref(struct iw_cm_id *cm_id) 231 { 232 struct iwcm_id_private *cm_id_priv; 233 234 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 235 236 (void)iwcm_deref_id(cm_id_priv); 237 } 238 239 static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event); 240 241 struct iw_cm_id *iw_create_cm_id(struct ib_device *device, 242 iw_cm_handler cm_handler, 243 void *context) 244 { 245 struct iwcm_id_private *cm_id_priv; 246 247 cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL); 248 if (!cm_id_priv) 249 return ERR_PTR(-ENOMEM); 250 251 cm_id_priv->state = IW_CM_STATE_IDLE; 252 cm_id_priv->id.device = device; 253 cm_id_priv->id.cm_handler = cm_handler; 254 cm_id_priv->id.context = context; 255 cm_id_priv->id.event_handler = cm_event_handler; 256 cm_id_priv->id.add_ref = add_ref; 257 cm_id_priv->id.rem_ref = rem_ref; 258 spin_lock_init(&cm_id_priv->lock); 259 refcount_set(&cm_id_priv->refcount, 1); 260 init_waitqueue_head(&cm_id_priv->connect_wait); 261 init_completion(&cm_id_priv->destroy_comp); 262 INIT_LIST_HEAD(&cm_id_priv->work_list); 263 INIT_LIST_HEAD(&cm_id_priv->work_free_list); 264 265 return &cm_id_priv->id; 266 } 267 EXPORT_SYMBOL(iw_create_cm_id); 268 269 270 static int iwcm_modify_qp_err(struct ib_qp *qp) 271 { 272 struct ib_qp_attr qp_attr; 273 274 if (!qp) 275 return -EINVAL; 276 277 qp_attr.qp_state = IB_QPS_ERR; 278 return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); 279 } 280 281 /* 282 * This is really the RDMAC CLOSING state. It is most similar to the 283 * IB SQD QP state. 284 */ 285 static int iwcm_modify_qp_sqd(struct ib_qp *qp) 286 { 287 struct ib_qp_attr qp_attr; 288 289 BUG_ON(qp == NULL); 290 qp_attr.qp_state = IB_QPS_SQD; 291 return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); 292 } 293 294 /* 295 * CM_ID <-- CLOSING 296 * 297 * Block if a passive or active connection is currently being processed. Then 298 * process the event as follows: 299 * - If we are ESTABLISHED, move to CLOSING and modify the QP state 300 * based on the abrupt flag 301 * - If the connection is already in the CLOSING or IDLE state, the peer is 302 * disconnecting concurrently with us and we've already seen the 303 * DISCONNECT event -- ignore the request and return 0 304 * - Disconnect on a listening endpoint returns -EINVAL 305 */ 306 int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt) 307 { 308 struct iwcm_id_private *cm_id_priv; 309 unsigned long flags; 310 int ret = 0; 311 struct ib_qp *qp = NULL; 312 313 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 314 /* Wait if we're currently in a connect or accept downcall */ 315 wait_event(cm_id_priv->connect_wait, 316 !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); 317 318 spin_lock_irqsave(&cm_id_priv->lock, flags); 319 switch (cm_id_priv->state) { 320 case IW_CM_STATE_ESTABLISHED: 321 cm_id_priv->state = IW_CM_STATE_CLOSING; 322 323 /* QP could be <nul> for user-mode client */ 324 if (cm_id_priv->qp) 325 qp = cm_id_priv->qp; 326 else 327 ret = -EINVAL; 328 break; 329 case IW_CM_STATE_LISTEN: 330 ret = -EINVAL; 331 break; 332 case IW_CM_STATE_CLOSING: 333 /* remote peer closed first */ 334 case IW_CM_STATE_IDLE: 335 /* accept or connect returned !0 */ 336 break; 337 case IW_CM_STATE_CONN_RECV: 338 /* 339 * App called disconnect before/without calling accept after 340 * connect_request event delivered. 341 */ 342 break; 343 case IW_CM_STATE_CONN_SENT: 344 /* Can only get here if wait above fails */ 345 default: 346 BUG(); 347 } 348 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 349 350 if (qp) { 351 if (abrupt) 352 ret = iwcm_modify_qp_err(qp); 353 else 354 ret = iwcm_modify_qp_sqd(qp); 355 356 /* 357 * If both sides are disconnecting the QP could 358 * already be in ERR or SQD states 359 */ 360 ret = 0; 361 } 362 363 return ret; 364 } 365 EXPORT_SYMBOL(iw_cm_disconnect); 366 367 /* 368 * CM_ID <-- DESTROYING 369 * 370 * Clean up all resources associated with the connection and release 371 * the initial reference taken by iw_create_cm_id. 372 * 373 * Returns true if and only if the last cm_id_priv reference has been dropped. 374 */ 375 static bool destroy_cm_id(struct iw_cm_id *cm_id) 376 { 377 struct iwcm_id_private *cm_id_priv; 378 struct ib_qp *qp; 379 unsigned long flags; 380 381 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 382 /* 383 * Wait if we're currently in a connect or accept downcall. A 384 * listening endpoint should never block here. 385 */ 386 wait_event(cm_id_priv->connect_wait, 387 !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); 388 389 /* 390 * Since we're deleting the cm_id, drop any events that 391 * might arrive before the last dereference. 392 */ 393 set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags); 394 395 spin_lock_irqsave(&cm_id_priv->lock, flags); 396 qp = cm_id_priv->qp; 397 cm_id_priv->qp = NULL; 398 399 switch (cm_id_priv->state) { 400 case IW_CM_STATE_LISTEN: 401 cm_id_priv->state = IW_CM_STATE_DESTROYING; 402 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 403 /* destroy the listening endpoint */ 404 cm_id->device->ops.iw_destroy_listen(cm_id); 405 spin_lock_irqsave(&cm_id_priv->lock, flags); 406 break; 407 case IW_CM_STATE_ESTABLISHED: 408 cm_id_priv->state = IW_CM_STATE_DESTROYING; 409 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 410 /* Abrupt close of the connection */ 411 (void)iwcm_modify_qp_err(qp); 412 spin_lock_irqsave(&cm_id_priv->lock, flags); 413 break; 414 case IW_CM_STATE_IDLE: 415 case IW_CM_STATE_CLOSING: 416 cm_id_priv->state = IW_CM_STATE_DESTROYING; 417 break; 418 case IW_CM_STATE_CONN_RECV: 419 /* 420 * App called destroy before/without calling accept after 421 * receiving connection request event notification or 422 * returned non zero from the event callback function. 423 * In either case, must tell the provider to reject. 424 */ 425 cm_id_priv->state = IW_CM_STATE_DESTROYING; 426 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 427 cm_id->device->ops.iw_reject(cm_id, NULL, 0); 428 spin_lock_irqsave(&cm_id_priv->lock, flags); 429 break; 430 case IW_CM_STATE_CONN_SENT: 431 case IW_CM_STATE_DESTROYING: 432 default: 433 BUG(); 434 break; 435 } 436 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 437 if (qp) 438 cm_id_priv->id.device->ops.iw_rem_ref(qp); 439 440 if (cm_id->mapped) { 441 iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr); 442 iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM); 443 } 444 445 return iwcm_deref_id(cm_id_priv); 446 } 447 448 /* 449 * This function is only called by the application thread and cannot 450 * be called by the event thread. The function will wait for all 451 * references to be released on the cm_id and then kfree the cm_id 452 * object. 453 */ 454 void iw_destroy_cm_id(struct iw_cm_id *cm_id) 455 { 456 if (!destroy_cm_id(cm_id)) 457 flush_workqueue(iwcm_wq); 458 } 459 EXPORT_SYMBOL(iw_destroy_cm_id); 460 461 /** 462 * iw_cm_check_wildcard - If IP address is 0 then use original 463 * @pm_addr: sockaddr containing the ip to check for wildcard 464 * @cm_addr: sockaddr containing the actual IP address 465 * @cm_outaddr: sockaddr to set IP addr which leaving port 466 * 467 * Checks the pm_addr for wildcard and then sets cm_outaddr's 468 * IP to the actual (cm_addr). 469 */ 470 static void iw_cm_check_wildcard(struct sockaddr_storage *pm_addr, 471 struct sockaddr_storage *cm_addr, 472 struct sockaddr_storage *cm_outaddr) 473 { 474 if (pm_addr->ss_family == AF_INET) { 475 struct sockaddr_in *pm4_addr = (struct sockaddr_in *)pm_addr; 476 477 if (pm4_addr->sin_addr.s_addr == htonl(INADDR_ANY)) { 478 struct sockaddr_in *cm4_addr = 479 (struct sockaddr_in *)cm_addr; 480 struct sockaddr_in *cm4_outaddr = 481 (struct sockaddr_in *)cm_outaddr; 482 483 cm4_outaddr->sin_addr = cm4_addr->sin_addr; 484 } 485 } else { 486 struct sockaddr_in6 *pm6_addr = (struct sockaddr_in6 *)pm_addr; 487 488 if (ipv6_addr_type(&pm6_addr->sin6_addr) == IPV6_ADDR_ANY) { 489 struct sockaddr_in6 *cm6_addr = 490 (struct sockaddr_in6 *)cm_addr; 491 struct sockaddr_in6 *cm6_outaddr = 492 (struct sockaddr_in6 *)cm_outaddr; 493 494 cm6_outaddr->sin6_addr = cm6_addr->sin6_addr; 495 } 496 } 497 } 498 499 /** 500 * iw_cm_map - Use portmapper to map the ports 501 * @cm_id: connection manager pointer 502 * @active: Indicates the active side when true 503 * returns nonzero for error only if iwpm_create_mapinfo() fails 504 * 505 * Tries to add a mapping for a port using the Portmapper. If 506 * successful in mapping the IP/Port it will check the remote 507 * mapped IP address for a wildcard IP address and replace the 508 * zero IP address with the remote_addr. 509 */ 510 static int iw_cm_map(struct iw_cm_id *cm_id, bool active) 511 { 512 const char *devname = dev_name(&cm_id->device->dev); 513 const char *ifname = cm_id->device->iw_ifname; 514 struct iwpm_dev_data pm_reg_msg = {}; 515 struct iwpm_sa_data pm_msg; 516 int status; 517 518 if (strlen(devname) >= sizeof(pm_reg_msg.dev_name) || 519 strlen(ifname) >= sizeof(pm_reg_msg.if_name)) 520 return -EINVAL; 521 522 cm_id->m_local_addr = cm_id->local_addr; 523 cm_id->m_remote_addr = cm_id->remote_addr; 524 525 strcpy(pm_reg_msg.dev_name, devname); 526 strcpy(pm_reg_msg.if_name, ifname); 527 528 if (iwpm_register_pid(&pm_reg_msg, RDMA_NL_IWCM) || 529 !iwpm_valid_pid()) 530 return 0; 531 532 cm_id->mapped = true; 533 pm_msg.loc_addr = cm_id->local_addr; 534 pm_msg.rem_addr = cm_id->remote_addr; 535 pm_msg.flags = (cm_id->device->iw_driver_flags & IW_F_NO_PORT_MAP) ? 536 IWPM_FLAGS_NO_PORT_MAP : 0; 537 if (active) 538 status = iwpm_add_and_query_mapping(&pm_msg, 539 RDMA_NL_IWCM); 540 else 541 status = iwpm_add_mapping(&pm_msg, RDMA_NL_IWCM); 542 543 if (!status) { 544 cm_id->m_local_addr = pm_msg.mapped_loc_addr; 545 if (active) { 546 cm_id->m_remote_addr = pm_msg.mapped_rem_addr; 547 iw_cm_check_wildcard(&pm_msg.mapped_rem_addr, 548 &cm_id->remote_addr, 549 &cm_id->m_remote_addr); 550 } 551 } 552 553 return iwpm_create_mapinfo(&cm_id->local_addr, 554 &cm_id->m_local_addr, 555 RDMA_NL_IWCM, pm_msg.flags); 556 } 557 558 /* 559 * CM_ID <-- LISTEN 560 * 561 * Start listening for connect requests. Generates one CONNECT_REQUEST 562 * event for each inbound connect request. 563 */ 564 int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) 565 { 566 struct iwcm_id_private *cm_id_priv; 567 unsigned long flags; 568 int ret; 569 570 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 571 572 if (!backlog) 573 backlog = default_backlog; 574 575 ret = alloc_work_entries(cm_id_priv, backlog); 576 if (ret) 577 return ret; 578 579 spin_lock_irqsave(&cm_id_priv->lock, flags); 580 switch (cm_id_priv->state) { 581 case IW_CM_STATE_IDLE: 582 cm_id_priv->state = IW_CM_STATE_LISTEN; 583 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 584 ret = iw_cm_map(cm_id, false); 585 if (!ret) 586 ret = cm_id->device->ops.iw_create_listen(cm_id, 587 backlog); 588 if (ret) 589 cm_id_priv->state = IW_CM_STATE_IDLE; 590 spin_lock_irqsave(&cm_id_priv->lock, flags); 591 break; 592 default: 593 ret = -EINVAL; 594 } 595 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 596 597 return ret; 598 } 599 EXPORT_SYMBOL(iw_cm_listen); 600 601 /* 602 * CM_ID <-- IDLE 603 * 604 * Rejects an inbound connection request. No events are generated. 605 */ 606 int iw_cm_reject(struct iw_cm_id *cm_id, 607 const void *private_data, 608 u8 private_data_len) 609 { 610 struct iwcm_id_private *cm_id_priv; 611 unsigned long flags; 612 int ret; 613 614 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 615 set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 616 617 spin_lock_irqsave(&cm_id_priv->lock, flags); 618 if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { 619 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 620 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 621 wake_up_all(&cm_id_priv->connect_wait); 622 return -EINVAL; 623 } 624 cm_id_priv->state = IW_CM_STATE_IDLE; 625 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 626 627 ret = cm_id->device->ops.iw_reject(cm_id, private_data, 628 private_data_len); 629 630 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 631 wake_up_all(&cm_id_priv->connect_wait); 632 633 return ret; 634 } 635 EXPORT_SYMBOL(iw_cm_reject); 636 637 /* 638 * CM_ID <-- ESTABLISHED 639 * 640 * Accepts an inbound connection request and generates an ESTABLISHED 641 * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block 642 * until the ESTABLISHED event is received from the provider. 643 */ 644 int iw_cm_accept(struct iw_cm_id *cm_id, 645 struct iw_cm_conn_param *iw_param) 646 { 647 struct iwcm_id_private *cm_id_priv; 648 struct ib_qp *qp; 649 unsigned long flags; 650 int ret; 651 652 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 653 set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 654 655 spin_lock_irqsave(&cm_id_priv->lock, flags); 656 if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { 657 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 658 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 659 wake_up_all(&cm_id_priv->connect_wait); 660 return -EINVAL; 661 } 662 /* Get the ib_qp given the QPN */ 663 qp = cm_id->device->ops.iw_get_qp(cm_id->device, iw_param->qpn); 664 if (!qp) { 665 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 666 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 667 wake_up_all(&cm_id_priv->connect_wait); 668 return -EINVAL; 669 } 670 cm_id->device->ops.iw_add_ref(qp); 671 cm_id_priv->qp = qp; 672 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 673 674 ret = cm_id->device->ops.iw_accept(cm_id, iw_param); 675 if (ret) { 676 /* An error on accept precludes provider events */ 677 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); 678 cm_id_priv->state = IW_CM_STATE_IDLE; 679 spin_lock_irqsave(&cm_id_priv->lock, flags); 680 qp = cm_id_priv->qp; 681 cm_id_priv->qp = NULL; 682 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 683 if (qp) 684 cm_id->device->ops.iw_rem_ref(qp); 685 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 686 wake_up_all(&cm_id_priv->connect_wait); 687 } 688 689 return ret; 690 } 691 EXPORT_SYMBOL(iw_cm_accept); 692 693 /* 694 * Active Side: CM_ID <-- CONN_SENT 695 * 696 * If successful, results in the generation of a CONNECT_REPLY 697 * event. iw_cm_disconnect and iw_cm_destroy will block until the 698 * CONNECT_REPLY event is received from the provider. 699 */ 700 int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) 701 { 702 struct iwcm_id_private *cm_id_priv; 703 int ret; 704 unsigned long flags; 705 struct ib_qp *qp = NULL; 706 707 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 708 709 ret = alloc_work_entries(cm_id_priv, 4); 710 if (ret) 711 return ret; 712 713 set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 714 spin_lock_irqsave(&cm_id_priv->lock, flags); 715 716 if (cm_id_priv->state != IW_CM_STATE_IDLE) { 717 ret = -EINVAL; 718 goto err; 719 } 720 721 /* Get the ib_qp given the QPN */ 722 qp = cm_id->device->ops.iw_get_qp(cm_id->device, iw_param->qpn); 723 if (!qp) { 724 ret = -EINVAL; 725 goto err; 726 } 727 cm_id->device->ops.iw_add_ref(qp); 728 cm_id_priv->qp = qp; 729 cm_id_priv->state = IW_CM_STATE_CONN_SENT; 730 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 731 732 ret = iw_cm_map(cm_id, true); 733 if (!ret) 734 ret = cm_id->device->ops.iw_connect(cm_id, iw_param); 735 if (!ret) 736 return 0; /* success */ 737 738 spin_lock_irqsave(&cm_id_priv->lock, flags); 739 qp = cm_id_priv->qp; 740 cm_id_priv->qp = NULL; 741 cm_id_priv->state = IW_CM_STATE_IDLE; 742 err: 743 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 744 if (qp) 745 cm_id->device->ops.iw_rem_ref(qp); 746 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 747 wake_up_all(&cm_id_priv->connect_wait); 748 return ret; 749 } 750 EXPORT_SYMBOL(iw_cm_connect); 751 752 /* 753 * Passive Side: new CM_ID <-- CONN_RECV 754 * 755 * Handles an inbound connect request. The function creates a new 756 * iw_cm_id to represent the new connection and inherits the client 757 * callback function and other attributes from the listening parent. 758 * 759 * The work item contains a pointer to the listen_cm_id and the event. The 760 * listen_cm_id contains the client cm_handler, context and 761 * device. These are copied when the device is cloned. The event 762 * contains the new four tuple. 763 * 764 * An error on the child should not affect the parent, so this 765 * function does not return a value. 766 */ 767 static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, 768 struct iw_cm_event *iw_event) 769 { 770 unsigned long flags; 771 struct iw_cm_id *cm_id; 772 struct iwcm_id_private *cm_id_priv; 773 int ret; 774 775 /* 776 * The provider should never generate a connection request 777 * event with a bad status. 778 */ 779 BUG_ON(iw_event->status); 780 781 cm_id = iw_create_cm_id(listen_id_priv->id.device, 782 listen_id_priv->id.cm_handler, 783 listen_id_priv->id.context); 784 /* If the cm_id could not be created, ignore the request */ 785 if (IS_ERR(cm_id)) 786 goto out; 787 788 cm_id->provider_data = iw_event->provider_data; 789 cm_id->m_local_addr = iw_event->local_addr; 790 cm_id->m_remote_addr = iw_event->remote_addr; 791 cm_id->local_addr = listen_id_priv->id.local_addr; 792 793 ret = iwpm_get_remote_info(&listen_id_priv->id.m_local_addr, 794 &iw_event->remote_addr, 795 &cm_id->remote_addr, 796 RDMA_NL_IWCM); 797 if (ret) { 798 cm_id->remote_addr = iw_event->remote_addr; 799 } else { 800 iw_cm_check_wildcard(&listen_id_priv->id.m_local_addr, 801 &iw_event->local_addr, 802 &cm_id->local_addr); 803 iw_event->local_addr = cm_id->local_addr; 804 iw_event->remote_addr = cm_id->remote_addr; 805 } 806 807 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 808 cm_id_priv->state = IW_CM_STATE_CONN_RECV; 809 810 /* 811 * We could be destroying the listening id. If so, ignore this 812 * upcall. 813 */ 814 spin_lock_irqsave(&listen_id_priv->lock, flags); 815 if (listen_id_priv->state != IW_CM_STATE_LISTEN) { 816 spin_unlock_irqrestore(&listen_id_priv->lock, flags); 817 iw_cm_reject(cm_id, NULL, 0); 818 iw_destroy_cm_id(cm_id); 819 goto out; 820 } 821 spin_unlock_irqrestore(&listen_id_priv->lock, flags); 822 823 ret = alloc_work_entries(cm_id_priv, 3); 824 if (ret) { 825 iw_cm_reject(cm_id, NULL, 0); 826 iw_destroy_cm_id(cm_id); 827 goto out; 828 } 829 830 /* Call the client CM handler */ 831 ret = cm_id->cm_handler(cm_id, iw_event); 832 if (ret) { 833 iw_cm_reject(cm_id, NULL, 0); 834 iw_destroy_cm_id(cm_id); 835 } 836 837 out: 838 if (iw_event->private_data_len) 839 kfree(iw_event->private_data); 840 } 841 842 /* 843 * Passive Side: CM_ID <-- ESTABLISHED 844 * 845 * The provider generated an ESTABLISHED event which means that 846 * the MPA negotion has completed successfully and we are now in MPA 847 * FPDU mode. 848 * 849 * This event can only be received in the CONN_RECV state. If the 850 * remote peer closed, the ESTABLISHED event would be received followed 851 * by the CLOSE event. If the app closes, it will block until we wake 852 * it up after processing this event. 853 */ 854 static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv, 855 struct iw_cm_event *iw_event) 856 { 857 unsigned long flags; 858 int ret; 859 860 spin_lock_irqsave(&cm_id_priv->lock, flags); 861 862 /* 863 * We clear the CONNECT_WAIT bit here to allow the callback 864 * function to call iw_cm_disconnect. Calling iw_destroy_cm_id 865 * from a callback handler is not allowed. 866 */ 867 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 868 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); 869 cm_id_priv->state = IW_CM_STATE_ESTABLISHED; 870 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 871 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); 872 wake_up_all(&cm_id_priv->connect_wait); 873 874 return ret; 875 } 876 877 /* 878 * Active Side: CM_ID <-- ESTABLISHED 879 * 880 * The app has called connect and is waiting for the established event to 881 * post it's requests to the server. This event will wake up anyone 882 * blocked in iw_cm_disconnect or iw_destroy_id. 883 */ 884 static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, 885 struct iw_cm_event *iw_event) 886 { 887 struct ib_qp *qp = NULL; 888 unsigned long flags; 889 int ret; 890 891 spin_lock_irqsave(&cm_id_priv->lock, flags); 892 /* 893 * Clear the connect wait bit so a callback function calling 894 * iw_cm_disconnect will not wait and deadlock this thread 895 */ 896 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 897 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT); 898 if (iw_event->status == 0) { 899 cm_id_priv->id.m_local_addr = iw_event->local_addr; 900 cm_id_priv->id.m_remote_addr = iw_event->remote_addr; 901 iw_event->local_addr = cm_id_priv->id.local_addr; 902 iw_event->remote_addr = cm_id_priv->id.remote_addr; 903 cm_id_priv->state = IW_CM_STATE_ESTABLISHED; 904 } else { 905 /* REJECTED or RESET */ 906 qp = cm_id_priv->qp; 907 cm_id_priv->qp = NULL; 908 cm_id_priv->state = IW_CM_STATE_IDLE; 909 } 910 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 911 if (qp) 912 cm_id_priv->id.device->ops.iw_rem_ref(qp); 913 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); 914 915 if (iw_event->private_data_len) 916 kfree(iw_event->private_data); 917 918 /* Wake up waiters on connect complete */ 919 wake_up_all(&cm_id_priv->connect_wait); 920 921 return ret; 922 } 923 924 /* 925 * CM_ID <-- CLOSING 926 * 927 * If in the ESTABLISHED state, move to CLOSING. 928 */ 929 static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv, 930 struct iw_cm_event *iw_event) 931 { 932 unsigned long flags; 933 934 spin_lock_irqsave(&cm_id_priv->lock, flags); 935 if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED) 936 cm_id_priv->state = IW_CM_STATE_CLOSING; 937 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 938 } 939 940 /* 941 * CM_ID <-- IDLE 942 * 943 * If in the ESTBLISHED or CLOSING states, the QP will have have been 944 * moved by the provider to the ERR state. Disassociate the CM_ID from 945 * the QP, move to IDLE, and remove the 'connected' reference. 946 * 947 * If in some other state, the cm_id was destroyed asynchronously. 948 * This is the last reference that will result in waking up 949 * the app thread blocked in iw_destroy_cm_id. 950 */ 951 static int cm_close_handler(struct iwcm_id_private *cm_id_priv, 952 struct iw_cm_event *iw_event) 953 { 954 struct ib_qp *qp; 955 unsigned long flags; 956 int ret = 0, notify_event = 0; 957 spin_lock_irqsave(&cm_id_priv->lock, flags); 958 qp = cm_id_priv->qp; 959 cm_id_priv->qp = NULL; 960 961 switch (cm_id_priv->state) { 962 case IW_CM_STATE_ESTABLISHED: 963 case IW_CM_STATE_CLOSING: 964 cm_id_priv->state = IW_CM_STATE_IDLE; 965 notify_event = 1; 966 break; 967 case IW_CM_STATE_DESTROYING: 968 break; 969 default: 970 BUG(); 971 } 972 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 973 974 if (qp) 975 cm_id_priv->id.device->ops.iw_rem_ref(qp); 976 if (notify_event) 977 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); 978 return ret; 979 } 980 981 static int process_event(struct iwcm_id_private *cm_id_priv, 982 struct iw_cm_event *iw_event) 983 { 984 int ret = 0; 985 986 switch (iw_event->event) { 987 case IW_CM_EVENT_CONNECT_REQUEST: 988 cm_conn_req_handler(cm_id_priv, iw_event); 989 break; 990 case IW_CM_EVENT_CONNECT_REPLY: 991 ret = cm_conn_rep_handler(cm_id_priv, iw_event); 992 break; 993 case IW_CM_EVENT_ESTABLISHED: 994 ret = cm_conn_est_handler(cm_id_priv, iw_event); 995 break; 996 case IW_CM_EVENT_DISCONNECT: 997 cm_disconnect_handler(cm_id_priv, iw_event); 998 break; 999 case IW_CM_EVENT_CLOSE: 1000 ret = cm_close_handler(cm_id_priv, iw_event); 1001 break; 1002 default: 1003 BUG(); 1004 } 1005 1006 return ret; 1007 } 1008 1009 /* 1010 * Process events on the work_list for the cm_id. If the callback 1011 * function requests that the cm_id be deleted, a flag is set in the 1012 * cm_id flags to indicate that when the last reference is 1013 * removed, the cm_id is to be destroyed. This is necessary to 1014 * distinguish between an object that will be destroyed by the app 1015 * thread asleep on the destroy_comp list vs. an object destroyed 1016 * here synchronously when the last reference is removed. 1017 */ 1018 static void cm_work_handler(struct work_struct *_work) 1019 { 1020 struct iwcm_work *work = container_of(_work, struct iwcm_work, work); 1021 struct iw_cm_event levent; 1022 struct iwcm_id_private *cm_id_priv = work->cm_id; 1023 unsigned long flags; 1024 int empty; 1025 int ret = 0; 1026 1027 spin_lock_irqsave(&cm_id_priv->lock, flags); 1028 empty = list_empty(&cm_id_priv->work_list); 1029 while (!empty) { 1030 work = list_entry(cm_id_priv->work_list.next, 1031 struct iwcm_work, list); 1032 list_del_init(&work->list); 1033 empty = list_empty(&cm_id_priv->work_list); 1034 levent = work->event; 1035 put_work(work); 1036 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1037 1038 if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) { 1039 ret = process_event(cm_id_priv, &levent); 1040 if (ret) 1041 WARN_ON_ONCE(destroy_cm_id(&cm_id_priv->id)); 1042 } else 1043 pr_debug("dropping event %d\n", levent.event); 1044 if (iwcm_deref_id(cm_id_priv)) 1045 return; 1046 if (empty) 1047 return; 1048 spin_lock_irqsave(&cm_id_priv->lock, flags); 1049 } 1050 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1051 } 1052 1053 /* 1054 * This function is called on interrupt context. Schedule events on 1055 * the iwcm_wq thread to allow callback functions to downcall into 1056 * the CM and/or block. Events are queued to a per-CM_ID 1057 * work_list. If this is the first event on the work_list, the work 1058 * element is also queued on the iwcm_wq thread. 1059 * 1060 * Each event holds a reference on the cm_id. Until the last posted 1061 * event has been delivered and processed, the cm_id cannot be 1062 * deleted. 1063 * 1064 * Returns: 1065 * 0 - the event was handled. 1066 * -ENOMEM - the event was not handled due to lack of resources. 1067 */ 1068 static int cm_event_handler(struct iw_cm_id *cm_id, 1069 struct iw_cm_event *iw_event) 1070 { 1071 struct iwcm_work *work; 1072 struct iwcm_id_private *cm_id_priv; 1073 unsigned long flags; 1074 int ret = 0; 1075 1076 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 1077 1078 spin_lock_irqsave(&cm_id_priv->lock, flags); 1079 work = get_work(cm_id_priv); 1080 if (!work) { 1081 ret = -ENOMEM; 1082 goto out; 1083 } 1084 1085 INIT_WORK(&work->work, cm_work_handler); 1086 work->cm_id = cm_id_priv; 1087 work->event = *iw_event; 1088 1089 if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST || 1090 work->event.event == IW_CM_EVENT_CONNECT_REPLY) && 1091 work->event.private_data_len) { 1092 ret = copy_private_data(&work->event); 1093 if (ret) { 1094 put_work(work); 1095 goto out; 1096 } 1097 } 1098 1099 refcount_inc(&cm_id_priv->refcount); 1100 if (list_empty(&cm_id_priv->work_list)) { 1101 list_add_tail(&work->list, &cm_id_priv->work_list); 1102 queue_work(iwcm_wq, &work->work); 1103 } else 1104 list_add_tail(&work->list, &cm_id_priv->work_list); 1105 out: 1106 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1107 return ret; 1108 } 1109 1110 static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv, 1111 struct ib_qp_attr *qp_attr, 1112 int *qp_attr_mask) 1113 { 1114 unsigned long flags; 1115 int ret; 1116 1117 spin_lock_irqsave(&cm_id_priv->lock, flags); 1118 switch (cm_id_priv->state) { 1119 case IW_CM_STATE_IDLE: 1120 case IW_CM_STATE_CONN_SENT: 1121 case IW_CM_STATE_CONN_RECV: 1122 case IW_CM_STATE_ESTABLISHED: 1123 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; 1124 qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE| 1125 IB_ACCESS_REMOTE_READ; 1126 ret = 0; 1127 break; 1128 default: 1129 ret = -EINVAL; 1130 break; 1131 } 1132 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1133 return ret; 1134 } 1135 1136 static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv, 1137 struct ib_qp_attr *qp_attr, 1138 int *qp_attr_mask) 1139 { 1140 unsigned long flags; 1141 int ret; 1142 1143 spin_lock_irqsave(&cm_id_priv->lock, flags); 1144 switch (cm_id_priv->state) { 1145 case IW_CM_STATE_IDLE: 1146 case IW_CM_STATE_CONN_SENT: 1147 case IW_CM_STATE_CONN_RECV: 1148 case IW_CM_STATE_ESTABLISHED: 1149 *qp_attr_mask = 0; 1150 ret = 0; 1151 break; 1152 default: 1153 ret = -EINVAL; 1154 break; 1155 } 1156 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1157 return ret; 1158 } 1159 1160 int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, 1161 struct ib_qp_attr *qp_attr, 1162 int *qp_attr_mask) 1163 { 1164 struct iwcm_id_private *cm_id_priv; 1165 int ret; 1166 1167 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 1168 switch (qp_attr->qp_state) { 1169 case IB_QPS_INIT: 1170 case IB_QPS_RTR: 1171 ret = iwcm_init_qp_init_attr(cm_id_priv, 1172 qp_attr, qp_attr_mask); 1173 break; 1174 case IB_QPS_RTS: 1175 ret = iwcm_init_qp_rts_attr(cm_id_priv, 1176 qp_attr, qp_attr_mask); 1177 break; 1178 default: 1179 ret = -EINVAL; 1180 break; 1181 } 1182 return ret; 1183 } 1184 EXPORT_SYMBOL(iw_cm_init_qp_attr); 1185 1186 static int __init iw_cm_init(void) 1187 { 1188 int ret; 1189 1190 ret = iwpm_init(RDMA_NL_IWCM); 1191 if (ret) 1192 return ret; 1193 1194 iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", WQ_MEM_RECLAIM); 1195 if (!iwcm_wq) 1196 goto err_alloc; 1197 1198 iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm", 1199 iwcm_ctl_table); 1200 if (!iwcm_ctl_table_hdr) { 1201 pr_err("iw_cm: couldn't register sysctl paths\n"); 1202 goto err_sysctl; 1203 } 1204 1205 rdma_nl_register(RDMA_NL_IWCM, iwcm_nl_cb_table); 1206 return 0; 1207 1208 err_sysctl: 1209 destroy_workqueue(iwcm_wq); 1210 err_alloc: 1211 iwpm_exit(RDMA_NL_IWCM); 1212 return -ENOMEM; 1213 } 1214 1215 static void __exit iw_cm_cleanup(void) 1216 { 1217 rdma_nl_unregister(RDMA_NL_IWCM); 1218 unregister_net_sysctl_table(iwcm_ctl_table_hdr); 1219 destroy_workqueue(iwcm_wq); 1220 iwpm_exit(RDMA_NL_IWCM); 1221 } 1222 1223 MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_IWCM, 2); 1224 1225 module_init(iw_cm_init); 1226 module_exit(iw_cm_cleanup); 1227