1 /* 2 * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. 3 * Copyright (c) 2004 Topspin Corporation. All rights reserved. 4 * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. 5 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 6 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. 7 * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. 8 * 9 * This software is available to you under a choice of one of two 10 * licenses. You may choose to be licensed under the terms of the GNU 11 * General Public License (GPL) Version 2, available from the file 12 * COPYING in the main directory of this source tree, or the 13 * OpenIB.org BSD license below: 14 * 15 * Redistribution and use in source and binary forms, with or 16 * without modification, are permitted provided that the following 17 * conditions are met: 18 * 19 * - Redistributions of source code must retain the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer. 22 * 23 * - Redistributions in binary form must reproduce the above 24 * copyright notice, this list of conditions and the following 25 * disclaimer in the documentation and/or other materials 26 * provided with the distribution. 27 * 28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 29 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 30 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 31 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 32 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 33 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 34 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 35 * SOFTWARE. 36 * 37 */ 38 #include <linux/dma-mapping.h> 39 #include <linux/err.h> 40 #include <linux/idr.h> 41 #include <linux/interrupt.h> 42 #include <linux/rbtree.h> 43 #include <linux/sched.h> 44 #include <linux/spinlock.h> 45 #include <linux/workqueue.h> 46 #include <linux/completion.h> 47 #include <linux/slab.h> 48 #include <linux/module.h> 49 #include <linux/sysctl.h> 50 51 #include <rdma/iw_cm.h> 52 #include <rdma/ib_addr.h> 53 #include <rdma/iw_portmap.h> 54 #include <rdma/rdma_netlink.h> 55 56 #include "iwcm.h" 57 58 MODULE_AUTHOR("Tom Tucker"); 59 MODULE_DESCRIPTION("iWARP CM"); 60 MODULE_LICENSE("Dual BSD/GPL"); 61 62 static const char * const iwcm_rej_reason_strs[] = { 63 [ECONNRESET] = "reset by remote host", 64 [ECONNREFUSED] = "refused by remote application", 65 [ETIMEDOUT] = "setup timeout", 66 }; 67 68 const char *__attribute_const__ iwcm_reject_msg(int reason) 69 { 70 size_t index; 71 72 /* iWARP uses negative errnos */ 73 index = -reason; 74 75 if (index < ARRAY_SIZE(iwcm_rej_reason_strs) && 76 iwcm_rej_reason_strs[index]) 77 return iwcm_rej_reason_strs[index]; 78 else 79 return "unrecognized reason"; 80 } 81 EXPORT_SYMBOL(iwcm_reject_msg); 82 83 static struct rdma_nl_cbs iwcm_nl_cb_table[RDMA_NL_IWPM_NUM_OPS] = { 84 [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, 85 [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, 86 [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, 87 [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb}, 88 [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb}, 89 [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb}, 90 [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}, 91 [RDMA_NL_IWPM_HELLO] = {.dump = iwpm_hello_cb} 92 }; 93 94 static struct workqueue_struct *iwcm_wq; 95 struct iwcm_work { 96 struct work_struct work; 97 struct iwcm_id_private *cm_id; 98 struct list_head list; 99 struct iw_cm_event event; 100 struct list_head free_list; 101 }; 102 103 static unsigned int default_backlog = 256; 104 105 static struct ctl_table_header *iwcm_ctl_table_hdr; 106 static struct ctl_table iwcm_ctl_table[] = { 107 { 108 .procname = "default_backlog", 109 .data = &default_backlog, 110 .maxlen = sizeof(default_backlog), 111 .mode = 0644, 112 .proc_handler = proc_dointvec, 113 }, 114 { } 115 }; 116 117 /* 118 * The following services provide a mechanism for pre-allocating iwcm_work 119 * elements. The design pre-allocates them based on the cm_id type: 120 * LISTENING IDS: Get enough elements preallocated to handle the 121 * listen backlog. 122 * ACTIVE IDS: 4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE 123 * PASSIVE IDS: 3: ESTABLISHED, DISCONNECT, CLOSE 124 * 125 * Allocating them in connect and listen avoids having to deal 126 * with allocation failures on the event upcall from the provider (which 127 * is called in the interrupt context). 128 * 129 * One exception is when creating the cm_id for incoming connection requests. 130 * There are two cases: 131 * 1) in the event upcall, cm_event_handler(), for a listening cm_id. If 132 * the backlog is exceeded, then no more connection request events will 133 * be processed. cm_event_handler() returns -ENOMEM in this case. Its up 134 * to the provider to reject the connection request. 135 * 2) in the connection request workqueue handler, cm_conn_req_handler(). 136 * If work elements cannot be allocated for the new connect request cm_id, 137 * then IWCM will call the provider reject method. This is ok since 138 * cm_conn_req_handler() runs in the workqueue thread context. 139 */ 140 141 static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv) 142 { 143 struct iwcm_work *work; 144 145 if (list_empty(&cm_id_priv->work_free_list)) 146 return NULL; 147 work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work, 148 free_list); 149 list_del_init(&work->free_list); 150 return work; 151 } 152 153 static void put_work(struct iwcm_work *work) 154 { 155 list_add(&work->free_list, &work->cm_id->work_free_list); 156 } 157 158 static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) 159 { 160 struct list_head *e, *tmp; 161 162 list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) { 163 list_del(e); 164 kfree(list_entry(e, struct iwcm_work, free_list)); 165 } 166 } 167 168 static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) 169 { 170 struct iwcm_work *work; 171 172 BUG_ON(!list_empty(&cm_id_priv->work_free_list)); 173 while (count--) { 174 work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL); 175 if (!work) { 176 dealloc_work_entries(cm_id_priv); 177 return -ENOMEM; 178 } 179 work->cm_id = cm_id_priv; 180 INIT_LIST_HEAD(&work->list); 181 put_work(work); 182 } 183 return 0; 184 } 185 186 /* 187 * Save private data from incoming connection requests to 188 * iw_cm_event, so the low level driver doesn't have to. Adjust 189 * the event ptr to point to the local copy. 190 */ 191 static int copy_private_data(struct iw_cm_event *event) 192 { 193 void *p; 194 195 p = kmemdup(event->private_data, event->private_data_len, GFP_ATOMIC); 196 if (!p) 197 return -ENOMEM; 198 event->private_data = p; 199 return 0; 200 } 201 202 static void free_cm_id(struct iwcm_id_private *cm_id_priv) 203 { 204 dealloc_work_entries(cm_id_priv); 205 kfree(cm_id_priv); 206 } 207 208 /* 209 * Release a reference on cm_id. If the last reference is being 210 * released, free the cm_id and return 1. 211 */ 212 static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv) 213 { 214 BUG_ON(atomic_read(&cm_id_priv->refcount)==0); 215 if (atomic_dec_and_test(&cm_id_priv->refcount)) { 216 BUG_ON(!list_empty(&cm_id_priv->work_list)); 217 free_cm_id(cm_id_priv); 218 return 1; 219 } 220 221 return 0; 222 } 223 224 static void add_ref(struct iw_cm_id *cm_id) 225 { 226 struct iwcm_id_private *cm_id_priv; 227 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 228 atomic_inc(&cm_id_priv->refcount); 229 } 230 231 static void rem_ref(struct iw_cm_id *cm_id) 232 { 233 struct iwcm_id_private *cm_id_priv; 234 235 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 236 237 (void)iwcm_deref_id(cm_id_priv); 238 } 239 240 static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event); 241 242 struct iw_cm_id *iw_create_cm_id(struct ib_device *device, 243 iw_cm_handler cm_handler, 244 void *context) 245 { 246 struct iwcm_id_private *cm_id_priv; 247 248 cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL); 249 if (!cm_id_priv) 250 return ERR_PTR(-ENOMEM); 251 252 cm_id_priv->state = IW_CM_STATE_IDLE; 253 cm_id_priv->id.device = device; 254 cm_id_priv->id.cm_handler = cm_handler; 255 cm_id_priv->id.context = context; 256 cm_id_priv->id.event_handler = cm_event_handler; 257 cm_id_priv->id.add_ref = add_ref; 258 cm_id_priv->id.rem_ref = rem_ref; 259 spin_lock_init(&cm_id_priv->lock); 260 atomic_set(&cm_id_priv->refcount, 1); 261 init_waitqueue_head(&cm_id_priv->connect_wait); 262 init_completion(&cm_id_priv->destroy_comp); 263 INIT_LIST_HEAD(&cm_id_priv->work_list); 264 INIT_LIST_HEAD(&cm_id_priv->work_free_list); 265 266 return &cm_id_priv->id; 267 } 268 EXPORT_SYMBOL(iw_create_cm_id); 269 270 271 static int iwcm_modify_qp_err(struct ib_qp *qp) 272 { 273 struct ib_qp_attr qp_attr; 274 275 if (!qp) 276 return -EINVAL; 277 278 qp_attr.qp_state = IB_QPS_ERR; 279 return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); 280 } 281 282 /* 283 * This is really the RDMAC CLOSING state. It is most similar to the 284 * IB SQD QP state. 285 */ 286 static int iwcm_modify_qp_sqd(struct ib_qp *qp) 287 { 288 struct ib_qp_attr qp_attr; 289 290 BUG_ON(qp == NULL); 291 qp_attr.qp_state = IB_QPS_SQD; 292 return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); 293 } 294 295 /* 296 * CM_ID <-- CLOSING 297 * 298 * Block if a passive or active connection is currently being processed. Then 299 * process the event as follows: 300 * - If we are ESTABLISHED, move to CLOSING and modify the QP state 301 * based on the abrupt flag 302 * - If the connection is already in the CLOSING or IDLE state, the peer is 303 * disconnecting concurrently with us and we've already seen the 304 * DISCONNECT event -- ignore the request and return 0 305 * - Disconnect on a listening endpoint returns -EINVAL 306 */ 307 int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt) 308 { 309 struct iwcm_id_private *cm_id_priv; 310 unsigned long flags; 311 int ret = 0; 312 struct ib_qp *qp = NULL; 313 314 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 315 /* Wait if we're currently in a connect or accept downcall */ 316 wait_event(cm_id_priv->connect_wait, 317 !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); 318 319 spin_lock_irqsave(&cm_id_priv->lock, flags); 320 switch (cm_id_priv->state) { 321 case IW_CM_STATE_ESTABLISHED: 322 cm_id_priv->state = IW_CM_STATE_CLOSING; 323 324 /* QP could be <nul> for user-mode client */ 325 if (cm_id_priv->qp) 326 qp = cm_id_priv->qp; 327 else 328 ret = -EINVAL; 329 break; 330 case IW_CM_STATE_LISTEN: 331 ret = -EINVAL; 332 break; 333 case IW_CM_STATE_CLOSING: 334 /* remote peer closed first */ 335 case IW_CM_STATE_IDLE: 336 /* accept or connect returned !0 */ 337 break; 338 case IW_CM_STATE_CONN_RECV: 339 /* 340 * App called disconnect before/without calling accept after 341 * connect_request event delivered. 342 */ 343 break; 344 case IW_CM_STATE_CONN_SENT: 345 /* Can only get here if wait above fails */ 346 default: 347 BUG(); 348 } 349 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 350 351 if (qp) { 352 if (abrupt) 353 ret = iwcm_modify_qp_err(qp); 354 else 355 ret = iwcm_modify_qp_sqd(qp); 356 357 /* 358 * If both sides are disconnecting the QP could 359 * already be in ERR or SQD states 360 */ 361 ret = 0; 362 } 363 364 return ret; 365 } 366 EXPORT_SYMBOL(iw_cm_disconnect); 367 368 /* 369 * CM_ID <-- DESTROYING 370 * 371 * Clean up all resources associated with the connection and release 372 * the initial reference taken by iw_create_cm_id. 373 */ 374 static void destroy_cm_id(struct iw_cm_id *cm_id) 375 { 376 struct iwcm_id_private *cm_id_priv; 377 struct ib_qp *qp; 378 unsigned long flags; 379 380 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 381 /* 382 * Wait if we're currently in a connect or accept downcall. A 383 * listening endpoint should never block here. 384 */ 385 wait_event(cm_id_priv->connect_wait, 386 !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); 387 388 /* 389 * Since we're deleting the cm_id, drop any events that 390 * might arrive before the last dereference. 391 */ 392 set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags); 393 394 spin_lock_irqsave(&cm_id_priv->lock, flags); 395 qp = cm_id_priv->qp; 396 cm_id_priv->qp = NULL; 397 398 switch (cm_id_priv->state) { 399 case IW_CM_STATE_LISTEN: 400 cm_id_priv->state = IW_CM_STATE_DESTROYING; 401 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 402 /* destroy the listening endpoint */ 403 cm_id->device->ops.iw_destroy_listen(cm_id); 404 spin_lock_irqsave(&cm_id_priv->lock, flags); 405 break; 406 case IW_CM_STATE_ESTABLISHED: 407 cm_id_priv->state = IW_CM_STATE_DESTROYING; 408 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 409 /* Abrupt close of the connection */ 410 (void)iwcm_modify_qp_err(qp); 411 spin_lock_irqsave(&cm_id_priv->lock, flags); 412 break; 413 case IW_CM_STATE_IDLE: 414 case IW_CM_STATE_CLOSING: 415 cm_id_priv->state = IW_CM_STATE_DESTROYING; 416 break; 417 case IW_CM_STATE_CONN_RECV: 418 /* 419 * App called destroy before/without calling accept after 420 * receiving connection request event notification or 421 * returned non zero from the event callback function. 422 * In either case, must tell the provider to reject. 423 */ 424 cm_id_priv->state = IW_CM_STATE_DESTROYING; 425 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 426 cm_id->device->ops.iw_reject(cm_id, NULL, 0); 427 spin_lock_irqsave(&cm_id_priv->lock, flags); 428 break; 429 case IW_CM_STATE_CONN_SENT: 430 case IW_CM_STATE_DESTROYING: 431 default: 432 BUG(); 433 break; 434 } 435 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 436 if (qp) 437 cm_id_priv->id.device->ops.iw_rem_ref(qp); 438 439 if (cm_id->mapped) { 440 iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr); 441 iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM); 442 } 443 444 (void)iwcm_deref_id(cm_id_priv); 445 } 446 447 /* 448 * This function is only called by the application thread and cannot 449 * be called by the event thread. The function will wait for all 450 * references to be released on the cm_id and then kfree the cm_id 451 * object. 452 */ 453 void iw_destroy_cm_id(struct iw_cm_id *cm_id) 454 { 455 destroy_cm_id(cm_id); 456 } 457 EXPORT_SYMBOL(iw_destroy_cm_id); 458 459 /** 460 * iw_cm_check_wildcard - If IP address is 0 then use original 461 * @pm_addr: sockaddr containing the ip to check for wildcard 462 * @cm_addr: sockaddr containing the actual IP address 463 * @cm_outaddr: sockaddr to set IP addr which leaving port 464 * 465 * Checks the pm_addr for wildcard and then sets cm_outaddr's 466 * IP to the actual (cm_addr). 467 */ 468 static void iw_cm_check_wildcard(struct sockaddr_storage *pm_addr, 469 struct sockaddr_storage *cm_addr, 470 struct sockaddr_storage *cm_outaddr) 471 { 472 if (pm_addr->ss_family == AF_INET) { 473 struct sockaddr_in *pm4_addr = (struct sockaddr_in *)pm_addr; 474 475 if (pm4_addr->sin_addr.s_addr == htonl(INADDR_ANY)) { 476 struct sockaddr_in *cm4_addr = 477 (struct sockaddr_in *)cm_addr; 478 struct sockaddr_in *cm4_outaddr = 479 (struct sockaddr_in *)cm_outaddr; 480 481 cm4_outaddr->sin_addr = cm4_addr->sin_addr; 482 } 483 } else { 484 struct sockaddr_in6 *pm6_addr = (struct sockaddr_in6 *)pm_addr; 485 486 if (ipv6_addr_type(&pm6_addr->sin6_addr) == IPV6_ADDR_ANY) { 487 struct sockaddr_in6 *cm6_addr = 488 (struct sockaddr_in6 *)cm_addr; 489 struct sockaddr_in6 *cm6_outaddr = 490 (struct sockaddr_in6 *)cm_outaddr; 491 492 cm6_outaddr->sin6_addr = cm6_addr->sin6_addr; 493 } 494 } 495 } 496 497 /** 498 * iw_cm_map - Use portmapper to map the ports 499 * @cm_id: connection manager pointer 500 * @active: Indicates the active side when true 501 * returns nonzero for error only if iwpm_create_mapinfo() fails 502 * 503 * Tries to add a mapping for a port using the Portmapper. If 504 * successful in mapping the IP/Port it will check the remote 505 * mapped IP address for a wildcard IP address and replace the 506 * zero IP address with the remote_addr. 507 */ 508 static int iw_cm_map(struct iw_cm_id *cm_id, bool active) 509 { 510 const char *devname = dev_name(&cm_id->device->dev); 511 const char *ifname = cm_id->device->iw_ifname; 512 struct iwpm_dev_data pm_reg_msg = {}; 513 struct iwpm_sa_data pm_msg; 514 int status; 515 516 if (strlen(devname) >= sizeof(pm_reg_msg.dev_name) || 517 strlen(ifname) >= sizeof(pm_reg_msg.if_name)) 518 return -EINVAL; 519 520 cm_id->m_local_addr = cm_id->local_addr; 521 cm_id->m_remote_addr = cm_id->remote_addr; 522 523 strcpy(pm_reg_msg.dev_name, devname); 524 strcpy(pm_reg_msg.if_name, ifname); 525 526 if (iwpm_register_pid(&pm_reg_msg, RDMA_NL_IWCM) || 527 !iwpm_valid_pid()) 528 return 0; 529 530 cm_id->mapped = true; 531 pm_msg.loc_addr = cm_id->local_addr; 532 pm_msg.rem_addr = cm_id->remote_addr; 533 pm_msg.flags = (cm_id->device->iw_driver_flags & IW_F_NO_PORT_MAP) ? 534 IWPM_FLAGS_NO_PORT_MAP : 0; 535 if (active) 536 status = iwpm_add_and_query_mapping(&pm_msg, 537 RDMA_NL_IWCM); 538 else 539 status = iwpm_add_mapping(&pm_msg, RDMA_NL_IWCM); 540 541 if (!status) { 542 cm_id->m_local_addr = pm_msg.mapped_loc_addr; 543 if (active) { 544 cm_id->m_remote_addr = pm_msg.mapped_rem_addr; 545 iw_cm_check_wildcard(&pm_msg.mapped_rem_addr, 546 &cm_id->remote_addr, 547 &cm_id->m_remote_addr); 548 } 549 } 550 551 return iwpm_create_mapinfo(&cm_id->local_addr, 552 &cm_id->m_local_addr, 553 RDMA_NL_IWCM, pm_msg.flags); 554 } 555 556 /* 557 * CM_ID <-- LISTEN 558 * 559 * Start listening for connect requests. Generates one CONNECT_REQUEST 560 * event for each inbound connect request. 561 */ 562 int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) 563 { 564 struct iwcm_id_private *cm_id_priv; 565 unsigned long flags; 566 int ret; 567 568 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 569 570 if (!backlog) 571 backlog = default_backlog; 572 573 ret = alloc_work_entries(cm_id_priv, backlog); 574 if (ret) 575 return ret; 576 577 spin_lock_irqsave(&cm_id_priv->lock, flags); 578 switch (cm_id_priv->state) { 579 case IW_CM_STATE_IDLE: 580 cm_id_priv->state = IW_CM_STATE_LISTEN; 581 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 582 ret = iw_cm_map(cm_id, false); 583 if (!ret) 584 ret = cm_id->device->ops.iw_create_listen(cm_id, 585 backlog); 586 if (ret) 587 cm_id_priv->state = IW_CM_STATE_IDLE; 588 spin_lock_irqsave(&cm_id_priv->lock, flags); 589 break; 590 default: 591 ret = -EINVAL; 592 } 593 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 594 595 return ret; 596 } 597 EXPORT_SYMBOL(iw_cm_listen); 598 599 /* 600 * CM_ID <-- IDLE 601 * 602 * Rejects an inbound connection request. No events are generated. 603 */ 604 int iw_cm_reject(struct iw_cm_id *cm_id, 605 const void *private_data, 606 u8 private_data_len) 607 { 608 struct iwcm_id_private *cm_id_priv; 609 unsigned long flags; 610 int ret; 611 612 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 613 set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 614 615 spin_lock_irqsave(&cm_id_priv->lock, flags); 616 if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { 617 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 618 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 619 wake_up_all(&cm_id_priv->connect_wait); 620 return -EINVAL; 621 } 622 cm_id_priv->state = IW_CM_STATE_IDLE; 623 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 624 625 ret = cm_id->device->ops.iw_reject(cm_id, private_data, 626 private_data_len); 627 628 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 629 wake_up_all(&cm_id_priv->connect_wait); 630 631 return ret; 632 } 633 EXPORT_SYMBOL(iw_cm_reject); 634 635 /* 636 * CM_ID <-- ESTABLISHED 637 * 638 * Accepts an inbound connection request and generates an ESTABLISHED 639 * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block 640 * until the ESTABLISHED event is received from the provider. 641 */ 642 int iw_cm_accept(struct iw_cm_id *cm_id, 643 struct iw_cm_conn_param *iw_param) 644 { 645 struct iwcm_id_private *cm_id_priv; 646 struct ib_qp *qp; 647 unsigned long flags; 648 int ret; 649 650 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 651 set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 652 653 spin_lock_irqsave(&cm_id_priv->lock, flags); 654 if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { 655 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 656 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 657 wake_up_all(&cm_id_priv->connect_wait); 658 return -EINVAL; 659 } 660 /* Get the ib_qp given the QPN */ 661 qp = cm_id->device->ops.iw_get_qp(cm_id->device, iw_param->qpn); 662 if (!qp) { 663 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 664 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 665 wake_up_all(&cm_id_priv->connect_wait); 666 return -EINVAL; 667 } 668 cm_id->device->ops.iw_add_ref(qp); 669 cm_id_priv->qp = qp; 670 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 671 672 ret = cm_id->device->ops.iw_accept(cm_id, iw_param); 673 if (ret) { 674 /* An error on accept precludes provider events */ 675 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); 676 cm_id_priv->state = IW_CM_STATE_IDLE; 677 spin_lock_irqsave(&cm_id_priv->lock, flags); 678 qp = cm_id_priv->qp; 679 cm_id_priv->qp = NULL; 680 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 681 if (qp) 682 cm_id->device->ops.iw_rem_ref(qp); 683 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 684 wake_up_all(&cm_id_priv->connect_wait); 685 } 686 687 return ret; 688 } 689 EXPORT_SYMBOL(iw_cm_accept); 690 691 /* 692 * Active Side: CM_ID <-- CONN_SENT 693 * 694 * If successful, results in the generation of a CONNECT_REPLY 695 * event. iw_cm_disconnect and iw_cm_destroy will block until the 696 * CONNECT_REPLY event is received from the provider. 697 */ 698 int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) 699 { 700 struct iwcm_id_private *cm_id_priv; 701 int ret; 702 unsigned long flags; 703 struct ib_qp *qp = NULL; 704 705 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 706 707 ret = alloc_work_entries(cm_id_priv, 4); 708 if (ret) 709 return ret; 710 711 set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 712 spin_lock_irqsave(&cm_id_priv->lock, flags); 713 714 if (cm_id_priv->state != IW_CM_STATE_IDLE) { 715 ret = -EINVAL; 716 goto err; 717 } 718 719 /* Get the ib_qp given the QPN */ 720 qp = cm_id->device->ops.iw_get_qp(cm_id->device, iw_param->qpn); 721 if (!qp) { 722 ret = -EINVAL; 723 goto err; 724 } 725 cm_id->device->ops.iw_add_ref(qp); 726 cm_id_priv->qp = qp; 727 cm_id_priv->state = IW_CM_STATE_CONN_SENT; 728 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 729 730 ret = iw_cm_map(cm_id, true); 731 if (!ret) 732 ret = cm_id->device->ops.iw_connect(cm_id, iw_param); 733 if (!ret) 734 return 0; /* success */ 735 736 spin_lock_irqsave(&cm_id_priv->lock, flags); 737 qp = cm_id_priv->qp; 738 cm_id_priv->qp = NULL; 739 cm_id_priv->state = IW_CM_STATE_IDLE; 740 err: 741 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 742 if (qp) 743 cm_id->device->ops.iw_rem_ref(qp); 744 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 745 wake_up_all(&cm_id_priv->connect_wait); 746 return ret; 747 } 748 EXPORT_SYMBOL(iw_cm_connect); 749 750 /* 751 * Passive Side: new CM_ID <-- CONN_RECV 752 * 753 * Handles an inbound connect request. The function creates a new 754 * iw_cm_id to represent the new connection and inherits the client 755 * callback function and other attributes from the listening parent. 756 * 757 * The work item contains a pointer to the listen_cm_id and the event. The 758 * listen_cm_id contains the client cm_handler, context and 759 * device. These are copied when the device is cloned. The event 760 * contains the new four tuple. 761 * 762 * An error on the child should not affect the parent, so this 763 * function does not return a value. 764 */ 765 static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, 766 struct iw_cm_event *iw_event) 767 { 768 unsigned long flags; 769 struct iw_cm_id *cm_id; 770 struct iwcm_id_private *cm_id_priv; 771 int ret; 772 773 /* 774 * The provider should never generate a connection request 775 * event with a bad status. 776 */ 777 BUG_ON(iw_event->status); 778 779 cm_id = iw_create_cm_id(listen_id_priv->id.device, 780 listen_id_priv->id.cm_handler, 781 listen_id_priv->id.context); 782 /* If the cm_id could not be created, ignore the request */ 783 if (IS_ERR(cm_id)) 784 goto out; 785 786 cm_id->provider_data = iw_event->provider_data; 787 cm_id->m_local_addr = iw_event->local_addr; 788 cm_id->m_remote_addr = iw_event->remote_addr; 789 cm_id->local_addr = listen_id_priv->id.local_addr; 790 791 ret = iwpm_get_remote_info(&listen_id_priv->id.m_local_addr, 792 &iw_event->remote_addr, 793 &cm_id->remote_addr, 794 RDMA_NL_IWCM); 795 if (ret) { 796 cm_id->remote_addr = iw_event->remote_addr; 797 } else { 798 iw_cm_check_wildcard(&listen_id_priv->id.m_local_addr, 799 &iw_event->local_addr, 800 &cm_id->local_addr); 801 iw_event->local_addr = cm_id->local_addr; 802 iw_event->remote_addr = cm_id->remote_addr; 803 } 804 805 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 806 cm_id_priv->state = IW_CM_STATE_CONN_RECV; 807 808 /* 809 * We could be destroying the listening id. If so, ignore this 810 * upcall. 811 */ 812 spin_lock_irqsave(&listen_id_priv->lock, flags); 813 if (listen_id_priv->state != IW_CM_STATE_LISTEN) { 814 spin_unlock_irqrestore(&listen_id_priv->lock, flags); 815 iw_cm_reject(cm_id, NULL, 0); 816 iw_destroy_cm_id(cm_id); 817 goto out; 818 } 819 spin_unlock_irqrestore(&listen_id_priv->lock, flags); 820 821 ret = alloc_work_entries(cm_id_priv, 3); 822 if (ret) { 823 iw_cm_reject(cm_id, NULL, 0); 824 iw_destroy_cm_id(cm_id); 825 goto out; 826 } 827 828 /* Call the client CM handler */ 829 ret = cm_id->cm_handler(cm_id, iw_event); 830 if (ret) { 831 iw_cm_reject(cm_id, NULL, 0); 832 iw_destroy_cm_id(cm_id); 833 } 834 835 out: 836 if (iw_event->private_data_len) 837 kfree(iw_event->private_data); 838 } 839 840 /* 841 * Passive Side: CM_ID <-- ESTABLISHED 842 * 843 * The provider generated an ESTABLISHED event which means that 844 * the MPA negotion has completed successfully and we are now in MPA 845 * FPDU mode. 846 * 847 * This event can only be received in the CONN_RECV state. If the 848 * remote peer closed, the ESTABLISHED event would be received followed 849 * by the CLOSE event. If the app closes, it will block until we wake 850 * it up after processing this event. 851 */ 852 static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv, 853 struct iw_cm_event *iw_event) 854 { 855 unsigned long flags; 856 int ret; 857 858 spin_lock_irqsave(&cm_id_priv->lock, flags); 859 860 /* 861 * We clear the CONNECT_WAIT bit here to allow the callback 862 * function to call iw_cm_disconnect. Calling iw_destroy_cm_id 863 * from a callback handler is not allowed. 864 */ 865 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 866 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); 867 cm_id_priv->state = IW_CM_STATE_ESTABLISHED; 868 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 869 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); 870 wake_up_all(&cm_id_priv->connect_wait); 871 872 return ret; 873 } 874 875 /* 876 * Active Side: CM_ID <-- ESTABLISHED 877 * 878 * The app has called connect and is waiting for the established event to 879 * post it's requests to the server. This event will wake up anyone 880 * blocked in iw_cm_disconnect or iw_destroy_id. 881 */ 882 static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, 883 struct iw_cm_event *iw_event) 884 { 885 struct ib_qp *qp = NULL; 886 unsigned long flags; 887 int ret; 888 889 spin_lock_irqsave(&cm_id_priv->lock, flags); 890 /* 891 * Clear the connect wait bit so a callback function calling 892 * iw_cm_disconnect will not wait and deadlock this thread 893 */ 894 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 895 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT); 896 if (iw_event->status == 0) { 897 cm_id_priv->id.m_local_addr = iw_event->local_addr; 898 cm_id_priv->id.m_remote_addr = iw_event->remote_addr; 899 iw_event->local_addr = cm_id_priv->id.local_addr; 900 iw_event->remote_addr = cm_id_priv->id.remote_addr; 901 cm_id_priv->state = IW_CM_STATE_ESTABLISHED; 902 } else { 903 /* REJECTED or RESET */ 904 qp = cm_id_priv->qp; 905 cm_id_priv->qp = NULL; 906 cm_id_priv->state = IW_CM_STATE_IDLE; 907 } 908 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 909 if (qp) 910 cm_id_priv->id.device->ops.iw_rem_ref(qp); 911 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); 912 913 if (iw_event->private_data_len) 914 kfree(iw_event->private_data); 915 916 /* Wake up waiters on connect complete */ 917 wake_up_all(&cm_id_priv->connect_wait); 918 919 return ret; 920 } 921 922 /* 923 * CM_ID <-- CLOSING 924 * 925 * If in the ESTABLISHED state, move to CLOSING. 926 */ 927 static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv, 928 struct iw_cm_event *iw_event) 929 { 930 unsigned long flags; 931 932 spin_lock_irqsave(&cm_id_priv->lock, flags); 933 if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED) 934 cm_id_priv->state = IW_CM_STATE_CLOSING; 935 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 936 } 937 938 /* 939 * CM_ID <-- IDLE 940 * 941 * If in the ESTBLISHED or CLOSING states, the QP will have have been 942 * moved by the provider to the ERR state. Disassociate the CM_ID from 943 * the QP, move to IDLE, and remove the 'connected' reference. 944 * 945 * If in some other state, the cm_id was destroyed asynchronously. 946 * This is the last reference that will result in waking up 947 * the app thread blocked in iw_destroy_cm_id. 948 */ 949 static int cm_close_handler(struct iwcm_id_private *cm_id_priv, 950 struct iw_cm_event *iw_event) 951 { 952 struct ib_qp *qp; 953 unsigned long flags; 954 int ret = 0, notify_event = 0; 955 spin_lock_irqsave(&cm_id_priv->lock, flags); 956 qp = cm_id_priv->qp; 957 cm_id_priv->qp = NULL; 958 959 switch (cm_id_priv->state) { 960 case IW_CM_STATE_ESTABLISHED: 961 case IW_CM_STATE_CLOSING: 962 cm_id_priv->state = IW_CM_STATE_IDLE; 963 notify_event = 1; 964 break; 965 case IW_CM_STATE_DESTROYING: 966 break; 967 default: 968 BUG(); 969 } 970 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 971 972 if (qp) 973 cm_id_priv->id.device->ops.iw_rem_ref(qp); 974 if (notify_event) 975 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); 976 return ret; 977 } 978 979 static int process_event(struct iwcm_id_private *cm_id_priv, 980 struct iw_cm_event *iw_event) 981 { 982 int ret = 0; 983 984 switch (iw_event->event) { 985 case IW_CM_EVENT_CONNECT_REQUEST: 986 cm_conn_req_handler(cm_id_priv, iw_event); 987 break; 988 case IW_CM_EVENT_CONNECT_REPLY: 989 ret = cm_conn_rep_handler(cm_id_priv, iw_event); 990 break; 991 case IW_CM_EVENT_ESTABLISHED: 992 ret = cm_conn_est_handler(cm_id_priv, iw_event); 993 break; 994 case IW_CM_EVENT_DISCONNECT: 995 cm_disconnect_handler(cm_id_priv, iw_event); 996 break; 997 case IW_CM_EVENT_CLOSE: 998 ret = cm_close_handler(cm_id_priv, iw_event); 999 break; 1000 default: 1001 BUG(); 1002 } 1003 1004 return ret; 1005 } 1006 1007 /* 1008 * Process events on the work_list for the cm_id. If the callback 1009 * function requests that the cm_id be deleted, a flag is set in the 1010 * cm_id flags to indicate that when the last reference is 1011 * removed, the cm_id is to be destroyed. This is necessary to 1012 * distinguish between an object that will be destroyed by the app 1013 * thread asleep on the destroy_comp list vs. an object destroyed 1014 * here synchronously when the last reference is removed. 1015 */ 1016 static void cm_work_handler(struct work_struct *_work) 1017 { 1018 struct iwcm_work *work = container_of(_work, struct iwcm_work, work); 1019 struct iw_cm_event levent; 1020 struct iwcm_id_private *cm_id_priv = work->cm_id; 1021 unsigned long flags; 1022 int empty; 1023 int ret = 0; 1024 1025 spin_lock_irqsave(&cm_id_priv->lock, flags); 1026 empty = list_empty(&cm_id_priv->work_list); 1027 while (!empty) { 1028 work = list_entry(cm_id_priv->work_list.next, 1029 struct iwcm_work, list); 1030 list_del_init(&work->list); 1031 empty = list_empty(&cm_id_priv->work_list); 1032 levent = work->event; 1033 put_work(work); 1034 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1035 1036 if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) { 1037 ret = process_event(cm_id_priv, &levent); 1038 if (ret) 1039 destroy_cm_id(&cm_id_priv->id); 1040 } else 1041 pr_debug("dropping event %d\n", levent.event); 1042 if (iwcm_deref_id(cm_id_priv)) 1043 return; 1044 if (empty) 1045 return; 1046 spin_lock_irqsave(&cm_id_priv->lock, flags); 1047 } 1048 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1049 } 1050 1051 /* 1052 * This function is called on interrupt context. Schedule events on 1053 * the iwcm_wq thread to allow callback functions to downcall into 1054 * the CM and/or block. Events are queued to a per-CM_ID 1055 * work_list. If this is the first event on the work_list, the work 1056 * element is also queued on the iwcm_wq thread. 1057 * 1058 * Each event holds a reference on the cm_id. Until the last posted 1059 * event has been delivered and processed, the cm_id cannot be 1060 * deleted. 1061 * 1062 * Returns: 1063 * 0 - the event was handled. 1064 * -ENOMEM - the event was not handled due to lack of resources. 1065 */ 1066 static int cm_event_handler(struct iw_cm_id *cm_id, 1067 struct iw_cm_event *iw_event) 1068 { 1069 struct iwcm_work *work; 1070 struct iwcm_id_private *cm_id_priv; 1071 unsigned long flags; 1072 int ret = 0; 1073 1074 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 1075 1076 spin_lock_irqsave(&cm_id_priv->lock, flags); 1077 work = get_work(cm_id_priv); 1078 if (!work) { 1079 ret = -ENOMEM; 1080 goto out; 1081 } 1082 1083 INIT_WORK(&work->work, cm_work_handler); 1084 work->cm_id = cm_id_priv; 1085 work->event = *iw_event; 1086 1087 if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST || 1088 work->event.event == IW_CM_EVENT_CONNECT_REPLY) && 1089 work->event.private_data_len) { 1090 ret = copy_private_data(&work->event); 1091 if (ret) { 1092 put_work(work); 1093 goto out; 1094 } 1095 } 1096 1097 atomic_inc(&cm_id_priv->refcount); 1098 if (list_empty(&cm_id_priv->work_list)) { 1099 list_add_tail(&work->list, &cm_id_priv->work_list); 1100 queue_work(iwcm_wq, &work->work); 1101 } else 1102 list_add_tail(&work->list, &cm_id_priv->work_list); 1103 out: 1104 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1105 return ret; 1106 } 1107 1108 static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv, 1109 struct ib_qp_attr *qp_attr, 1110 int *qp_attr_mask) 1111 { 1112 unsigned long flags; 1113 int ret; 1114 1115 spin_lock_irqsave(&cm_id_priv->lock, flags); 1116 switch (cm_id_priv->state) { 1117 case IW_CM_STATE_IDLE: 1118 case IW_CM_STATE_CONN_SENT: 1119 case IW_CM_STATE_CONN_RECV: 1120 case IW_CM_STATE_ESTABLISHED: 1121 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; 1122 qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE| 1123 IB_ACCESS_REMOTE_READ; 1124 ret = 0; 1125 break; 1126 default: 1127 ret = -EINVAL; 1128 break; 1129 } 1130 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1131 return ret; 1132 } 1133 1134 static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv, 1135 struct ib_qp_attr *qp_attr, 1136 int *qp_attr_mask) 1137 { 1138 unsigned long flags; 1139 int ret; 1140 1141 spin_lock_irqsave(&cm_id_priv->lock, flags); 1142 switch (cm_id_priv->state) { 1143 case IW_CM_STATE_IDLE: 1144 case IW_CM_STATE_CONN_SENT: 1145 case IW_CM_STATE_CONN_RECV: 1146 case IW_CM_STATE_ESTABLISHED: 1147 *qp_attr_mask = 0; 1148 ret = 0; 1149 break; 1150 default: 1151 ret = -EINVAL; 1152 break; 1153 } 1154 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1155 return ret; 1156 } 1157 1158 int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, 1159 struct ib_qp_attr *qp_attr, 1160 int *qp_attr_mask) 1161 { 1162 struct iwcm_id_private *cm_id_priv; 1163 int ret; 1164 1165 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 1166 switch (qp_attr->qp_state) { 1167 case IB_QPS_INIT: 1168 case IB_QPS_RTR: 1169 ret = iwcm_init_qp_init_attr(cm_id_priv, 1170 qp_attr, qp_attr_mask); 1171 break; 1172 case IB_QPS_RTS: 1173 ret = iwcm_init_qp_rts_attr(cm_id_priv, 1174 qp_attr, qp_attr_mask); 1175 break; 1176 default: 1177 ret = -EINVAL; 1178 break; 1179 } 1180 return ret; 1181 } 1182 EXPORT_SYMBOL(iw_cm_init_qp_attr); 1183 1184 static int __init iw_cm_init(void) 1185 { 1186 int ret; 1187 1188 ret = iwpm_init(RDMA_NL_IWCM); 1189 if (ret) 1190 pr_err("iw_cm: couldn't init iwpm\n"); 1191 else 1192 rdma_nl_register(RDMA_NL_IWCM, iwcm_nl_cb_table); 1193 iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", 0); 1194 if (!iwcm_wq) 1195 return -ENOMEM; 1196 1197 iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm", 1198 iwcm_ctl_table); 1199 if (!iwcm_ctl_table_hdr) { 1200 pr_err("iw_cm: couldn't register sysctl paths\n"); 1201 destroy_workqueue(iwcm_wq); 1202 return -ENOMEM; 1203 } 1204 1205 return 0; 1206 } 1207 1208 static void __exit iw_cm_cleanup(void) 1209 { 1210 unregister_net_sysctl_table(iwcm_ctl_table_hdr); 1211 destroy_workqueue(iwcm_wq); 1212 rdma_nl_unregister(RDMA_NL_IWCM); 1213 iwpm_exit(RDMA_NL_IWCM); 1214 } 1215 1216 MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_IWCM, 2); 1217 1218 module_init(iw_cm_init); 1219 module_exit(iw_cm_cleanup); 1220