1 /* 2 * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. 3 * Copyright (c) 2004 Topspin Corporation. All rights reserved. 4 * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. 5 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 6 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. 7 * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. 8 * 9 * This software is available to you under a choice of one of two 10 * licenses. You may choose to be licensed under the terms of the GNU 11 * General Public License (GPL) Version 2, available from the file 12 * COPYING in the main directory of this source tree, or the 13 * OpenIB.org BSD license below: 14 * 15 * Redistribution and use in source and binary forms, with or 16 * without modification, are permitted provided that the following 17 * conditions are met: 18 * 19 * - Redistributions of source code must retain the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer. 22 * 23 * - Redistributions in binary form must reproduce the above 24 * copyright notice, this list of conditions and the following 25 * disclaimer in the documentation and/or other materials 26 * provided with the distribution. 27 * 28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 29 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 30 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 31 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 32 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 33 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 34 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 35 * SOFTWARE. 
*/
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/interrupt.h>
#include <linux/rbtree.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/sysctl.h>

#include <rdma/iw_cm.h>
#include <rdma/ib_addr.h>
#include <rdma/iw_portmap.h>
#include <rdma/rdma_netlink.h>

#include "iwcm.h"

MODULE_AUTHOR("Tom Tucker");
MODULE_DESCRIPTION("iWARP CM");
MODULE_LICENSE("Dual BSD/GPL");

/*
 * Netlink callback table registered for the RDMA_NL_IWCM client in
 * iw_cm_init(). Each RDMA_NL_IWPM_* message type is routed to the matching
 * iwpm_*_cb port mapper handler through the .dump hook.
 */
static struct ibnl_client_cbs iwcm_nl_cb_table[] = {
	[RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
	[RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
	[RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
	[RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb},
	[RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
	[RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
	[RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}
};

/* Workqueue on which cm_work_handler() runs; created in iw_cm_init(). */
static struct workqueue_struct *iwcm_wq;

/*
 * One queued provider event for a cm_id. Elements are pre-allocated by
 * alloc_work_entries() and recycled through the cm_id's work_free_list.
 */
struct iwcm_work {
	struct work_struct work;	/* handed to queue_work() on iwcm_wq */
	struct iwcm_id_private *cm_id;	/* cm_id this element belongs to */
	struct list_head list;		/* link on cm_id->work_list (pending) */
	struct iw_cm_event event;	/* copy of the provider's event */
	struct list_head free_list;	/* link on cm_id->work_free_list */
};

/* Backlog used by iw_cm_listen() when the caller passes 0. */
static unsigned int default_backlog = 256;

/*
 * Sysctl table registered under "net/iw_cm" in iw_cm_init(); exposes
 * default_backlog as a read/write (0644) integer.
 * NOTE(review): default_backlog is unsigned but the handler is
 * proc_dointvec, so a negative value can presumably be written - confirm
 * whether the value should be range-checked.
 */
static struct ctl_table_header *iwcm_ctl_table_hdr;
static struct ctl_table iwcm_ctl_table[] = {
	{
		.procname	= "default_backlog",
		.data		= &default_backlog,
		.maxlen		= sizeof(default_backlog),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};

/*
 * The following services provide a mechanism for pre-allocating iwcm_work
 * elements.  The design pre-allocates them based on the cm_id type:
 *	LISTENING IDS:	Get enough elements preallocated to handle the
 *			listen backlog.
100 * ACTIVE IDS: 4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE 101 * PASSIVE IDS: 3: ESTABLISHED, DISCONNECT, CLOSE 102 * 103 * Allocating them in connect and listen avoids having to deal 104 * with allocation failures on the event upcall from the provider (which 105 * is called in the interrupt context). 106 * 107 * One exception is when creating the cm_id for incoming connection requests. 108 * There are two cases: 109 * 1) in the event upcall, cm_event_handler(), for a listening cm_id. If 110 * the backlog is exceeded, then no more connection request events will 111 * be processed. cm_event_handler() returns -ENOMEM in this case. Its up 112 * to the provider to reject the connection request. 113 * 2) in the connection request workqueue handler, cm_conn_req_handler(). 114 * If work elements cannot be allocated for the new connect request cm_id, 115 * then IWCM will call the provider reject method. This is ok since 116 * cm_conn_req_handler() runs in the workqueue thread context. 117 */ 118 119 static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv) 120 { 121 struct iwcm_work *work; 122 123 if (list_empty(&cm_id_priv->work_free_list)) 124 return NULL; 125 work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work, 126 free_list); 127 list_del_init(&work->free_list); 128 return work; 129 } 130 131 static void put_work(struct iwcm_work *work) 132 { 133 list_add(&work->free_list, &work->cm_id->work_free_list); 134 } 135 136 static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) 137 { 138 struct list_head *e, *tmp; 139 140 list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) 141 kfree(list_entry(e, struct iwcm_work, free_list)); 142 } 143 144 static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) 145 { 146 struct iwcm_work *work; 147 148 BUG_ON(!list_empty(&cm_id_priv->work_free_list)); 149 while (count--) { 150 work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL); 151 if (!work) { 152 
dealloc_work_entries(cm_id_priv); 153 return -ENOMEM; 154 } 155 work->cm_id = cm_id_priv; 156 INIT_LIST_HEAD(&work->list); 157 put_work(work); 158 } 159 return 0; 160 } 161 162 /* 163 * Save private data from incoming connection requests to 164 * iw_cm_event, so the low level driver doesn't have to. Adjust 165 * the event ptr to point to the local copy. 166 */ 167 static int copy_private_data(struct iw_cm_event *event) 168 { 169 void *p; 170 171 p = kmemdup(event->private_data, event->private_data_len, GFP_ATOMIC); 172 if (!p) 173 return -ENOMEM; 174 event->private_data = p; 175 return 0; 176 } 177 178 static void free_cm_id(struct iwcm_id_private *cm_id_priv) 179 { 180 dealloc_work_entries(cm_id_priv); 181 kfree(cm_id_priv); 182 } 183 184 /* 185 * Release a reference on cm_id. If the last reference is being 186 * released, enable the waiting thread (in iw_destroy_cm_id) to 187 * get woken up, and return 1 if a thread is already waiting. 188 */ 189 static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv) 190 { 191 BUG_ON(atomic_read(&cm_id_priv->refcount)==0); 192 if (atomic_dec_and_test(&cm_id_priv->refcount)) { 193 BUG_ON(!list_empty(&cm_id_priv->work_list)); 194 complete(&cm_id_priv->destroy_comp); 195 return 1; 196 } 197 198 return 0; 199 } 200 201 static void add_ref(struct iw_cm_id *cm_id) 202 { 203 struct iwcm_id_private *cm_id_priv; 204 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 205 atomic_inc(&cm_id_priv->refcount); 206 } 207 208 static void rem_ref(struct iw_cm_id *cm_id) 209 { 210 struct iwcm_id_private *cm_id_priv; 211 int cb_destroy; 212 213 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 214 215 /* 216 * Test bit before deref in case the cm_id gets freed on another 217 * thread. 
218 */ 219 cb_destroy = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); 220 if (iwcm_deref_id(cm_id_priv) && cb_destroy) { 221 BUG_ON(!list_empty(&cm_id_priv->work_list)); 222 free_cm_id(cm_id_priv); 223 } 224 } 225 226 static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event); 227 228 struct iw_cm_id *iw_create_cm_id(struct ib_device *device, 229 iw_cm_handler cm_handler, 230 void *context) 231 { 232 struct iwcm_id_private *cm_id_priv; 233 234 cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL); 235 if (!cm_id_priv) 236 return ERR_PTR(-ENOMEM); 237 238 cm_id_priv->state = IW_CM_STATE_IDLE; 239 cm_id_priv->id.device = device; 240 cm_id_priv->id.cm_handler = cm_handler; 241 cm_id_priv->id.context = context; 242 cm_id_priv->id.event_handler = cm_event_handler; 243 cm_id_priv->id.add_ref = add_ref; 244 cm_id_priv->id.rem_ref = rem_ref; 245 spin_lock_init(&cm_id_priv->lock); 246 atomic_set(&cm_id_priv->refcount, 1); 247 init_waitqueue_head(&cm_id_priv->connect_wait); 248 init_completion(&cm_id_priv->destroy_comp); 249 INIT_LIST_HEAD(&cm_id_priv->work_list); 250 INIT_LIST_HEAD(&cm_id_priv->work_free_list); 251 252 return &cm_id_priv->id; 253 } 254 EXPORT_SYMBOL(iw_create_cm_id); 255 256 257 static int iwcm_modify_qp_err(struct ib_qp *qp) 258 { 259 struct ib_qp_attr qp_attr; 260 261 if (!qp) 262 return -EINVAL; 263 264 qp_attr.qp_state = IB_QPS_ERR; 265 return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); 266 } 267 268 /* 269 * This is really the RDMAC CLOSING state. It is most similar to the 270 * IB SQD QP state. 271 */ 272 static int iwcm_modify_qp_sqd(struct ib_qp *qp) 273 { 274 struct ib_qp_attr qp_attr; 275 276 BUG_ON(qp == NULL); 277 qp_attr.qp_state = IB_QPS_SQD; 278 return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); 279 } 280 281 /* 282 * CM_ID <-- CLOSING 283 * 284 * Block if a passive or active connection is currently being processed. 
Then 285 * process the event as follows: 286 * - If we are ESTABLISHED, move to CLOSING and modify the QP state 287 * based on the abrupt flag 288 * - If the connection is already in the CLOSING or IDLE state, the peer is 289 * disconnecting concurrently with us and we've already seen the 290 * DISCONNECT event -- ignore the request and return 0 291 * - Disconnect on a listening endpoint returns -EINVAL 292 */ 293 int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt) 294 { 295 struct iwcm_id_private *cm_id_priv; 296 unsigned long flags; 297 int ret = 0; 298 struct ib_qp *qp = NULL; 299 300 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 301 /* Wait if we're currently in a connect or accept downcall */ 302 wait_event(cm_id_priv->connect_wait, 303 !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); 304 305 spin_lock_irqsave(&cm_id_priv->lock, flags); 306 switch (cm_id_priv->state) { 307 case IW_CM_STATE_ESTABLISHED: 308 cm_id_priv->state = IW_CM_STATE_CLOSING; 309 310 /* QP could be <nul> for user-mode client */ 311 if (cm_id_priv->qp) 312 qp = cm_id_priv->qp; 313 else 314 ret = -EINVAL; 315 break; 316 case IW_CM_STATE_LISTEN: 317 ret = -EINVAL; 318 break; 319 case IW_CM_STATE_CLOSING: 320 /* remote peer closed first */ 321 case IW_CM_STATE_IDLE: 322 /* accept or connect returned !0 */ 323 break; 324 case IW_CM_STATE_CONN_RECV: 325 /* 326 * App called disconnect before/without calling accept after 327 * connect_request event delivered. 
328 */ 329 break; 330 case IW_CM_STATE_CONN_SENT: 331 /* Can only get here if wait above fails */ 332 default: 333 BUG(); 334 } 335 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 336 337 if (qp) { 338 if (abrupt) 339 ret = iwcm_modify_qp_err(qp); 340 else 341 ret = iwcm_modify_qp_sqd(qp); 342 343 /* 344 * If both sides are disconnecting the QP could 345 * already be in ERR or SQD states 346 */ 347 ret = 0; 348 } 349 350 return ret; 351 } 352 EXPORT_SYMBOL(iw_cm_disconnect); 353 354 /* 355 * CM_ID <-- DESTROYING 356 * 357 * Clean up all resources associated with the connection and release 358 * the initial reference taken by iw_create_cm_id. 359 */ 360 static void destroy_cm_id(struct iw_cm_id *cm_id) 361 { 362 struct iwcm_id_private *cm_id_priv; 363 unsigned long flags; 364 365 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 366 /* 367 * Wait if we're currently in a connect or accept downcall. A 368 * listening endpoint should never block here. 369 */ 370 wait_event(cm_id_priv->connect_wait, 371 !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); 372 373 spin_lock_irqsave(&cm_id_priv->lock, flags); 374 switch (cm_id_priv->state) { 375 case IW_CM_STATE_LISTEN: 376 cm_id_priv->state = IW_CM_STATE_DESTROYING; 377 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 378 /* destroy the listening endpoint */ 379 cm_id->device->iwcm->destroy_listen(cm_id); 380 spin_lock_irqsave(&cm_id_priv->lock, flags); 381 break; 382 case IW_CM_STATE_ESTABLISHED: 383 cm_id_priv->state = IW_CM_STATE_DESTROYING; 384 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 385 /* Abrupt close of the connection */ 386 (void)iwcm_modify_qp_err(cm_id_priv->qp); 387 spin_lock_irqsave(&cm_id_priv->lock, flags); 388 break; 389 case IW_CM_STATE_IDLE: 390 case IW_CM_STATE_CLOSING: 391 cm_id_priv->state = IW_CM_STATE_DESTROYING; 392 break; 393 case IW_CM_STATE_CONN_RECV: 394 /* 395 * App called destroy before/without calling accept after 396 * receiving connection request event notification 
or 397 * returned non zero from the event callback function. 398 * In either case, must tell the provider to reject. 399 */ 400 cm_id_priv->state = IW_CM_STATE_DESTROYING; 401 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 402 cm_id->device->iwcm->reject(cm_id, NULL, 0); 403 spin_lock_irqsave(&cm_id_priv->lock, flags); 404 break; 405 case IW_CM_STATE_CONN_SENT: 406 case IW_CM_STATE_DESTROYING: 407 default: 408 BUG(); 409 break; 410 } 411 if (cm_id_priv->qp) { 412 cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); 413 cm_id_priv->qp = NULL; 414 } 415 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 416 417 if (cm_id->mapped) { 418 iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr); 419 iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM); 420 } 421 422 (void)iwcm_deref_id(cm_id_priv); 423 } 424 425 /* 426 * This function is only called by the application thread and cannot 427 * be called by the event thread. The function will wait for all 428 * references to be released on the cm_id and then kfree the cm_id 429 * object. 430 */ 431 void iw_destroy_cm_id(struct iw_cm_id *cm_id) 432 { 433 struct iwcm_id_private *cm_id_priv; 434 435 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 436 BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)); 437 438 destroy_cm_id(cm_id); 439 440 wait_for_completion(&cm_id_priv->destroy_comp); 441 442 free_cm_id(cm_id_priv); 443 } 444 EXPORT_SYMBOL(iw_destroy_cm_id); 445 446 /** 447 * iw_cm_check_wildcard - If IP address is 0 then use original 448 * @pm_addr: sockaddr containing the ip to check for wildcard 449 * @cm_addr: sockaddr containing the actual IP address 450 * @cm_outaddr: sockaddr to set IP addr which leaving port 451 * 452 * Checks the pm_addr for wildcard and then sets cm_outaddr's 453 * IP to the actual (cm_addr). 
454 */ 455 static void iw_cm_check_wildcard(struct sockaddr_storage *pm_addr, 456 struct sockaddr_storage *cm_addr, 457 struct sockaddr_storage *cm_outaddr) 458 { 459 if (pm_addr->ss_family == AF_INET) { 460 struct sockaddr_in *pm4_addr = (struct sockaddr_in *)pm_addr; 461 462 if (pm4_addr->sin_addr.s_addr == htonl(INADDR_ANY)) { 463 struct sockaddr_in *cm4_addr = 464 (struct sockaddr_in *)cm_addr; 465 struct sockaddr_in *cm4_outaddr = 466 (struct sockaddr_in *)cm_outaddr; 467 468 cm4_outaddr->sin_addr = cm4_addr->sin_addr; 469 } 470 } else { 471 struct sockaddr_in6 *pm6_addr = (struct sockaddr_in6 *)pm_addr; 472 473 if (ipv6_addr_type(&pm6_addr->sin6_addr) == IPV6_ADDR_ANY) { 474 struct sockaddr_in6 *cm6_addr = 475 (struct sockaddr_in6 *)cm_addr; 476 struct sockaddr_in6 *cm6_outaddr = 477 (struct sockaddr_in6 *)cm_outaddr; 478 479 cm6_outaddr->sin6_addr = cm6_addr->sin6_addr; 480 } 481 } 482 } 483 484 /** 485 * iw_cm_map - Use portmapper to map the ports 486 * @cm_id: connection manager pointer 487 * @active: Indicates the active side when true 488 * returns nonzero for error only if iwpm_create_mapinfo() fails 489 * 490 * Tries to add a mapping for a port using the Portmapper. If 491 * successful in mapping the IP/Port it will check the remote 492 * mapped IP address for a wildcard IP address and replace the 493 * zero IP address with the remote_addr. 
494 */ 495 static int iw_cm_map(struct iw_cm_id *cm_id, bool active) 496 { 497 struct iwpm_dev_data pm_reg_msg; 498 struct iwpm_sa_data pm_msg; 499 int status; 500 501 cm_id->m_local_addr = cm_id->local_addr; 502 cm_id->m_remote_addr = cm_id->remote_addr; 503 504 memcpy(pm_reg_msg.dev_name, cm_id->device->name, 505 sizeof(pm_reg_msg.dev_name)); 506 memcpy(pm_reg_msg.if_name, cm_id->device->iwcm->ifname, 507 sizeof(pm_reg_msg.if_name)); 508 509 if (iwpm_register_pid(&pm_reg_msg, RDMA_NL_IWCM) || 510 !iwpm_valid_pid()) 511 return 0; 512 513 cm_id->mapped = true; 514 pm_msg.loc_addr = cm_id->local_addr; 515 pm_msg.rem_addr = cm_id->remote_addr; 516 if (active) 517 status = iwpm_add_and_query_mapping(&pm_msg, 518 RDMA_NL_IWCM); 519 else 520 status = iwpm_add_mapping(&pm_msg, RDMA_NL_IWCM); 521 522 if (!status) { 523 cm_id->m_local_addr = pm_msg.mapped_loc_addr; 524 if (active) { 525 cm_id->m_remote_addr = pm_msg.mapped_rem_addr; 526 iw_cm_check_wildcard(&pm_msg.mapped_rem_addr, 527 &cm_id->remote_addr, 528 &cm_id->m_remote_addr); 529 } 530 } 531 532 return iwpm_create_mapinfo(&cm_id->local_addr, 533 &cm_id->m_local_addr, 534 RDMA_NL_IWCM); 535 } 536 537 /* 538 * CM_ID <-- LISTEN 539 * 540 * Start listening for connect requests. Generates one CONNECT_REQUEST 541 * event for each inbound connect request. 
542 */ 543 int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) 544 { 545 struct iwcm_id_private *cm_id_priv; 546 unsigned long flags; 547 int ret; 548 549 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 550 551 if (!backlog) 552 backlog = default_backlog; 553 554 ret = alloc_work_entries(cm_id_priv, backlog); 555 if (ret) 556 return ret; 557 558 spin_lock_irqsave(&cm_id_priv->lock, flags); 559 switch (cm_id_priv->state) { 560 case IW_CM_STATE_IDLE: 561 cm_id_priv->state = IW_CM_STATE_LISTEN; 562 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 563 ret = iw_cm_map(cm_id, false); 564 if (!ret) 565 ret = cm_id->device->iwcm->create_listen(cm_id, backlog); 566 if (ret) 567 cm_id_priv->state = IW_CM_STATE_IDLE; 568 spin_lock_irqsave(&cm_id_priv->lock, flags); 569 break; 570 default: 571 ret = -EINVAL; 572 } 573 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 574 575 return ret; 576 } 577 EXPORT_SYMBOL(iw_cm_listen); 578 579 /* 580 * CM_ID <-- IDLE 581 * 582 * Rejects an inbound connection request. No events are generated. 
583 */ 584 int iw_cm_reject(struct iw_cm_id *cm_id, 585 const void *private_data, 586 u8 private_data_len) 587 { 588 struct iwcm_id_private *cm_id_priv; 589 unsigned long flags; 590 int ret; 591 592 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 593 set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 594 595 spin_lock_irqsave(&cm_id_priv->lock, flags); 596 if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { 597 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 598 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 599 wake_up_all(&cm_id_priv->connect_wait); 600 return -EINVAL; 601 } 602 cm_id_priv->state = IW_CM_STATE_IDLE; 603 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 604 605 ret = cm_id->device->iwcm->reject(cm_id, private_data, 606 private_data_len); 607 608 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 609 wake_up_all(&cm_id_priv->connect_wait); 610 611 return ret; 612 } 613 EXPORT_SYMBOL(iw_cm_reject); 614 615 /* 616 * CM_ID <-- ESTABLISHED 617 * 618 * Accepts an inbound connection request and generates an ESTABLISHED 619 * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block 620 * until the ESTABLISHED event is received from the provider. 
621 */ 622 int iw_cm_accept(struct iw_cm_id *cm_id, 623 struct iw_cm_conn_param *iw_param) 624 { 625 struct iwcm_id_private *cm_id_priv; 626 struct ib_qp *qp; 627 unsigned long flags; 628 int ret; 629 630 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 631 set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 632 633 spin_lock_irqsave(&cm_id_priv->lock, flags); 634 if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { 635 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 636 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 637 wake_up_all(&cm_id_priv->connect_wait); 638 return -EINVAL; 639 } 640 /* Get the ib_qp given the QPN */ 641 qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); 642 if (!qp) { 643 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 644 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 645 wake_up_all(&cm_id_priv->connect_wait); 646 return -EINVAL; 647 } 648 cm_id->device->iwcm->add_ref(qp); 649 cm_id_priv->qp = qp; 650 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 651 652 ret = cm_id->device->iwcm->accept(cm_id, iw_param); 653 if (ret) { 654 /* An error on accept precludes provider events */ 655 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); 656 cm_id_priv->state = IW_CM_STATE_IDLE; 657 spin_lock_irqsave(&cm_id_priv->lock, flags); 658 if (cm_id_priv->qp) { 659 cm_id->device->iwcm->rem_ref(qp); 660 cm_id_priv->qp = NULL; 661 } 662 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 663 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 664 wake_up_all(&cm_id_priv->connect_wait); 665 } 666 667 return ret; 668 } 669 EXPORT_SYMBOL(iw_cm_accept); 670 671 /* 672 * Active Side: CM_ID <-- CONN_SENT 673 * 674 * If successful, results in the generation of a CONNECT_REPLY 675 * event. iw_cm_disconnect and iw_cm_destroy will block until the 676 * CONNECT_REPLY event is received from the provider. 
677 */ 678 int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) 679 { 680 struct iwcm_id_private *cm_id_priv; 681 int ret; 682 unsigned long flags; 683 struct ib_qp *qp; 684 685 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 686 687 ret = alloc_work_entries(cm_id_priv, 4); 688 if (ret) 689 return ret; 690 691 set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 692 spin_lock_irqsave(&cm_id_priv->lock, flags); 693 694 if (cm_id_priv->state != IW_CM_STATE_IDLE) { 695 ret = -EINVAL; 696 goto err; 697 } 698 699 /* Get the ib_qp given the QPN */ 700 qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); 701 if (!qp) { 702 ret = -EINVAL; 703 goto err; 704 } 705 cm_id->device->iwcm->add_ref(qp); 706 cm_id_priv->qp = qp; 707 cm_id_priv->state = IW_CM_STATE_CONN_SENT; 708 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 709 710 ret = iw_cm_map(cm_id, true); 711 if (!ret) 712 ret = cm_id->device->iwcm->connect(cm_id, iw_param); 713 if (!ret) 714 return 0; /* success */ 715 716 spin_lock_irqsave(&cm_id_priv->lock, flags); 717 if (cm_id_priv->qp) { 718 cm_id->device->iwcm->rem_ref(qp); 719 cm_id_priv->qp = NULL; 720 } 721 cm_id_priv->state = IW_CM_STATE_IDLE; 722 err: 723 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 724 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 725 wake_up_all(&cm_id_priv->connect_wait); 726 return ret; 727 } 728 EXPORT_SYMBOL(iw_cm_connect); 729 730 /* 731 * Passive Side: new CM_ID <-- CONN_RECV 732 * 733 * Handles an inbound connect request. The function creates a new 734 * iw_cm_id to represent the new connection and inherits the client 735 * callback function and other attributes from the listening parent. 736 * 737 * The work item contains a pointer to the listen_cm_id and the event. The 738 * listen_cm_id contains the client cm_handler, context and 739 * device. These are copied when the device is cloned. The event 740 * contains the new four tuple. 
741 * 742 * An error on the child should not affect the parent, so this 743 * function does not return a value. 744 */ 745 static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, 746 struct iw_cm_event *iw_event) 747 { 748 unsigned long flags; 749 struct iw_cm_id *cm_id; 750 struct iwcm_id_private *cm_id_priv; 751 int ret; 752 753 /* 754 * The provider should never generate a connection request 755 * event with a bad status. 756 */ 757 BUG_ON(iw_event->status); 758 759 cm_id = iw_create_cm_id(listen_id_priv->id.device, 760 listen_id_priv->id.cm_handler, 761 listen_id_priv->id.context); 762 /* If the cm_id could not be created, ignore the request */ 763 if (IS_ERR(cm_id)) 764 goto out; 765 766 cm_id->provider_data = iw_event->provider_data; 767 cm_id->m_local_addr = iw_event->local_addr; 768 cm_id->m_remote_addr = iw_event->remote_addr; 769 cm_id->local_addr = listen_id_priv->id.local_addr; 770 771 ret = iwpm_get_remote_info(&listen_id_priv->id.m_local_addr, 772 &iw_event->remote_addr, 773 &cm_id->remote_addr, 774 RDMA_NL_IWCM); 775 if (ret) { 776 cm_id->remote_addr = iw_event->remote_addr; 777 } else { 778 iw_cm_check_wildcard(&listen_id_priv->id.m_local_addr, 779 &iw_event->local_addr, 780 &cm_id->local_addr); 781 iw_event->local_addr = cm_id->local_addr; 782 iw_event->remote_addr = cm_id->remote_addr; 783 } 784 785 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 786 cm_id_priv->state = IW_CM_STATE_CONN_RECV; 787 788 /* 789 * We could be destroying the listening id. If so, ignore this 790 * upcall. 
791 */ 792 spin_lock_irqsave(&listen_id_priv->lock, flags); 793 if (listen_id_priv->state != IW_CM_STATE_LISTEN) { 794 spin_unlock_irqrestore(&listen_id_priv->lock, flags); 795 iw_cm_reject(cm_id, NULL, 0); 796 iw_destroy_cm_id(cm_id); 797 goto out; 798 } 799 spin_unlock_irqrestore(&listen_id_priv->lock, flags); 800 801 ret = alloc_work_entries(cm_id_priv, 3); 802 if (ret) { 803 iw_cm_reject(cm_id, NULL, 0); 804 iw_destroy_cm_id(cm_id); 805 goto out; 806 } 807 808 /* Call the client CM handler */ 809 ret = cm_id->cm_handler(cm_id, iw_event); 810 if (ret) { 811 iw_cm_reject(cm_id, NULL, 0); 812 set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); 813 destroy_cm_id(cm_id); 814 if (atomic_read(&cm_id_priv->refcount)==0) 815 free_cm_id(cm_id_priv); 816 } 817 818 out: 819 if (iw_event->private_data_len) 820 kfree(iw_event->private_data); 821 } 822 823 /* 824 * Passive Side: CM_ID <-- ESTABLISHED 825 * 826 * The provider generated an ESTABLISHED event which means that 827 * the MPA negotion has completed successfully and we are now in MPA 828 * FPDU mode. 829 * 830 * This event can only be received in the CONN_RECV state. If the 831 * remote peer closed, the ESTABLISHED event would be received followed 832 * by the CLOSE event. If the app closes, it will block until we wake 833 * it up after processing this event. 834 */ 835 static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv, 836 struct iw_cm_event *iw_event) 837 { 838 unsigned long flags; 839 int ret; 840 841 spin_lock_irqsave(&cm_id_priv->lock, flags); 842 843 /* 844 * We clear the CONNECT_WAIT bit here to allow the callback 845 * function to call iw_cm_disconnect. Calling iw_destroy_cm_id 846 * from a callback handler is not allowed. 
847 */ 848 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 849 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); 850 cm_id_priv->state = IW_CM_STATE_ESTABLISHED; 851 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 852 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); 853 wake_up_all(&cm_id_priv->connect_wait); 854 855 return ret; 856 } 857 858 /* 859 * Active Side: CM_ID <-- ESTABLISHED 860 * 861 * The app has called connect and is waiting for the established event to 862 * post it's requests to the server. This event will wake up anyone 863 * blocked in iw_cm_disconnect or iw_destroy_id. 864 */ 865 static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, 866 struct iw_cm_event *iw_event) 867 { 868 unsigned long flags; 869 int ret; 870 871 spin_lock_irqsave(&cm_id_priv->lock, flags); 872 /* 873 * Clear the connect wait bit so a callback function calling 874 * iw_cm_disconnect will not wait and deadlock this thread 875 */ 876 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 877 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT); 878 if (iw_event->status == 0) { 879 cm_id_priv->id.m_local_addr = iw_event->local_addr; 880 cm_id_priv->id.m_remote_addr = iw_event->remote_addr; 881 iw_event->local_addr = cm_id_priv->id.local_addr; 882 iw_event->remote_addr = cm_id_priv->id.remote_addr; 883 cm_id_priv->state = IW_CM_STATE_ESTABLISHED; 884 } else { 885 /* REJECTED or RESET */ 886 cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); 887 cm_id_priv->qp = NULL; 888 cm_id_priv->state = IW_CM_STATE_IDLE; 889 } 890 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 891 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); 892 893 if (iw_event->private_data_len) 894 kfree(iw_event->private_data); 895 896 /* Wake up waiters on connect complete */ 897 wake_up_all(&cm_id_priv->connect_wait); 898 899 return ret; 900 } 901 902 /* 903 * CM_ID <-- CLOSING 904 * 905 * If in the ESTABLISHED state, move to CLOSING. 
906 */ 907 static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv, 908 struct iw_cm_event *iw_event) 909 { 910 unsigned long flags; 911 912 spin_lock_irqsave(&cm_id_priv->lock, flags); 913 if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED) 914 cm_id_priv->state = IW_CM_STATE_CLOSING; 915 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 916 } 917 918 /* 919 * CM_ID <-- IDLE 920 * 921 * If in the ESTBLISHED or CLOSING states, the QP will have have been 922 * moved by the provider to the ERR state. Disassociate the CM_ID from 923 * the QP, move to IDLE, and remove the 'connected' reference. 924 * 925 * If in some other state, the cm_id was destroyed asynchronously. 926 * This is the last reference that will result in waking up 927 * the app thread blocked in iw_destroy_cm_id. 928 */ 929 static int cm_close_handler(struct iwcm_id_private *cm_id_priv, 930 struct iw_cm_event *iw_event) 931 { 932 unsigned long flags; 933 int ret = 0; 934 spin_lock_irqsave(&cm_id_priv->lock, flags); 935 936 if (cm_id_priv->qp) { 937 cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); 938 cm_id_priv->qp = NULL; 939 } 940 switch (cm_id_priv->state) { 941 case IW_CM_STATE_ESTABLISHED: 942 case IW_CM_STATE_CLOSING: 943 cm_id_priv->state = IW_CM_STATE_IDLE; 944 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 945 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); 946 spin_lock_irqsave(&cm_id_priv->lock, flags); 947 break; 948 case IW_CM_STATE_DESTROYING: 949 break; 950 default: 951 BUG(); 952 } 953 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 954 955 return ret; 956 } 957 958 static int process_event(struct iwcm_id_private *cm_id_priv, 959 struct iw_cm_event *iw_event) 960 { 961 int ret = 0; 962 963 switch (iw_event->event) { 964 case IW_CM_EVENT_CONNECT_REQUEST: 965 cm_conn_req_handler(cm_id_priv, iw_event); 966 break; 967 case IW_CM_EVENT_CONNECT_REPLY: 968 ret = cm_conn_rep_handler(cm_id_priv, iw_event); 969 break; 970 case IW_CM_EVENT_ESTABLISHED: 971 ret = 
cm_conn_est_handler(cm_id_priv, iw_event); 972 break; 973 case IW_CM_EVENT_DISCONNECT: 974 cm_disconnect_handler(cm_id_priv, iw_event); 975 break; 976 case IW_CM_EVENT_CLOSE: 977 ret = cm_close_handler(cm_id_priv, iw_event); 978 break; 979 default: 980 BUG(); 981 } 982 983 return ret; 984 } 985 986 /* 987 * Process events on the work_list for the cm_id. If the callback 988 * function requests that the cm_id be deleted, a flag is set in the 989 * cm_id flags to indicate that when the last reference is 990 * removed, the cm_id is to be destroyed. This is necessary to 991 * distinguish between an object that will be destroyed by the app 992 * thread asleep on the destroy_comp list vs. an object destroyed 993 * here synchronously when the last reference is removed. 994 */ 995 static void cm_work_handler(struct work_struct *_work) 996 { 997 struct iwcm_work *work = container_of(_work, struct iwcm_work, work); 998 struct iw_cm_event levent; 999 struct iwcm_id_private *cm_id_priv = work->cm_id; 1000 unsigned long flags; 1001 int empty; 1002 int ret = 0; 1003 int destroy_id; 1004 1005 spin_lock_irqsave(&cm_id_priv->lock, flags); 1006 empty = list_empty(&cm_id_priv->work_list); 1007 while (!empty) { 1008 work = list_entry(cm_id_priv->work_list.next, 1009 struct iwcm_work, list); 1010 list_del_init(&work->list); 1011 empty = list_empty(&cm_id_priv->work_list); 1012 levent = work->event; 1013 put_work(work); 1014 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1015 1016 ret = process_event(cm_id_priv, &levent); 1017 if (ret) { 1018 set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); 1019 destroy_cm_id(&cm_id_priv->id); 1020 } 1021 BUG_ON(atomic_read(&cm_id_priv->refcount)==0); 1022 destroy_id = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags); 1023 if (iwcm_deref_id(cm_id_priv)) { 1024 if (destroy_id) { 1025 BUG_ON(!list_empty(&cm_id_priv->work_list)); 1026 free_cm_id(cm_id_priv); 1027 } 1028 return; 1029 } 1030 if (empty) 1031 return; 1032 
spin_lock_irqsave(&cm_id_priv->lock, flags); 1033 } 1034 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1035 } 1036 1037 /* 1038 * This function is called on interrupt context. Schedule events on 1039 * the iwcm_wq thread to allow callback functions to downcall into 1040 * the CM and/or block. Events are queued to a per-CM_ID 1041 * work_list. If this is the first event on the work_list, the work 1042 * element is also queued on the iwcm_wq thread. 1043 * 1044 * Each event holds a reference on the cm_id. Until the last posted 1045 * event has been delivered and processed, the cm_id cannot be 1046 * deleted. 1047 * 1048 * Returns: 1049 * 0 - the event was handled. 1050 * -ENOMEM - the event was not handled due to lack of resources. 1051 */ 1052 static int cm_event_handler(struct iw_cm_id *cm_id, 1053 struct iw_cm_event *iw_event) 1054 { 1055 struct iwcm_work *work; 1056 struct iwcm_id_private *cm_id_priv; 1057 unsigned long flags; 1058 int ret = 0; 1059 1060 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 1061 1062 spin_lock_irqsave(&cm_id_priv->lock, flags); 1063 work = get_work(cm_id_priv); 1064 if (!work) { 1065 ret = -ENOMEM; 1066 goto out; 1067 } 1068 1069 INIT_WORK(&work->work, cm_work_handler); 1070 work->cm_id = cm_id_priv; 1071 work->event = *iw_event; 1072 1073 if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST || 1074 work->event.event == IW_CM_EVENT_CONNECT_REPLY) && 1075 work->event.private_data_len) { 1076 ret = copy_private_data(&work->event); 1077 if (ret) { 1078 put_work(work); 1079 goto out; 1080 } 1081 } 1082 1083 atomic_inc(&cm_id_priv->refcount); 1084 if (list_empty(&cm_id_priv->work_list)) { 1085 list_add_tail(&work->list, &cm_id_priv->work_list); 1086 queue_work(iwcm_wq, &work->work); 1087 } else 1088 list_add_tail(&work->list, &cm_id_priv->work_list); 1089 out: 1090 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1091 return ret; 1092 } 1093 1094 static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv, 1095 
struct ib_qp_attr *qp_attr, 1096 int *qp_attr_mask) 1097 { 1098 unsigned long flags; 1099 int ret; 1100 1101 spin_lock_irqsave(&cm_id_priv->lock, flags); 1102 switch (cm_id_priv->state) { 1103 case IW_CM_STATE_IDLE: 1104 case IW_CM_STATE_CONN_SENT: 1105 case IW_CM_STATE_CONN_RECV: 1106 case IW_CM_STATE_ESTABLISHED: 1107 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; 1108 qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE| 1109 IB_ACCESS_REMOTE_READ; 1110 ret = 0; 1111 break; 1112 default: 1113 ret = -EINVAL; 1114 break; 1115 } 1116 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1117 return ret; 1118 } 1119 1120 static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv, 1121 struct ib_qp_attr *qp_attr, 1122 int *qp_attr_mask) 1123 { 1124 unsigned long flags; 1125 int ret; 1126 1127 spin_lock_irqsave(&cm_id_priv->lock, flags); 1128 switch (cm_id_priv->state) { 1129 case IW_CM_STATE_IDLE: 1130 case IW_CM_STATE_CONN_SENT: 1131 case IW_CM_STATE_CONN_RECV: 1132 case IW_CM_STATE_ESTABLISHED: 1133 *qp_attr_mask = 0; 1134 ret = 0; 1135 break; 1136 default: 1137 ret = -EINVAL; 1138 break; 1139 } 1140 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1141 return ret; 1142 } 1143 1144 int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, 1145 struct ib_qp_attr *qp_attr, 1146 int *qp_attr_mask) 1147 { 1148 struct iwcm_id_private *cm_id_priv; 1149 int ret; 1150 1151 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 1152 switch (qp_attr->qp_state) { 1153 case IB_QPS_INIT: 1154 case IB_QPS_RTR: 1155 ret = iwcm_init_qp_init_attr(cm_id_priv, 1156 qp_attr, qp_attr_mask); 1157 break; 1158 case IB_QPS_RTS: 1159 ret = iwcm_init_qp_rts_attr(cm_id_priv, 1160 qp_attr, qp_attr_mask); 1161 break; 1162 default: 1163 ret = -EINVAL; 1164 break; 1165 } 1166 return ret; 1167 } 1168 EXPORT_SYMBOL(iw_cm_init_qp_attr); 1169 1170 static int __init iw_cm_init(void) 1171 { 1172 int ret; 1173 1174 ret = iwpm_init(RDMA_NL_IWCM); 1175 if (ret) 1176 pr_err("iw_cm: couldn't init 
iwpm\n"); 1177 1178 ret = ibnl_add_client(RDMA_NL_IWCM, ARRAY_SIZE(iwcm_nl_cb_table), 1179 iwcm_nl_cb_table); 1180 if (ret) 1181 pr_err("iw_cm: couldn't register netlink callbacks\n"); 1182 1183 iwcm_wq = create_singlethread_workqueue("iw_cm_wq"); 1184 if (!iwcm_wq) 1185 return -ENOMEM; 1186 1187 iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm", 1188 iwcm_ctl_table); 1189 if (!iwcm_ctl_table_hdr) { 1190 pr_err("iw_cm: couldn't register sysctl paths\n"); 1191 destroy_workqueue(iwcm_wq); 1192 return -ENOMEM; 1193 } 1194 1195 return 0; 1196 } 1197 1198 static void __exit iw_cm_cleanup(void) 1199 { 1200 unregister_net_sysctl_table(iwcm_ctl_table_hdr); 1201 destroy_workqueue(iwcm_wq); 1202 ibnl_remove_client(RDMA_NL_IWCM); 1203 iwpm_exit(RDMA_NL_IWCM); 1204 } 1205 1206 module_init(iw_cm_init); 1207 module_exit(iw_cm_cleanup); 1208