1 /* 2 * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. 3 * Copyright (c) 2004 Topspin Corporation. All rights reserved. 4 * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. 5 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 6 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. 7 * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. 8 * 9 * This software is available to you under a choice of one of two 10 * licenses. You may choose to be licensed under the terms of the GNU 11 * General Public License (GPL) Version 2, available from the file 12 * COPYING in the main directory of this source tree, or the 13 * OpenIB.org BSD license below: 14 * 15 * Redistribution and use in source and binary forms, with or 16 * without modification, are permitted provided that the following 17 * conditions are met: 18 * 19 * - Redistributions of source code must retain the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer. 22 * 23 * - Redistributions in binary form must reproduce the above 24 * copyright notice, this list of conditions and the following 25 * disclaimer in the documentation and/or other materials 26 * provided with the distribution. 27 * 28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 29 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 30 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 31 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 32 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 33 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 34 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 35 * SOFTWARE. 36 * 37 */ 38 #include <linux/dma-mapping.h> 39 #include <linux/err.h> 40 #include <linux/idr.h> 41 #include <linux/interrupt.h> 42 #include <linux/rbtree.h> 43 #include <linux/sched.h> 44 #include <linux/spinlock.h> 45 #include <linux/workqueue.h> 46 #include <linux/completion.h> 47 #include <linux/slab.h> 48 #include <linux/module.h> 49 #include <linux/sysctl.h> 50 51 #include <rdma/iw_cm.h> 52 #include <rdma/ib_addr.h> 53 #include <rdma/iw_portmap.h> 54 #include <rdma/rdma_netlink.h> 55 56 #include "iwcm.h" 57 58 MODULE_AUTHOR("Tom Tucker"); 59 MODULE_DESCRIPTION("iWARP CM"); 60 MODULE_LICENSE("Dual BSD/GPL"); 61 62 static struct ibnl_client_cbs iwcm_nl_cb_table[] = { 63 [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, 64 [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, 65 [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, 66 [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb}, 67 [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb}, 68 [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb}, 69 [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb} 70 }; 71 72 static struct workqueue_struct *iwcm_wq; 73 struct iwcm_work { 74 struct work_struct work; 75 struct iwcm_id_private *cm_id; 76 struct list_head list; 77 struct iw_cm_event event; 78 struct list_head free_list; 79 }; 80 81 static unsigned int default_backlog = 256; 82 83 static struct ctl_table_header *iwcm_ctl_table_hdr; 84 static struct ctl_table iwcm_ctl_table[] = { 85 { 86 .procname = "default_backlog", 87 .data = &default_backlog, 88 .maxlen = sizeof(default_backlog), 89 .mode = 0644, 90 .proc_handler = proc_dointvec, 91 }, 92 { } 93 }; 94 95 /* 96 * The following services provide a mechanism for pre-allocating iwcm_work 97 * elements. The design pre-allocates them based on the cm_id type: 98 * LISTENING IDS: Get enough elements preallocated to handle the 99 * listen backlog. 100 * ACTIVE IDS: 4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE 101 * PASSIVE IDS: 3: ESTABLISHED, DISCONNECT, CLOSE 102 * 103 * Allocating them in connect and listen avoids having to deal 104 * with allocation failures on the event upcall from the provider (which 105 * is called in the interrupt context). 106 * 107 * One exception is when creating the cm_id for incoming connection requests. 108 * There are two cases: 109 * 1) in the event upcall, cm_event_handler(), for a listening cm_id. If 110 * the backlog is exceeded, then no more connection request events will 111 * be processed. cm_event_handler() returns -ENOMEM in this case. Its up 112 * to the provider to reject the connection request. 113 * 2) in the connection request workqueue handler, cm_conn_req_handler(). 114 * If work elements cannot be allocated for the new connect request cm_id, 115 * then IWCM will call the provider reject method. This is ok since 116 * cm_conn_req_handler() runs in the workqueue thread context. 117 */ 118 119 static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv) 120 { 121 struct iwcm_work *work; 122 123 if (list_empty(&cm_id_priv->work_free_list)) 124 return NULL; 125 work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work, 126 free_list); 127 list_del_init(&work->free_list); 128 return work; 129 } 130 131 static void put_work(struct iwcm_work *work) 132 { 133 list_add(&work->free_list, &work->cm_id->work_free_list); 134 } 135 136 static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) 137 { 138 struct list_head *e, *tmp; 139 140 list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) 141 kfree(list_entry(e, struct iwcm_work, free_list)); 142 } 143 144 static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) 145 { 146 struct iwcm_work *work; 147 148 BUG_ON(!list_empty(&cm_id_priv->work_free_list)); 149 while (count--) { 150 work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL); 151 if (!work) { 152 dealloc_work_entries(cm_id_priv); 153 return -ENOMEM; 154 } 155 work->cm_id = cm_id_priv; 156 INIT_LIST_HEAD(&work->list); 157 put_work(work); 158 } 159 return 0; 160 } 161 162 /* 163 * Save private data from incoming connection requests to 164 * iw_cm_event, so the low level driver doesn't have to. Adjust 165 * the event ptr to point to the local copy. 166 */ 167 static int copy_private_data(struct iw_cm_event *event) 168 { 169 void *p; 170 171 p = kmemdup(event->private_data, event->private_data_len, GFP_ATOMIC); 172 if (!p) 173 return -ENOMEM; 174 event->private_data = p; 175 return 0; 176 } 177 178 static void free_cm_id(struct iwcm_id_private *cm_id_priv) 179 { 180 dealloc_work_entries(cm_id_priv); 181 kfree(cm_id_priv); 182 } 183 184 /* 185 * Release a reference on cm_id. If the last reference is being 186 * released, free the cm_id and return 1. 187 */ 188 static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv) 189 { 190 BUG_ON(atomic_read(&cm_id_priv->refcount)==0); 191 if (atomic_dec_and_test(&cm_id_priv->refcount)) { 192 BUG_ON(!list_empty(&cm_id_priv->work_list)); 193 free_cm_id(cm_id_priv); 194 return 1; 195 } 196 197 return 0; 198 } 199 200 static void add_ref(struct iw_cm_id *cm_id) 201 { 202 struct iwcm_id_private *cm_id_priv; 203 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 204 atomic_inc(&cm_id_priv->refcount); 205 } 206 207 static void rem_ref(struct iw_cm_id *cm_id) 208 { 209 struct iwcm_id_private *cm_id_priv; 210 211 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 212 213 (void)iwcm_deref_id(cm_id_priv); 214 } 215 216 static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event); 217 218 struct iw_cm_id *iw_create_cm_id(struct ib_device *device, 219 iw_cm_handler cm_handler, 220 void *context) 221 { 222 struct iwcm_id_private *cm_id_priv; 223 224 cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL); 225 if (!cm_id_priv) 226 return ERR_PTR(-ENOMEM); 227 228 cm_id_priv->state = IW_CM_STATE_IDLE; 229 cm_id_priv->id.device = device; 230 cm_id_priv->id.cm_handler = cm_handler; 231 cm_id_priv->id.context = context; 232 cm_id_priv->id.event_handler = cm_event_handler; 233 cm_id_priv->id.add_ref = add_ref; 234 cm_id_priv->id.rem_ref = rem_ref; 235 spin_lock_init(&cm_id_priv->lock); 236 atomic_set(&cm_id_priv->refcount, 1); 237 init_waitqueue_head(&cm_id_priv->connect_wait); 238 init_completion(&cm_id_priv->destroy_comp); 239 INIT_LIST_HEAD(&cm_id_priv->work_list); 240 INIT_LIST_HEAD(&cm_id_priv->work_free_list); 241 242 return &cm_id_priv->id; 243 } 244 EXPORT_SYMBOL(iw_create_cm_id); 245 246 247 static int iwcm_modify_qp_err(struct ib_qp *qp) 248 { 249 struct ib_qp_attr qp_attr; 250 251 if (!qp) 252 return -EINVAL; 253 254 qp_attr.qp_state = IB_QPS_ERR; 255 return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); 256 } 257 258 /* 259 * This is really the RDMAC CLOSING state. It is most similar to the 260 * IB SQD QP state. 261 */ 262 static int iwcm_modify_qp_sqd(struct ib_qp *qp) 263 { 264 struct ib_qp_attr qp_attr; 265 266 BUG_ON(qp == NULL); 267 qp_attr.qp_state = IB_QPS_SQD; 268 return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); 269 } 270 271 /* 272 * CM_ID <-- CLOSING 273 * 274 * Block if a passive or active connection is currently being processed. Then 275 * process the event as follows: 276 * - If we are ESTABLISHED, move to CLOSING and modify the QP state 277 * based on the abrupt flag 278 * - If the connection is already in the CLOSING or IDLE state, the peer is 279 * disconnecting concurrently with us and we've already seen the 280 * DISCONNECT event -- ignore the request and return 0 281 * - Disconnect on a listening endpoint returns -EINVAL 282 */ 283 int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt) 284 { 285 struct iwcm_id_private *cm_id_priv; 286 unsigned long flags; 287 int ret = 0; 288 struct ib_qp *qp = NULL; 289 290 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 291 /* Wait if we're currently in a connect or accept downcall */ 292 wait_event(cm_id_priv->connect_wait, 293 !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); 294 295 spin_lock_irqsave(&cm_id_priv->lock, flags); 296 switch (cm_id_priv->state) { 297 case IW_CM_STATE_ESTABLISHED: 298 cm_id_priv->state = IW_CM_STATE_CLOSING; 299 300 /* QP could be <nul> for user-mode client */ 301 if (cm_id_priv->qp) 302 qp = cm_id_priv->qp; 303 else 304 ret = -EINVAL; 305 break; 306 case IW_CM_STATE_LISTEN: 307 ret = -EINVAL; 308 break; 309 case IW_CM_STATE_CLOSING: 310 /* remote peer closed first */ 311 case IW_CM_STATE_IDLE: 312 /* accept or connect returned !0 */ 313 break; 314 case IW_CM_STATE_CONN_RECV: 315 /* 316 * App called disconnect before/without calling accept after 317 * connect_request event delivered. 318 */ 319 break; 320 case IW_CM_STATE_CONN_SENT: 321 /* Can only get here if wait above fails */ 322 default: 323 BUG(); 324 } 325 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 326 327 if (qp) { 328 if (abrupt) 329 ret = iwcm_modify_qp_err(qp); 330 else 331 ret = iwcm_modify_qp_sqd(qp); 332 333 /* 334 * If both sides are disconnecting the QP could 335 * already be in ERR or SQD states 336 */ 337 ret = 0; 338 } 339 340 return ret; 341 } 342 EXPORT_SYMBOL(iw_cm_disconnect); 343 344 /* 345 * CM_ID <-- DESTROYING 346 * 347 * Clean up all resources associated with the connection and release 348 * the initial reference taken by iw_create_cm_id. 349 */ 350 static void destroy_cm_id(struct iw_cm_id *cm_id) 351 { 352 struct iwcm_id_private *cm_id_priv; 353 unsigned long flags; 354 355 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 356 /* 357 * Wait if we're currently in a connect or accept downcall. A 358 * listening endpoint should never block here. 359 */ 360 wait_event(cm_id_priv->connect_wait, 361 !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); 362 363 /* 364 * Since we're deleting the cm_id, drop any events that 365 * might arrive before the last dereference. 366 */ 367 set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags); 368 369 spin_lock_irqsave(&cm_id_priv->lock, flags); 370 switch (cm_id_priv->state) { 371 case IW_CM_STATE_LISTEN: 372 cm_id_priv->state = IW_CM_STATE_DESTROYING; 373 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 374 /* destroy the listening endpoint */ 375 cm_id->device->iwcm->destroy_listen(cm_id); 376 spin_lock_irqsave(&cm_id_priv->lock, flags); 377 break; 378 case IW_CM_STATE_ESTABLISHED: 379 cm_id_priv->state = IW_CM_STATE_DESTROYING; 380 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 381 /* Abrupt close of the connection */ 382 (void)iwcm_modify_qp_err(cm_id_priv->qp); 383 spin_lock_irqsave(&cm_id_priv->lock, flags); 384 break; 385 case IW_CM_STATE_IDLE: 386 case IW_CM_STATE_CLOSING: 387 cm_id_priv->state = IW_CM_STATE_DESTROYING; 388 break; 389 case IW_CM_STATE_CONN_RECV: 390 /* 391 * App called destroy before/without calling accept after 392 * receiving connection request event notification or 393 * returned non zero from the event callback function. 394 * In either case, must tell the provider to reject. 395 */ 396 cm_id_priv->state = IW_CM_STATE_DESTROYING; 397 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 398 cm_id->device->iwcm->reject(cm_id, NULL, 0); 399 spin_lock_irqsave(&cm_id_priv->lock, flags); 400 break; 401 case IW_CM_STATE_CONN_SENT: 402 case IW_CM_STATE_DESTROYING: 403 default: 404 BUG(); 405 break; 406 } 407 if (cm_id_priv->qp) { 408 cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); 409 cm_id_priv->qp = NULL; 410 } 411 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 412 413 if (cm_id->mapped) { 414 iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr); 415 iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM); 416 } 417 418 (void)iwcm_deref_id(cm_id_priv); 419 } 420 421 /* 422 * This function is only called by the application thread and cannot 423 * be called by the event thread. The function will wait for all 424 * references to be released on the cm_id and then kfree the cm_id 425 * object. 426 */ 427 void iw_destroy_cm_id(struct iw_cm_id *cm_id) 428 { 429 struct iwcm_id_private *cm_id_priv; 430 431 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 432 destroy_cm_id(cm_id); 433 } 434 EXPORT_SYMBOL(iw_destroy_cm_id); 435 436 /** 437 * iw_cm_check_wildcard - If IP address is 0 then use original 438 * @pm_addr: sockaddr containing the ip to check for wildcard 439 * @cm_addr: sockaddr containing the actual IP address 440 * @cm_outaddr: sockaddr to set IP addr which leaving port 441 * 442 * Checks the pm_addr for wildcard and then sets cm_outaddr's 443 * IP to the actual (cm_addr). 444 */ 445 static void iw_cm_check_wildcard(struct sockaddr_storage *pm_addr, 446 struct sockaddr_storage *cm_addr, 447 struct sockaddr_storage *cm_outaddr) 448 { 449 if (pm_addr->ss_family == AF_INET) { 450 struct sockaddr_in *pm4_addr = (struct sockaddr_in *)pm_addr; 451 452 if (pm4_addr->sin_addr.s_addr == htonl(INADDR_ANY)) { 453 struct sockaddr_in *cm4_addr = 454 (struct sockaddr_in *)cm_addr; 455 struct sockaddr_in *cm4_outaddr = 456 (struct sockaddr_in *)cm_outaddr; 457 458 cm4_outaddr->sin_addr = cm4_addr->sin_addr; 459 } 460 } else { 461 struct sockaddr_in6 *pm6_addr = (struct sockaddr_in6 *)pm_addr; 462 463 if (ipv6_addr_type(&pm6_addr->sin6_addr) == IPV6_ADDR_ANY) { 464 struct sockaddr_in6 *cm6_addr = 465 (struct sockaddr_in6 *)cm_addr; 466 struct sockaddr_in6 *cm6_outaddr = 467 (struct sockaddr_in6 *)cm_outaddr; 468 469 cm6_outaddr->sin6_addr = cm6_addr->sin6_addr; 470 } 471 } 472 } 473 474 /** 475 * iw_cm_map - Use portmapper to map the ports 476 * @cm_id: connection manager pointer 477 * @active: Indicates the active side when true 478 * returns nonzero for error only if iwpm_create_mapinfo() fails 479 * 480 * Tries to add a mapping for a port using the Portmapper. If 481 * successful in mapping the IP/Port it will check the remote 482 * mapped IP address for a wildcard IP address and replace the 483 * zero IP address with the remote_addr. 484 */ 485 static int iw_cm_map(struct iw_cm_id *cm_id, bool active) 486 { 487 struct iwpm_dev_data pm_reg_msg; 488 struct iwpm_sa_data pm_msg; 489 int status; 490 491 cm_id->m_local_addr = cm_id->local_addr; 492 cm_id->m_remote_addr = cm_id->remote_addr; 493 494 memcpy(pm_reg_msg.dev_name, cm_id->device->name, 495 sizeof(pm_reg_msg.dev_name)); 496 memcpy(pm_reg_msg.if_name, cm_id->device->iwcm->ifname, 497 sizeof(pm_reg_msg.if_name)); 498 499 if (iwpm_register_pid(&pm_reg_msg, RDMA_NL_IWCM) || 500 !iwpm_valid_pid()) 501 return 0; 502 503 cm_id->mapped = true; 504 pm_msg.loc_addr = cm_id->local_addr; 505 pm_msg.rem_addr = cm_id->remote_addr; 506 if (active) 507 status = iwpm_add_and_query_mapping(&pm_msg, 508 RDMA_NL_IWCM); 509 else 510 status = iwpm_add_mapping(&pm_msg, RDMA_NL_IWCM); 511 512 if (!status) { 513 cm_id->m_local_addr = pm_msg.mapped_loc_addr; 514 if (active) { 515 cm_id->m_remote_addr = pm_msg.mapped_rem_addr; 516 iw_cm_check_wildcard(&pm_msg.mapped_rem_addr, 517 &cm_id->remote_addr, 518 &cm_id->m_remote_addr); 519 } 520 } 521 522 return iwpm_create_mapinfo(&cm_id->local_addr, 523 &cm_id->m_local_addr, 524 RDMA_NL_IWCM); 525 } 526 527 /* 528 * CM_ID <-- LISTEN 529 * 530 * Start listening for connect requests. Generates one CONNECT_REQUEST 531 * event for each inbound connect request. 532 */ 533 int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) 534 { 535 struct iwcm_id_private *cm_id_priv; 536 unsigned long flags; 537 int ret; 538 539 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 540 541 if (!backlog) 542 backlog = default_backlog; 543 544 ret = alloc_work_entries(cm_id_priv, backlog); 545 if (ret) 546 return ret; 547 548 spin_lock_irqsave(&cm_id_priv->lock, flags); 549 switch (cm_id_priv->state) { 550 case IW_CM_STATE_IDLE: 551 cm_id_priv->state = IW_CM_STATE_LISTEN; 552 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 553 ret = iw_cm_map(cm_id, false); 554 if (!ret) 555 ret = cm_id->device->iwcm->create_listen(cm_id, backlog); 556 if (ret) 557 cm_id_priv->state = IW_CM_STATE_IDLE; 558 spin_lock_irqsave(&cm_id_priv->lock, flags); 559 break; 560 default: 561 ret = -EINVAL; 562 } 563 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 564 565 return ret; 566 } 567 EXPORT_SYMBOL(iw_cm_listen); 568 569 /* 570 * CM_ID <-- IDLE 571 * 572 * Rejects an inbound connection request. No events are generated. 573 */ 574 int iw_cm_reject(struct iw_cm_id *cm_id, 575 const void *private_data, 576 u8 private_data_len) 577 { 578 struct iwcm_id_private *cm_id_priv; 579 unsigned long flags; 580 int ret; 581 582 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 583 set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 584 585 spin_lock_irqsave(&cm_id_priv->lock, flags); 586 if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { 587 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 588 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 589 wake_up_all(&cm_id_priv->connect_wait); 590 return -EINVAL; 591 } 592 cm_id_priv->state = IW_CM_STATE_IDLE; 593 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 594 595 ret = cm_id->device->iwcm->reject(cm_id, private_data, 596 private_data_len); 597 598 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 599 wake_up_all(&cm_id_priv->connect_wait); 600 601 return ret; 602 } 603 EXPORT_SYMBOL(iw_cm_reject); 604 605 /* 606 * CM_ID <-- ESTABLISHED 607 * 608 * Accepts an inbound connection request and generates an ESTABLISHED 609 * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block 610 * until the ESTABLISHED event is received from the provider. 611 */ 612 int iw_cm_accept(struct iw_cm_id *cm_id, 613 struct iw_cm_conn_param *iw_param) 614 { 615 struct iwcm_id_private *cm_id_priv; 616 struct ib_qp *qp; 617 unsigned long flags; 618 int ret; 619 620 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 621 set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 622 623 spin_lock_irqsave(&cm_id_priv->lock, flags); 624 if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { 625 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 626 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 627 wake_up_all(&cm_id_priv->connect_wait); 628 return -EINVAL; 629 } 630 /* Get the ib_qp given the QPN */ 631 qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); 632 if (!qp) { 633 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 634 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 635 wake_up_all(&cm_id_priv->connect_wait); 636 return -EINVAL; 637 } 638 cm_id->device->iwcm->add_ref(qp); 639 cm_id_priv->qp = qp; 640 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 641 642 ret = cm_id->device->iwcm->accept(cm_id, iw_param); 643 if (ret) { 644 /* An error on accept precludes provider events */ 645 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); 646 cm_id_priv->state = IW_CM_STATE_IDLE; 647 spin_lock_irqsave(&cm_id_priv->lock, flags); 648 if (cm_id_priv->qp) { 649 cm_id->device->iwcm->rem_ref(qp); 650 cm_id_priv->qp = NULL; 651 } 652 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 653 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 654 wake_up_all(&cm_id_priv->connect_wait); 655 } 656 657 return ret; 658 } 659 EXPORT_SYMBOL(iw_cm_accept); 660 661 /* 662 * Active Side: CM_ID <-- CONN_SENT 663 * 664 * If successful, results in the generation of a CONNECT_REPLY 665 * event. iw_cm_disconnect and iw_cm_destroy will block until the 666 * CONNECT_REPLY event is received from the provider. 667 */ 668 int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) 669 { 670 struct iwcm_id_private *cm_id_priv; 671 int ret; 672 unsigned long flags; 673 struct ib_qp *qp; 674 675 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 676 677 ret = alloc_work_entries(cm_id_priv, 4); 678 if (ret) 679 return ret; 680 681 set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 682 spin_lock_irqsave(&cm_id_priv->lock, flags); 683 684 if (cm_id_priv->state != IW_CM_STATE_IDLE) { 685 ret = -EINVAL; 686 goto err; 687 } 688 689 /* Get the ib_qp given the QPN */ 690 qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); 691 if (!qp) { 692 ret = -EINVAL; 693 goto err; 694 } 695 cm_id->device->iwcm->add_ref(qp); 696 cm_id_priv->qp = qp; 697 cm_id_priv->state = IW_CM_STATE_CONN_SENT; 698 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 699 700 ret = iw_cm_map(cm_id, true); 701 if (!ret) 702 ret = cm_id->device->iwcm->connect(cm_id, iw_param); 703 if (!ret) 704 return 0; /* success */ 705 706 spin_lock_irqsave(&cm_id_priv->lock, flags); 707 if (cm_id_priv->qp) { 708 cm_id->device->iwcm->rem_ref(qp); 709 cm_id_priv->qp = NULL; 710 } 711 cm_id_priv->state = IW_CM_STATE_IDLE; 712 err: 713 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 714 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 715 wake_up_all(&cm_id_priv->connect_wait); 716 return ret; 717 } 718 EXPORT_SYMBOL(iw_cm_connect); 719 720 /* 721 * Passive Side: new CM_ID <-- CONN_RECV 722 * 723 * Handles an inbound connect request. The function creates a new 724 * iw_cm_id to represent the new connection and inherits the client 725 * callback function and other attributes from the listening parent. 726 * 727 * The work item contains a pointer to the listen_cm_id and the event. The 728 * listen_cm_id contains the client cm_handler, context and 729 * device. These are copied when the device is cloned. The event 730 * contains the new four tuple. 731 * 732 * An error on the child should not affect the parent, so this 733 * function does not return a value. 734 */ 735 static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, 736 struct iw_cm_event *iw_event) 737 { 738 unsigned long flags; 739 struct iw_cm_id *cm_id; 740 struct iwcm_id_private *cm_id_priv; 741 int ret; 742 743 /* 744 * The provider should never generate a connection request 745 * event with a bad status. 746 */ 747 BUG_ON(iw_event->status); 748 749 cm_id = iw_create_cm_id(listen_id_priv->id.device, 750 listen_id_priv->id.cm_handler, 751 listen_id_priv->id.context); 752 /* If the cm_id could not be created, ignore the request */ 753 if (IS_ERR(cm_id)) 754 goto out; 755 756 cm_id->provider_data = iw_event->provider_data; 757 cm_id->m_local_addr = iw_event->local_addr; 758 cm_id->m_remote_addr = iw_event->remote_addr; 759 cm_id->local_addr = listen_id_priv->id.local_addr; 760 761 ret = iwpm_get_remote_info(&listen_id_priv->id.m_local_addr, 762 &iw_event->remote_addr, 763 &cm_id->remote_addr, 764 RDMA_NL_IWCM); 765 if (ret) { 766 cm_id->remote_addr = iw_event->remote_addr; 767 } else { 768 iw_cm_check_wildcard(&listen_id_priv->id.m_local_addr, 769 &iw_event->local_addr, 770 &cm_id->local_addr); 771 iw_event->local_addr = cm_id->local_addr; 772 iw_event->remote_addr = cm_id->remote_addr; 773 } 774 775 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 776 cm_id_priv->state = IW_CM_STATE_CONN_RECV; 777 778 /* 779 * We could be destroying the listening id. If so, ignore this 780 * upcall. 781 */ 782 spin_lock_irqsave(&listen_id_priv->lock, flags); 783 if (listen_id_priv->state != IW_CM_STATE_LISTEN) { 784 spin_unlock_irqrestore(&listen_id_priv->lock, flags); 785 iw_cm_reject(cm_id, NULL, 0); 786 iw_destroy_cm_id(cm_id); 787 goto out; 788 } 789 spin_unlock_irqrestore(&listen_id_priv->lock, flags); 790 791 ret = alloc_work_entries(cm_id_priv, 3); 792 if (ret) { 793 iw_cm_reject(cm_id, NULL, 0); 794 iw_destroy_cm_id(cm_id); 795 goto out; 796 } 797 798 /* Call the client CM handler */ 799 ret = cm_id->cm_handler(cm_id, iw_event); 800 if (ret) { 801 iw_cm_reject(cm_id, NULL, 0); 802 iw_destroy_cm_id(cm_id); 803 } 804 805 out: 806 if (iw_event->private_data_len) 807 kfree(iw_event->private_data); 808 } 809 810 /* 811 * Passive Side: CM_ID <-- ESTABLISHED 812 * 813 * The provider generated an ESTABLISHED event which means that 814 * the MPA negotion has completed successfully and we are now in MPA 815 * FPDU mode. 816 * 817 * This event can only be received in the CONN_RECV state. If the 818 * remote peer closed, the ESTABLISHED event would be received followed 819 * by the CLOSE event. If the app closes, it will block until we wake 820 * it up after processing this event. 821 */ 822 static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv, 823 struct iw_cm_event *iw_event) 824 { 825 unsigned long flags; 826 int ret; 827 828 spin_lock_irqsave(&cm_id_priv->lock, flags); 829 830 /* 831 * We clear the CONNECT_WAIT bit here to allow the callback 832 * function to call iw_cm_disconnect. Calling iw_destroy_cm_id 833 * from a callback handler is not allowed. 834 */ 835 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 836 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); 837 cm_id_priv->state = IW_CM_STATE_ESTABLISHED; 838 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 839 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); 840 wake_up_all(&cm_id_priv->connect_wait); 841 842 return ret; 843 } 844 845 /* 846 * Active Side: CM_ID <-- ESTABLISHED 847 * 848 * The app has called connect and is waiting for the established event to 849 * post it's requests to the server. This event will wake up anyone 850 * blocked in iw_cm_disconnect or iw_destroy_id. 851 */ 852 static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, 853 struct iw_cm_event *iw_event) 854 { 855 unsigned long flags; 856 int ret; 857 858 spin_lock_irqsave(&cm_id_priv->lock, flags); 859 /* 860 * Clear the connect wait bit so a callback function calling 861 * iw_cm_disconnect will not wait and deadlock this thread 862 */ 863 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 864 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT); 865 if (iw_event->status == 0) { 866 cm_id_priv->id.m_local_addr = iw_event->local_addr; 867 cm_id_priv->id.m_remote_addr = iw_event->remote_addr; 868 iw_event->local_addr = cm_id_priv->id.local_addr; 869 iw_event->remote_addr = cm_id_priv->id.remote_addr; 870 cm_id_priv->state = IW_CM_STATE_ESTABLISHED; 871 } else { 872 /* REJECTED or RESET */ 873 cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); 874 cm_id_priv->qp = NULL; 875 cm_id_priv->state = IW_CM_STATE_IDLE; 876 } 877 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 878 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); 879 880 if (iw_event->private_data_len) 881 kfree(iw_event->private_data); 882 883 /* Wake up waiters on connect complete */ 884 wake_up_all(&cm_id_priv->connect_wait); 885 886 return ret; 887 } 888 889 /* 890 * CM_ID <-- CLOSING 891 * 892 * If in the ESTABLISHED state, move to CLOSING. 893 */ 894 static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv, 895 struct iw_cm_event *iw_event) 896 { 897 unsigned long flags; 898 899 spin_lock_irqsave(&cm_id_priv->lock, flags); 900 if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED) 901 cm_id_priv->state = IW_CM_STATE_CLOSING; 902 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 903 } 904 905 /* 906 * CM_ID <-- IDLE 907 * 908 * If in the ESTBLISHED or CLOSING states, the QP will have have been 909 * moved by the provider to the ERR state. Disassociate the CM_ID from 910 * the QP, move to IDLE, and remove the 'connected' reference. 911 * 912 * If in some other state, the cm_id was destroyed asynchronously. 913 * This is the last reference that will result in waking up 914 * the app thread blocked in iw_destroy_cm_id. 915 */ 916 static int cm_close_handler(struct iwcm_id_private *cm_id_priv, 917 struct iw_cm_event *iw_event) 918 { 919 unsigned long flags; 920 int ret = 0; 921 spin_lock_irqsave(&cm_id_priv->lock, flags); 922 923 if (cm_id_priv->qp) { 924 cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); 925 cm_id_priv->qp = NULL; 926 } 927 switch (cm_id_priv->state) { 928 case IW_CM_STATE_ESTABLISHED: 929 case IW_CM_STATE_CLOSING: 930 cm_id_priv->state = IW_CM_STATE_IDLE; 931 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 932 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); 933 spin_lock_irqsave(&cm_id_priv->lock, flags); 934 break; 935 case IW_CM_STATE_DESTROYING: 936 break; 937 default: 938 BUG(); 939 } 940 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 941 942 return ret; 943 } 944 945 static int process_event(struct iwcm_id_private *cm_id_priv, 946 struct iw_cm_event *iw_event) 947 { 948 int ret = 0; 949 950 switch (iw_event->event) { 951 case IW_CM_EVENT_CONNECT_REQUEST: 952 cm_conn_req_handler(cm_id_priv, iw_event); 953 break; 954 case IW_CM_EVENT_CONNECT_REPLY: 955 ret = cm_conn_rep_handler(cm_id_priv, iw_event); 956 break; 957 case IW_CM_EVENT_ESTABLISHED: 958 ret = cm_conn_est_handler(cm_id_priv, iw_event); 959 break; 960 case IW_CM_EVENT_DISCONNECT: 961 cm_disconnect_handler(cm_id_priv, iw_event); 962 break; 963 case IW_CM_EVENT_CLOSE: 964 ret = cm_close_handler(cm_id_priv, iw_event); 965 break; 966 default: 967 BUG(); 968 } 969 970 return ret; 971 } 972 973 /* 974 * Process events on the work_list for the cm_id. If the callback 975 * function requests that the cm_id be deleted, a flag is set in the 976 * cm_id flags to indicate that when the last reference is 977 * removed, the cm_id is to be destroyed. This is necessary to 978 * distinguish between an object that will be destroyed by the app 979 * thread asleep on the destroy_comp list vs. an object destroyed 980 * here synchronously when the last reference is removed. 981 */ 982 static void cm_work_handler(struct work_struct *_work) 983 { 984 struct iwcm_work *work = container_of(_work, struct iwcm_work, work); 985 struct iw_cm_event levent; 986 struct iwcm_id_private *cm_id_priv = work->cm_id; 987 unsigned long flags; 988 int empty; 989 int ret = 0; 990 991 spin_lock_irqsave(&cm_id_priv->lock, flags); 992 empty = list_empty(&cm_id_priv->work_list); 993 while (!empty) { 994 work = list_entry(cm_id_priv->work_list.next, 995 struct iwcm_work, list); 996 list_del_init(&work->list); 997 empty = list_empty(&cm_id_priv->work_list); 998 levent = work->event; 999 put_work(work); 1000 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1001 1002 if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) { 1003 ret = process_event(cm_id_priv, &levent); 1004 if (ret) 1005 destroy_cm_id(&cm_id_priv->id); 1006 } else 1007 pr_debug("dropping event %d\n", levent.event); 1008 if (iwcm_deref_id(cm_id_priv)) 1009 return; 1010 if (empty) 1011 return; 1012 spin_lock_irqsave(&cm_id_priv->lock, flags); 1013 } 1014 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1015 } 1016 1017 /* 1018 * This function is called on interrupt context. Schedule events on 1019 * the iwcm_wq thread to allow callback functions to downcall into 1020 * the CM and/or block. Events are queued to a per-CM_ID 1021 * work_list. If this is the first event on the work_list, the work 1022 * element is also queued on the iwcm_wq thread. 1023 * 1024 * Each event holds a reference on the cm_id. Until the last posted 1025 * event has been delivered and processed, the cm_id cannot be 1026 * deleted. 1027 * 1028 * Returns: 1029 * 0 - the event was handled. 1030 * -ENOMEM - the event was not handled due to lack of resources. 1031 */ 1032 static int cm_event_handler(struct iw_cm_id *cm_id, 1033 struct iw_cm_event *iw_event) 1034 { 1035 struct iwcm_work *work; 1036 struct iwcm_id_private *cm_id_priv; 1037 unsigned long flags; 1038 int ret = 0; 1039 1040 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 1041 1042 spin_lock_irqsave(&cm_id_priv->lock, flags); 1043 work = get_work(cm_id_priv); 1044 if (!work) { 1045 ret = -ENOMEM; 1046 goto out; 1047 } 1048 1049 INIT_WORK(&work->work, cm_work_handler); 1050 work->cm_id = cm_id_priv; 1051 work->event = *iw_event; 1052 1053 if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST || 1054 work->event.event == IW_CM_EVENT_CONNECT_REPLY) && 1055 work->event.private_data_len) { 1056 ret = copy_private_data(&work->event); 1057 if (ret) { 1058 put_work(work); 1059 goto out; 1060 } 1061 } 1062 1063 atomic_inc(&cm_id_priv->refcount); 1064 if (list_empty(&cm_id_priv->work_list)) { 1065 list_add_tail(&work->list, &cm_id_priv->work_list); 1066 queue_work(iwcm_wq, &work->work); 1067 } else 1068 list_add_tail(&work->list, &cm_id_priv->work_list); 1069 out: 1070 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1071 return ret; 1072 } 1073 1074 static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv, 1075 struct ib_qp_attr *qp_attr, 1076 int *qp_attr_mask) 1077 { 1078 unsigned long flags; 1079 int ret; 1080 1081 spin_lock_irqsave(&cm_id_priv->lock, flags); 1082 switch (cm_id_priv->state) { 1083 case IW_CM_STATE_IDLE: 1084 case IW_CM_STATE_CONN_SENT: 1085 case IW_CM_STATE_CONN_RECV: 1086 case IW_CM_STATE_ESTABLISHED: 1087 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; 1088 qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE| 1089 IB_ACCESS_REMOTE_READ; 1090 ret = 0; 1091 break; 1092 default: 1093 ret = -EINVAL; 1094 break; 1095 } 1096 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1097 return ret; 1098 } 1099 1100 static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv, 1101 struct ib_qp_attr *qp_attr, 1102 int *qp_attr_mask) 1103 { 1104 unsigned long flags; 1105 int ret; 1106 1107 spin_lock_irqsave(&cm_id_priv->lock, flags); 1108 switch (cm_id_priv->state) { 1109 case IW_CM_STATE_IDLE: 1110 case IW_CM_STATE_CONN_SENT: 1111 case IW_CM_STATE_CONN_RECV: 1112 case IW_CM_STATE_ESTABLISHED: 1113 *qp_attr_mask = 0; 1114 ret = 0; 1115 break; 1116 default: 1117 ret = -EINVAL; 1118 break; 1119 } 1120 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1121 return ret; 1122 } 1123 1124 int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, 1125 struct ib_qp_attr *qp_attr, 1126 int *qp_attr_mask) 1127 { 1128 struct iwcm_id_private *cm_id_priv; 1129 int ret; 1130 1131 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 1132 switch (qp_attr->qp_state) { 1133 case IB_QPS_INIT: 1134 case IB_QPS_RTR: 1135 ret = iwcm_init_qp_init_attr(cm_id_priv, 1136 qp_attr, qp_attr_mask); 1137 break; 1138 case IB_QPS_RTS: 1139 ret = iwcm_init_qp_rts_attr(cm_id_priv, 1140 qp_attr, qp_attr_mask); 1141 break; 1142 default: 1143 ret = -EINVAL; 1144 break; 1145 } 1146 return ret; 1147 } 1148 EXPORT_SYMBOL(iw_cm_init_qp_attr); 1149 1150 static int __init iw_cm_init(void) 1151 { 1152 int ret; 1153 1154 ret = iwpm_init(RDMA_NL_IWCM); 1155 if (ret) 1156 pr_err("iw_cm: couldn't init iwpm\n"); 1157 1158 ret = ibnl_add_client(RDMA_NL_IWCM, ARRAY_SIZE(iwcm_nl_cb_table), 1159 iwcm_nl_cb_table); 1160 if (ret) 1161 pr_err("iw_cm: couldn't register netlink callbacks\n"); 1162 1163 iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", WQ_MEM_RECLAIM); 1164 if (!iwcm_wq) 1165 return -ENOMEM; 1166 1167 iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm", 1168 iwcm_ctl_table); 1169 if (!iwcm_ctl_table_hdr) { 1170 pr_err("iw_cm: couldn't register sysctl paths\n"); 1171 destroy_workqueue(iwcm_wq); 1172 return -ENOMEM; 1173 } 1174 1175 return 0; 1176 } 1177 1178 static void __exit iw_cm_cleanup(void) 1179 { 1180 unregister_net_sysctl_table(iwcm_ctl_table_hdr); 1181 destroy_workqueue(iwcm_wq); 1182 ibnl_remove_client(RDMA_NL_IWCM); 1183 iwpm_exit(RDMA_NL_IWCM); 1184 } 1185 1186 module_init(iw_cm_init); 1187 module_exit(iw_cm_cleanup); 1188