/*
 * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved.
 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
 * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
 * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/interrupt.h>
#include <linux/rbtree.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/completion.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/sysctl.h>

#include <rdma/iw_cm.h>
#include <rdma/ib_addr.h>
#include <rdma/iw_portmap.h>
#include <rdma/rdma_netlink.h>

#include "iwcm.h"

MODULE_AUTHOR("Tom Tucker");
MODULE_DESCRIPTION("iWARP CM");
MODULE_LICENSE("Dual BSD/GPL");

static const char * const iwcm_rej_reason_strs[] = {
	[ECONNRESET]	= "reset by remote host",
	[ECONNREFUSED]	= "refused by remote application",
	[ETIMEDOUT]	= "setup timeout",
};

const char *__attribute_const__ iwcm_reject_msg(int reason)
{
	size_t index;

	/* iWARP uses negative errnos */
	index = -reason;

	if (index < ARRAY_SIZE(iwcm_rej_reason_strs) &&
	    iwcm_rej_reason_strs[index])
		return iwcm_rej_reason_strs[index];
	else
		return "unrecognized reason";
}
EXPORT_SYMBOL(iwcm_reject_msg);

static struct rdma_nl_cbs iwcm_nl_cb_table[RDMA_NL_IWPM_NUM_OPS] = {
	[RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
	[RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
	[RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
	[RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb},
	[RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
	[RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
	[RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb},
	[RDMA_NL_IWPM_HELLO] = {.dump = iwpm_hello_cb}
};

static struct workqueue_struct *iwcm_wq;
struct iwcm_work {
	struct work_struct work;
	struct iwcm_id_private *cm_id;
	struct list_head list;
	struct iw_cm_event event;
	struct list_head free_list;
};

static unsigned int default_backlog = 256;

static struct ctl_table_header *iwcm_ctl_table_hdr;
static struct ctl_table iwcm_ctl_table[] = {
	{
		.procname	= "default_backlog",
		.data		= &default_backlog,
		.maxlen		= sizeof(default_backlog),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};

/*
 * The following services provide a mechanism for pre-allocating iwcm_work
 * elements. The design pre-allocates them based on the cm_id type:
 *	LISTENING IDS:	Get enough elements preallocated to handle the
 *			listen backlog.
 *	ACTIVE IDS:	4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE
 *	PASSIVE IDS:	3: ESTABLISHED, DISCONNECT, CLOSE
 *
 * Allocating them in connect and listen avoids having to deal
 * with allocation failures on the event upcall from the provider (which
 * is called in interrupt context).
 *
 * One exception is when creating the cm_id for incoming connection requests.
 * There are two cases:
 * 1) in the event upcall, cm_event_handler(), for a listening cm_id. If
 *    the backlog is exceeded, then no more connection request events will
 *    be processed. cm_event_handler() returns -ENOMEM in this case. It's up
 *    to the provider to reject the connection request.
 * 2) in the connection request workqueue handler, cm_conn_req_handler().
 *    If work elements cannot be allocated for the new connect request cm_id,
 *    then IWCM will call the provider reject method. This is ok since
 *    cm_conn_req_handler() runs in the workqueue thread context.
 */

static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv)
{
	struct iwcm_work *work;

	if (list_empty(&cm_id_priv->work_free_list))
		return NULL;
	work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work,
			  free_list);
	list_del_init(&work->free_list);
	return work;
}

static void put_work(struct iwcm_work *work)
{
	list_add(&work->free_list, &work->cm_id->work_free_list);
}

static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv)
{
	struct list_head *e, *tmp;

	list_for_each_safe(e, tmp, &cm_id_priv->work_free_list)
		kfree(list_entry(e, struct iwcm_work, free_list));
}

static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count)
{
	struct iwcm_work *work;

	BUG_ON(!list_empty(&cm_id_priv->work_free_list));
	while (count--) {
		work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL);
		if (!work) {
			dealloc_work_entries(cm_id_priv);
			return -ENOMEM;
		}
		work->cm_id = cm_id_priv;
		INIT_LIST_HEAD(&work->list);
		put_work(work);
	}
	return 0;
}

/*
 * Save private data from incoming connection requests to
 * iw_cm_event, so the low level driver doesn't have to. Adjust
 * the event ptr to point to the local copy.
 */
static int copy_private_data(struct iw_cm_event *event)
{
	void *p;

	p = kmemdup(event->private_data, event->private_data_len, GFP_ATOMIC);
	if (!p)
		return -ENOMEM;
	event->private_data = p;
	return 0;
}

static void free_cm_id(struct iwcm_id_private *cm_id_priv)
{
	dealloc_work_entries(cm_id_priv);
	kfree(cm_id_priv);
}

/*
 * Release a reference on cm_id. If the last reference is being
 * released, free the cm_id and return 1.
 */
static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
{
	BUG_ON(atomic_read(&cm_id_priv->refcount) == 0);
	if (atomic_dec_and_test(&cm_id_priv->refcount)) {
		BUG_ON(!list_empty(&cm_id_priv->work_list));
		free_cm_id(cm_id_priv);
		return 1;
	}

	return 0;
}

static void add_ref(struct iw_cm_id *cm_id)
{
	struct iwcm_id_private *cm_id_priv;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	atomic_inc(&cm_id_priv->refcount);
}

static void rem_ref(struct iw_cm_id *cm_id)
{
	struct iwcm_id_private *cm_id_priv;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);

	(void)iwcm_deref_id(cm_id_priv);
}

static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);

struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
				 iw_cm_handler cm_handler,
				 void *context)
{
	struct iwcm_id_private *cm_id_priv;

	cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL);
	if (!cm_id_priv)
		return ERR_PTR(-ENOMEM);

	cm_id_priv->state = IW_CM_STATE_IDLE;
	cm_id_priv->id.device = device;
	cm_id_priv->id.cm_handler = cm_handler;
	cm_id_priv->id.context = context;
	cm_id_priv->id.event_handler = cm_event_handler;
	cm_id_priv->id.add_ref = add_ref;
	cm_id_priv->id.rem_ref = rem_ref;
	spin_lock_init(&cm_id_priv->lock);
	atomic_set(&cm_id_priv->refcount, 1);
	init_waitqueue_head(&cm_id_priv->connect_wait);
	init_completion(&cm_id_priv->destroy_comp);
	INIT_LIST_HEAD(&cm_id_priv->work_list);
	INIT_LIST_HEAD(&cm_id_priv->work_free_list);

	return &cm_id_priv->id;
}
EXPORT_SYMBOL(iw_create_cm_id);
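
/*
 * Example (illustrative sketch only): roughly how a kernel ULP might obtain
 * and later release an iWARP cm_id using the API exported above. The names
 * my_cm_handler, ib_dev and my_ctx are hypothetical placeholders, not
 * symbols defined in this file.
 *
 *	static int my_cm_handler(struct iw_cm_id *cm_id,
 *				 struct iw_cm_event *event)
 *	{
 *		// Dispatch on event->event; returning non-zero asks the
 *		// IWCM to destroy this cm_id on the handler's behalf.
 *		return 0;
 *	}
 *
 *	struct iw_cm_id *cm_id;
 *
 *	cm_id = iw_create_cm_id(ib_dev, my_cm_handler, my_ctx);
 *	if (IS_ERR(cm_id))
 *		return PTR_ERR(cm_id);
 *	...
 *	iw_destroy_cm_id(cm_id);
 */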

static int iwcm_modify_qp_err(struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;

	if (!qp)
		return -EINVAL;

	qp_attr.qp_state = IB_QPS_ERR;
	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
}

/*
 * This is really the RDMAC CLOSING state. It is most similar to the
 * IB SQD QP state.
 */
static int iwcm_modify_qp_sqd(struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;

	BUG_ON(qp == NULL);
	qp_attr.qp_state = IB_QPS_SQD;
	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
}

/*
 * CM_ID <-- CLOSING
 *
 * Block if a passive or active connection is currently being processed. Then
 * process the event as follows:
 * - If we are ESTABLISHED, move to CLOSING and modify the QP state
 *   based on the abrupt flag
 * - If the connection is already in the CLOSING or IDLE state, the peer is
 *   disconnecting concurrently with us and we've already seen the
 *   DISCONNECT event -- ignore the request and return 0
 * - Disconnect on a listening endpoint returns -EINVAL
 */
int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt)
{
	struct iwcm_id_private *cm_id_priv;
	unsigned long flags;
	int ret = 0;
	struct ib_qp *qp = NULL;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	/* Wait if we're currently in a connect or accept downcall */
	wait_event(cm_id_priv->connect_wait,
		   !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	switch (cm_id_priv->state) {
	case IW_CM_STATE_ESTABLISHED:
		cm_id_priv->state = IW_CM_STATE_CLOSING;

		/* QP could be NULL for a user-mode client */
		if (cm_id_priv->qp)
			qp = cm_id_priv->qp;
		else
			ret = -EINVAL;
		break;
	case IW_CM_STATE_LISTEN:
		ret = -EINVAL;
		break;
	case IW_CM_STATE_CLOSING:
		/* remote peer closed first */
	case IW_CM_STATE_IDLE:
		/* accept or connect returned !0 */
		break;
	case IW_CM_STATE_CONN_RECV:
		/*
		 * App called disconnect before/without calling accept after
		 * connect_request event delivered.
		 */
		break;
	case IW_CM_STATE_CONN_SENT:
		/* Can only get here if wait above fails */
	default:
		BUG();
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	if (qp) {
		if (abrupt)
			ret = iwcm_modify_qp_err(qp);
		else
			ret = iwcm_modify_qp_sqd(qp);

		/*
		 * If both sides are disconnecting the QP could
		 * already be in ERR or SQD states
		 */
		ret = 0;
	}

	return ret;
}
EXPORT_SYMBOL(iw_cm_disconnect);
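
/*
 * Example (illustrative sketch only): a typical ULP teardown path, where
 * 'abrupt' would be non-zero only on error paths that need the QP moved
 * straight to ERR rather than through the closing (SQD-like) state:
 *
 *	iw_cm_disconnect(cm_id, abrupt);
 *	iw_destroy_cm_id(cm_id);
 */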

/*
 * CM_ID <-- DESTROYING
 *
 * Clean up all resources associated with the connection and release
 * the initial reference taken by iw_create_cm_id.
 */
static void destroy_cm_id(struct iw_cm_id *cm_id)
{
	struct iwcm_id_private *cm_id_priv;
	unsigned long flags;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	/*
	 * Wait if we're currently in a connect or accept downcall. A
	 * listening endpoint should never block here.
	 */
	wait_event(cm_id_priv->connect_wait,
		   !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));

	/*
	 * Since we're deleting the cm_id, drop any events that
	 * might arrive before the last dereference.
	 */
	set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags);

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	switch (cm_id_priv->state) {
	case IW_CM_STATE_LISTEN:
		cm_id_priv->state = IW_CM_STATE_DESTROYING;
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		/* destroy the listening endpoint */
		cm_id->device->iwcm->destroy_listen(cm_id);
		spin_lock_irqsave(&cm_id_priv->lock, flags);
		break;
	case IW_CM_STATE_ESTABLISHED:
		cm_id_priv->state = IW_CM_STATE_DESTROYING;
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		/* Abrupt close of the connection */
		(void)iwcm_modify_qp_err(cm_id_priv->qp);
		spin_lock_irqsave(&cm_id_priv->lock, flags);
		break;
	case IW_CM_STATE_IDLE:
	case IW_CM_STATE_CLOSING:
		cm_id_priv->state = IW_CM_STATE_DESTROYING;
		break;
	case IW_CM_STATE_CONN_RECV:
		/*
		 * App called destroy before/without calling accept after
		 * receiving connection request event notification or
		 * returned non zero from the event callback function.
		 * In either case, must tell the provider to reject.
		 */
		cm_id_priv->state = IW_CM_STATE_DESTROYING;
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		cm_id->device->iwcm->reject(cm_id, NULL, 0);
		spin_lock_irqsave(&cm_id_priv->lock, flags);
		break;
	case IW_CM_STATE_CONN_SENT:
	case IW_CM_STATE_DESTROYING:
	default:
		BUG();
		break;
	}
	if (cm_id_priv->qp) {
		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
		cm_id_priv->qp = NULL;
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	if (cm_id->mapped) {
		iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr);
		iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM);
	}

	(void)iwcm_deref_id(cm_id_priv);
}

/*
 * This function is only called by the application thread and cannot
 * be called by the event thread. The function will wait for all
 * references to be released on the cm_id and then kfree the cm_id
 * object.
 */
void iw_destroy_cm_id(struct iw_cm_id *cm_id)
{
	destroy_cm_id(cm_id);
}
EXPORT_SYMBOL(iw_destroy_cm_id);

/**
 * iw_cm_check_wildcard - If IP address is 0 then use original
 * @pm_addr: sockaddr containing the IP to check for wildcard
 * @cm_addr: sockaddr containing the actual IP address
 * @cm_outaddr: sockaddr in which to set the IP address, leaving the port alone
 *
 * Checks the pm_addr for wildcard and then sets cm_outaddr's
 * IP to the actual (cm_addr).
 */
static void iw_cm_check_wildcard(struct sockaddr_storage *pm_addr,
				 struct sockaddr_storage *cm_addr,
				 struct sockaddr_storage *cm_outaddr)
{
	if (pm_addr->ss_family == AF_INET) {
		struct sockaddr_in *pm4_addr = (struct sockaddr_in *)pm_addr;

		if (pm4_addr->sin_addr.s_addr == htonl(INADDR_ANY)) {
			struct sockaddr_in *cm4_addr =
				(struct sockaddr_in *)cm_addr;
			struct sockaddr_in *cm4_outaddr =
				(struct sockaddr_in *)cm_outaddr;

			cm4_outaddr->sin_addr = cm4_addr->sin_addr;
		}
	} else {
		struct sockaddr_in6 *pm6_addr = (struct sockaddr_in6 *)pm_addr;

		if (ipv6_addr_type(&pm6_addr->sin6_addr) == IPV6_ADDR_ANY) {
			struct sockaddr_in6 *cm6_addr =
				(struct sockaddr_in6 *)cm_addr;
			struct sockaddr_in6 *cm6_outaddr =
				(struct sockaddr_in6 *)cm_outaddr;

			cm6_outaddr->sin6_addr = cm6_addr->sin6_addr;
		}
	}
}

/**
 * iw_cm_map - Use portmapper to map the ports
 * @cm_id: connection manager pointer
 * @active: Indicates the active side when true
 * returns nonzero for error only if iwpm_create_mapinfo() fails
 *
 * Tries to add a mapping for a port using the Portmapper. If
 * successful in mapping the IP/Port it will check the remote
 * mapped IP address for a wildcard IP address and replace the
 * zero IP address with the remote_addr.
 */
static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
{
	const char *devname = dev_name(&cm_id->device->dev);
	const char *ifname = cm_id->device->iwcm->ifname;
	struct iwpm_dev_data pm_reg_msg = {};
	struct iwpm_sa_data pm_msg;
	int status;

	if (strlen(devname) >= sizeof(pm_reg_msg.dev_name) ||
	    strlen(ifname) >= sizeof(pm_reg_msg.if_name))
		return -EINVAL;

	cm_id->m_local_addr = cm_id->local_addr;
	cm_id->m_remote_addr = cm_id->remote_addr;

	strcpy(pm_reg_msg.dev_name, devname);
	strcpy(pm_reg_msg.if_name, ifname);

	if (iwpm_register_pid(&pm_reg_msg, RDMA_NL_IWCM) ||
	    !iwpm_valid_pid())
		return 0;

	cm_id->mapped = true;
	pm_msg.loc_addr = cm_id->local_addr;
	pm_msg.rem_addr = cm_id->remote_addr;
	pm_msg.flags = (cm_id->device->iwcm->driver_flags & IW_F_NO_PORT_MAP) ?
		       IWPM_FLAGS_NO_PORT_MAP : 0;
	if (active)
		status = iwpm_add_and_query_mapping(&pm_msg,
						    RDMA_NL_IWCM);
	else
		status = iwpm_add_mapping(&pm_msg, RDMA_NL_IWCM);

	if (!status) {
		cm_id->m_local_addr = pm_msg.mapped_loc_addr;
		if (active) {
			cm_id->m_remote_addr = pm_msg.mapped_rem_addr;
			iw_cm_check_wildcard(&pm_msg.mapped_rem_addr,
					     &cm_id->remote_addr,
					     &cm_id->m_remote_addr);
		}
	}

	return iwpm_create_mapinfo(&cm_id->local_addr,
				   &cm_id->m_local_addr,
				   RDMA_NL_IWCM, pm_msg.flags);
}
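
/*
 * Worked example (illustrative only, addresses are made up): on the active
 * side, if the port mapper reports a mapped peer address of 0.0.0.0:4001
 * while cm_id->remote_addr is 192.0.2.7:445, iw_cm_check_wildcard() above
 * rewrites cm_id->m_remote_addr to 192.0.2.7:4001 -- the wildcard IP is
 * replaced with the real peer IP while the mapped port is kept.
 */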

/*
 * CM_ID <-- LISTEN
 *
 * Start listening for connect requests. Generates one CONNECT_REQUEST
 * event for each inbound connect request.
 */
int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
{
	struct iwcm_id_private *cm_id_priv;
	unsigned long flags;
	int ret;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);

	if (!backlog)
		backlog = default_backlog;

	ret = alloc_work_entries(cm_id_priv, backlog);
	if (ret)
		return ret;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	switch (cm_id_priv->state) {
	case IW_CM_STATE_IDLE:
		cm_id_priv->state = IW_CM_STATE_LISTEN;
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		ret = iw_cm_map(cm_id, false);
		if (!ret)
			ret = cm_id->device->iwcm->create_listen(cm_id, backlog);
		if (ret)
			cm_id_priv->state = IW_CM_STATE_IDLE;
		spin_lock_irqsave(&cm_id_priv->lock, flags);
		break;
	default:
		ret = -EINVAL;
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	return ret;
}
EXPORT_SYMBOL(iw_cm_listen);

/*
 * CM_ID <-- IDLE
 *
 * Rejects an inbound connection request. No events are generated.
 */
int iw_cm_reject(struct iw_cm_id *cm_id,
		 const void *private_data,
		 u8 private_data_len)
{
	struct iwcm_id_private *cm_id_priv;
	unsigned long flags;
	int ret;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
		wake_up_all(&cm_id_priv->connect_wait);
		return -EINVAL;
	}
	cm_id_priv->state = IW_CM_STATE_IDLE;
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	ret = cm_id->device->iwcm->reject(cm_id, private_data,
					  private_data_len);

	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
	wake_up_all(&cm_id_priv->connect_wait);

	return ret;
}
EXPORT_SYMBOL(iw_cm_reject);

/*
 * CM_ID <-- ESTABLISHED
 *
 * Accepts an inbound connection request and generates an ESTABLISHED
 * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block
 * until the ESTABLISHED event is received from the provider.
 */
int iw_cm_accept(struct iw_cm_id *cm_id,
		 struct iw_cm_conn_param *iw_param)
{
	struct iwcm_id_private *cm_id_priv;
	struct ib_qp *qp;
	unsigned long flags;
	int ret;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
		wake_up_all(&cm_id_priv->connect_wait);
		return -EINVAL;
	}
	/* Get the ib_qp given the QPN */
	qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
	if (!qp) {
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
		wake_up_all(&cm_id_priv->connect_wait);
		return -EINVAL;
	}
	cm_id->device->iwcm->add_ref(qp);
	cm_id_priv->qp = qp;
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	ret = cm_id->device->iwcm->accept(cm_id, iw_param);
	if (ret) {
		/* An error on accept precludes provider events */
		BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
		cm_id_priv->state = IW_CM_STATE_IDLE;
		spin_lock_irqsave(&cm_id_priv->lock, flags);
		if (cm_id_priv->qp) {
			cm_id->device->iwcm->rem_ref(qp);
			cm_id_priv->qp = NULL;
		}
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
		wake_up_all(&cm_id_priv->connect_wait);
	}

	return ret;
}
EXPORT_SYMBOL(iw_cm_accept);
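
/*
 * Example (illustrative sketch only): roughly how a listening ULP might
 * handle an IW_CM_EVENT_CONNECT_REQUEST in its cm_handler. The handler name,
 * the my_qp variable and the ORD/IRD values are hypothetical placeholders;
 * a real consumer would take them from its own QP and negotiated limits.
 *
 *	static int my_cm_handler(struct iw_cm_id *cm_id,
 *				 struct iw_cm_event *event)
 *	{
 *		struct iw_cm_conn_param param = {
 *			.qpn	= my_qp->qp_num,
 *			.ord	= 16,
 *			.ird	= 16,
 *		};
 *
 *		if (event->event == IW_CM_EVENT_CONNECT_REQUEST)
 *			return iw_cm_accept(cm_id, &param);
 *		return 0;
 *	}
 */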

/*
 * Active Side: CM_ID <-- CONN_SENT
 *
 * If successful, results in the generation of a CONNECT_REPLY
 * event. iw_cm_disconnect and iw_destroy_cm_id will block until the
 * CONNECT_REPLY event is received from the provider.
 */
int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
{
	struct iwcm_id_private *cm_id_priv;
	int ret;
	unsigned long flags;
	struct ib_qp *qp;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);

	ret = alloc_work_entries(cm_id_priv, 4);
	if (ret)
		return ret;

	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
	spin_lock_irqsave(&cm_id_priv->lock, flags);

	if (cm_id_priv->state != IW_CM_STATE_IDLE) {
		ret = -EINVAL;
		goto err;
	}

	/* Get the ib_qp given the QPN */
	qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
	if (!qp) {
		ret = -EINVAL;
		goto err;
	}
	cm_id->device->iwcm->add_ref(qp);
	cm_id_priv->qp = qp;
	cm_id_priv->state = IW_CM_STATE_CONN_SENT;
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	ret = iw_cm_map(cm_id, true);
	if (!ret)
		ret = cm_id->device->iwcm->connect(cm_id, iw_param);
	if (!ret)
		return 0;	/* success */

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	if (cm_id_priv->qp) {
		cm_id->device->iwcm->rem_ref(qp);
		cm_id_priv->qp = NULL;
	}
	cm_id_priv->state = IW_CM_STATE_IDLE;
err:
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
	wake_up_all(&cm_id_priv->connect_wait);
	return ret;
}
EXPORT_SYMBOL(iw_cm_connect);
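
/*
 * Example (illustrative sketch only): an active-side ULP typically fills in
 * the addresses on the cm_id and then calls iw_cm_connect(). The variables
 * laddr, raddr and my_qp are hypothetical placeholders.
 *
 *	struct iw_cm_conn_param param = {
 *		.qpn	= my_qp->qp_num,
 *		.ord	= 16,
 *		.ird	= 16,
 *	};
 *
 *	memcpy(&cm_id->local_addr, &laddr, sizeof(laddr));
 *	memcpy(&cm_id->remote_addr, &raddr, sizeof(raddr));
 *	ret = iw_cm_connect(cm_id, &param);
 *
 * The outcome is reported asynchronously through the cm_handler as an
 * IW_CM_EVENT_CONNECT_REPLY event (see cm_conn_rep_handler() below).
 */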

/*
 * Passive Side: new CM_ID <-- CONN_RECV
 *
 * Handles an inbound connect request. The function creates a new
 * iw_cm_id to represent the new connection and inherits the client
 * callback function and other attributes from the listening parent.
 *
 * The work item contains a pointer to the listen_cm_id and the event. The
 * listen_cm_id contains the client cm_handler, context and
 * device. These are copied when the cm_id is cloned. The event
 * contains the new four tuple.
 *
 * An error on the child should not affect the parent, so this
 * function does not return a value.
 */
static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
				struct iw_cm_event *iw_event)
{
	unsigned long flags;
	struct iw_cm_id *cm_id;
	struct iwcm_id_private *cm_id_priv;
	int ret;

	/*
	 * The provider should never generate a connection request
	 * event with a bad status.
	 */
	BUG_ON(iw_event->status);

	cm_id = iw_create_cm_id(listen_id_priv->id.device,
				listen_id_priv->id.cm_handler,
				listen_id_priv->id.context);
	/* If the cm_id could not be created, ignore the request */
	if (IS_ERR(cm_id))
		goto out;

	cm_id->provider_data = iw_event->provider_data;
	cm_id->m_local_addr = iw_event->local_addr;
	cm_id->m_remote_addr = iw_event->remote_addr;
	cm_id->local_addr = listen_id_priv->id.local_addr;

	ret = iwpm_get_remote_info(&listen_id_priv->id.m_local_addr,
				   &iw_event->remote_addr,
				   &cm_id->remote_addr,
				   RDMA_NL_IWCM);
	if (ret) {
		cm_id->remote_addr = iw_event->remote_addr;
	} else {
		iw_cm_check_wildcard(&listen_id_priv->id.m_local_addr,
				     &iw_event->local_addr,
				     &cm_id->local_addr);
		iw_event->local_addr = cm_id->local_addr;
		iw_event->remote_addr = cm_id->remote_addr;
	}

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	cm_id_priv->state = IW_CM_STATE_CONN_RECV;

	/*
	 * We could be destroying the listening id. If so, ignore this
	 * upcall.
	 */
	spin_lock_irqsave(&listen_id_priv->lock, flags);
	if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
		spin_unlock_irqrestore(&listen_id_priv->lock, flags);
		iw_cm_reject(cm_id, NULL, 0);
		iw_destroy_cm_id(cm_id);
		goto out;
	}
	spin_unlock_irqrestore(&listen_id_priv->lock, flags);

	ret = alloc_work_entries(cm_id_priv, 3);
	if (ret) {
		iw_cm_reject(cm_id, NULL, 0);
		iw_destroy_cm_id(cm_id);
		goto out;
	}

	/* Call the client CM handler */
	ret = cm_id->cm_handler(cm_id, iw_event);
	if (ret) {
		iw_cm_reject(cm_id, NULL, 0);
		iw_destroy_cm_id(cm_id);
	}

out:
	if (iw_event->private_data_len)
		kfree(iw_event->private_data);
}

/*
 * Passive Side: CM_ID <-- ESTABLISHED
 *
 * The provider generated an ESTABLISHED event which means that
 * the MPA negotiation has completed successfully and we are now in MPA
 * FPDU mode.
 *
 * This event can only be received in the CONN_RECV state. If the
 * remote peer closed, the ESTABLISHED event would be received followed
 * by the CLOSE event. If the app closes, it will block until we wake
 * it up after processing this event.
 */
static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv,
			       struct iw_cm_event *iw_event)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&cm_id_priv->lock, flags);

	/*
	 * We clear the CONNECT_WAIT bit here to allow the callback
	 * function to call iw_cm_disconnect. Calling iw_destroy_cm_id
	 * from a callback handler is not allowed.
	 */
	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
	BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
	cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
	wake_up_all(&cm_id_priv->connect_wait);

	return ret;
}

/*
 * Active Side: CM_ID <-- ESTABLISHED
 *
 * The app has called connect and is waiting for the established event to
 * post its requests to the server. This event will wake up anyone
 * blocked in iw_cm_disconnect or iw_destroy_cm_id.
 */
static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
			       struct iw_cm_event *iw_event)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	/*
	 * Clear the connect wait bit so a callback function calling
	 * iw_cm_disconnect will not wait and deadlock this thread
	 */
	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
	BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
	if (iw_event->status == 0) {
		cm_id_priv->id.m_local_addr = iw_event->local_addr;
		cm_id_priv->id.m_remote_addr = iw_event->remote_addr;
		iw_event->local_addr = cm_id_priv->id.local_addr;
		iw_event->remote_addr = cm_id_priv->id.remote_addr;
		cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
	} else {
		/* REJECTED or RESET */
		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
		cm_id_priv->qp = NULL;
		cm_id_priv->state = IW_CM_STATE_IDLE;
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);

	if (iw_event->private_data_len)
		kfree(iw_event->private_data);

	/* Wake up waiters on connect complete */
	wake_up_all(&cm_id_priv->connect_wait);

	return ret;
}

/*
 * CM_ID <-- CLOSING
 *
 * If in the ESTABLISHED state, move to CLOSING.
 */
static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv,
				  struct iw_cm_event *iw_event)
{
	unsigned long flags;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED)
		cm_id_priv->state = IW_CM_STATE_CLOSING;
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
}

/*
 * CM_ID <-- IDLE
 *
 * If in the ESTABLISHED or CLOSING states, the QP will have been
 * moved by the provider to the ERR state. Disassociate the CM_ID from
 * the QP, move to IDLE, and remove the 'connected' reference.
 *
 * If in some other state, the cm_id was destroyed asynchronously.
 * This is the last reference that will result in waking up
 * the app thread blocked in iw_destroy_cm_id.
 */
static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
			    struct iw_cm_event *iw_event)
{
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&cm_id_priv->lock, flags);

	if (cm_id_priv->qp) {
		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
		cm_id_priv->qp = NULL;
	}
	switch (cm_id_priv->state) {
	case IW_CM_STATE_ESTABLISHED:
	case IW_CM_STATE_CLOSING:
		cm_id_priv->state = IW_CM_STATE_IDLE;
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
		spin_lock_irqsave(&cm_id_priv->lock, flags);
		break;
	case IW_CM_STATE_DESTROYING:
		break;
	default:
		BUG();
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	return ret;
}

static int process_event(struct iwcm_id_private *cm_id_priv,
			 struct iw_cm_event *iw_event)
{
	int ret = 0;

	switch (iw_event->event) {
	case IW_CM_EVENT_CONNECT_REQUEST:
		cm_conn_req_handler(cm_id_priv, iw_event);
		break;
	case IW_CM_EVENT_CONNECT_REPLY:
		ret = cm_conn_rep_handler(cm_id_priv, iw_event);
		break;
	case IW_CM_EVENT_ESTABLISHED:
		ret = cm_conn_est_handler(cm_id_priv, iw_event);
		break;
	case IW_CM_EVENT_DISCONNECT:
		cm_disconnect_handler(cm_id_priv, iw_event);
		break;
	case IW_CM_EVENT_CLOSE:
		ret = cm_close_handler(cm_id_priv, iw_event);
		break;
	default:
		BUG();
	}

	return ret;
}

/*
 * Process events on the work_list for the cm_id. If the callback
 * function requests that the cm_id be deleted, a flag is set in the
 * cm_id flags to indicate that when the last reference is
 * removed, the cm_id is to be destroyed. This is necessary to
 * distinguish between an object that will be destroyed by the app
 * thread asleep on the destroy_comp list vs. an object destroyed
 * here synchronously when the last reference is removed.
 */
static void cm_work_handler(struct work_struct *_work)
{
	struct iwcm_work *work = container_of(_work, struct iwcm_work, work);
	struct iw_cm_event levent;
	struct iwcm_id_private *cm_id_priv = work->cm_id;
	unsigned long flags;
	int empty;
	int ret = 0;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	empty = list_empty(&cm_id_priv->work_list);
	while (!empty) {
		work = list_entry(cm_id_priv->work_list.next,
				  struct iwcm_work, list);
		list_del_init(&work->list);
		empty = list_empty(&cm_id_priv->work_list);
		levent = work->event;
		put_work(work);
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);

		if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
			ret = process_event(cm_id_priv, &levent);
			if (ret)
				destroy_cm_id(&cm_id_priv->id);
		} else
			pr_debug("dropping event %d\n", levent.event);
		if (iwcm_deref_id(cm_id_priv))
			return;
		if (empty)
			return;
		spin_lock_irqsave(&cm_id_priv->lock, flags);
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
}

/*
 * This function is called in interrupt context. Schedule events on
 * the iwcm_wq thread to allow callback functions to downcall into
 * the CM and/or block. Events are queued to a per-CM_ID
 * work_list. If this is the first event on the work_list, the work
 * element is also queued on the iwcm_wq thread.
 *
 * Each event holds a reference on the cm_id. Until the last posted
 * event has been delivered and processed, the cm_id cannot be
 * deleted.
 *
 * Returns:
 *	      0	- the event was handled.
 *	-ENOMEM	- the event was not handled due to lack of resources.
 */
static int cm_event_handler(struct iw_cm_id *cm_id,
			    struct iw_cm_event *iw_event)
{
	struct iwcm_work *work;
	struct iwcm_id_private *cm_id_priv;
	unsigned long flags;
	int ret = 0;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	work = get_work(cm_id_priv);
	if (!work) {
		ret = -ENOMEM;
		goto out;
	}

	INIT_WORK(&work->work, cm_work_handler);
	work->cm_id = cm_id_priv;
	work->event = *iw_event;

	if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST ||
	     work->event.event == IW_CM_EVENT_CONNECT_REPLY) &&
	    work->event.private_data_len) {
		ret = copy_private_data(&work->event);
		if (ret) {
			put_work(work);
			goto out;
		}
	}

	atomic_inc(&cm_id_priv->refcount);
	if (list_empty(&cm_id_priv->work_list)) {
		list_add_tail(&work->list, &cm_id_priv->work_list);
		queue_work(iwcm_wq, &work->work);
	} else
		list_add_tail(&work->list, &cm_id_priv->work_list);
out:
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	return ret;
}
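
/*
 * Example (illustrative sketch only): an iWARP provider driver delivers
 * events by calling the event_handler that iw_create_cm_id() installs on the
 * cm_id, i.e. cm_event_handler() above. A hypothetical provider completion
 * path might look roughly like this:
 *
 *	struct iw_cm_event event = {
 *		.event	= IW_CM_EVENT_ESTABLISHED,
 *		.status	= 0,
 *	};
 *
 *	if (cm_id->event_handler(cm_id, &event))
 *		pr_debug("event not queued, work element pool exhausted\n");
 *
 * cm_event_handler() only copies the event and queues work, so the provider
 * may call it from its interrupt context.
 */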

static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv,
				  struct ib_qp_attr *qp_attr,
				  int *qp_attr_mask)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	switch (cm_id_priv->state) {
	case IW_CM_STATE_IDLE:
	case IW_CM_STATE_CONN_SENT:
	case IW_CM_STATE_CONN_RECV:
	case IW_CM_STATE_ESTABLISHED:
		*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
		qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE |
					   IB_ACCESS_REMOTE_READ;
		ret = 0;
		break;
	default:
		ret = -EINVAL;
		break;
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	return ret;
}

static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv,
				 struct ib_qp_attr *qp_attr,
				 int *qp_attr_mask)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	switch (cm_id_priv->state) {
	case IW_CM_STATE_IDLE:
	case IW_CM_STATE_CONN_SENT:
	case IW_CM_STATE_CONN_RECV:
	case IW_CM_STATE_ESTABLISHED:
		*qp_attr_mask = 0;
		ret = 0;
		break;
	default:
		ret = -EINVAL;
		break;
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	return ret;
}

int iw_cm_init_qp_attr(struct iw_cm_id *cm_id,
		       struct ib_qp_attr *qp_attr,
		       int *qp_attr_mask)
{
	struct iwcm_id_private *cm_id_priv;
	int ret;

	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
	switch (qp_attr->qp_state) {
	case IB_QPS_INIT:
	case IB_QPS_RTR:
		ret = iwcm_init_qp_init_attr(cm_id_priv,
					     qp_attr, qp_attr_mask);
		break;
	case IB_QPS_RTS:
		ret = iwcm_init_qp_rts_attr(cm_id_priv,
					    qp_attr, qp_attr_mask);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret;
}
EXPORT_SYMBOL(iw_cm_init_qp_attr);
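
/*
 * Example (illustrative sketch only): a ULP transitioning its QP would
 * typically let the CM fill in the attributes and mask before calling
 * ib_modify_qp(). The qp variable is a hypothetical placeholder for the
 * consumer's QP.
 *
 *	struct ib_qp_attr qp_attr;
 *	int qp_attr_mask;
 *	int ret;
 *
 *	qp_attr.qp_state = IB_QPS_RTR;
 *	ret = iw_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
 *	if (!ret)
 *		ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
 */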

static int __init iw_cm_init(void)
{
	int ret;

	ret = iwpm_init(RDMA_NL_IWCM);
	if (ret)
		pr_err("iw_cm: couldn't init iwpm\n");
	else
		rdma_nl_register(RDMA_NL_IWCM, iwcm_nl_cb_table);
	iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", 0);
	if (!iwcm_wq)
		return -ENOMEM;

	iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm",
						 iwcm_ctl_table);
	if (!iwcm_ctl_table_hdr) {
		pr_err("iw_cm: couldn't register sysctl paths\n");
		destroy_workqueue(iwcm_wq);
		return -ENOMEM;
	}

	return 0;
}

static void __exit iw_cm_cleanup(void)
{
	unregister_net_sysctl_table(iwcm_ctl_table_hdr);
	destroy_workqueue(iwcm_wq);
	rdma_nl_unregister(RDMA_NL_IWCM);
	iwpm_exit(RDMA_NL_IWCM);
}

MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_IWCM, 2);

module_init(iw_cm_init);
module_exit(iw_cm_cleanup);