1 /* 2 * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. 3 * Copyright (c) 2004 Topspin Corporation. All rights reserved. 4 * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. 5 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. 6 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. 7 * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. 8 * 9 * This software is available to you under a choice of one of two 10 * licenses. You may choose to be licensed under the terms of the GNU 11 * General Public License (GPL) Version 2, available from the file 12 * COPYING in the main directory of this source tree, or the 13 * OpenIB.org BSD license below: 14 * 15 * Redistribution and use in source and binary forms, with or 16 * without modification, are permitted provided that the following 17 * conditions are met: 18 * 19 * - Redistributions of source code must retain the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer. 22 * 23 * - Redistributions in binary form must reproduce the above 24 * copyright notice, this list of conditions and the following 25 * disclaimer in the documentation and/or other materials 26 * provided with the distribution. 27 * 28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 29 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 30 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 31 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 32 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 33 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 34 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 35 * SOFTWARE. 36 * 37 */ 38 #include <linux/dma-mapping.h> 39 #include <linux/err.h> 40 #include <linux/idr.h> 41 #include <linux/interrupt.h> 42 #include <linux/rbtree.h> 43 #include <linux/sched.h> 44 #include <linux/spinlock.h> 45 #include <linux/workqueue.h> 46 #include <linux/completion.h> 47 #include <linux/slab.h> 48 #include <linux/module.h> 49 #include <linux/sysctl.h> 50 51 #include <rdma/iw_cm.h> 52 #include <rdma/ib_addr.h> 53 #include <rdma/iw_portmap.h> 54 #include <rdma/rdma_netlink.h> 55 56 #include "iwcm.h" 57 58 MODULE_AUTHOR("Tom Tucker"); 59 MODULE_DESCRIPTION("iWARP CM"); 60 MODULE_LICENSE("Dual BSD/GPL"); 61 62 static const char * const iwcm_rej_reason_strs[] = { 63 [ECONNRESET] = "reset by remote host", 64 [ECONNREFUSED] = "refused by remote application", 65 [ETIMEDOUT] = "setup timeout", 66 }; 67 68 const char *__attribute_const__ iwcm_reject_msg(int reason) 69 { 70 size_t index; 71 72 /* iWARP uses negative errnos */ 73 index = -reason; 74 75 if (index < ARRAY_SIZE(iwcm_rej_reason_strs) && 76 iwcm_rej_reason_strs[index]) 77 return iwcm_rej_reason_strs[index]; 78 else 79 return "unrecognized reason"; 80 } 81 EXPORT_SYMBOL(iwcm_reject_msg); 82 83 static struct rdma_nl_cbs iwcm_nl_cb_table[RDMA_NL_IWPM_NUM_OPS] = { 84 [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, 85 [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, 86 [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, 87 [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb}, 88 [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb}, 89 [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb}, 90 [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb} 91 }; 92 93 static struct workqueue_struct *iwcm_wq; 94 struct iwcm_work { 95 struct work_struct work; 96 struct iwcm_id_private *cm_id; 97 struct list_head list; 98 struct iw_cm_event event; 99 struct list_head free_list; 100 }; 101 102 static unsigned int default_backlog = 256; 103 104 static struct ctl_table_header *iwcm_ctl_table_hdr; 105 static struct ctl_table iwcm_ctl_table[] = { 106 { 107 .procname = "default_backlog", 108 .data = &default_backlog, 109 .maxlen = sizeof(default_backlog), 110 .mode = 0644, 111 .proc_handler = proc_dointvec, 112 }, 113 { } 114 }; 115 116 /* 117 * The following services provide a mechanism for pre-allocating iwcm_work 118 * elements. The design pre-allocates them based on the cm_id type: 119 * LISTENING IDS: Get enough elements preallocated to handle the 120 * listen backlog. 121 * ACTIVE IDS: 4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE 122 * PASSIVE IDS: 3: ESTABLISHED, DISCONNECT, CLOSE 123 * 124 * Allocating them in connect and listen avoids having to deal 125 * with allocation failures on the event upcall from the provider (which 126 * is called in the interrupt context). 127 * 128 * One exception is when creating the cm_id for incoming connection requests. 129 * There are two cases: 130 * 1) in the event upcall, cm_event_handler(), for a listening cm_id. If 131 * the backlog is exceeded, then no more connection request events will 132 * be processed. cm_event_handler() returns -ENOMEM in this case. Its up 133 * to the provider to reject the connection request. 134 * 2) in the connection request workqueue handler, cm_conn_req_handler(). 135 * If work elements cannot be allocated for the new connect request cm_id, 136 * then IWCM will call the provider reject method. This is ok since 137 * cm_conn_req_handler() runs in the workqueue thread context. 138 */ 139 140 static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv) 141 { 142 struct iwcm_work *work; 143 144 if (list_empty(&cm_id_priv->work_free_list)) 145 return NULL; 146 work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work, 147 free_list); 148 list_del_init(&work->free_list); 149 return work; 150 } 151 152 static void put_work(struct iwcm_work *work) 153 { 154 list_add(&work->free_list, &work->cm_id->work_free_list); 155 } 156 157 static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) 158 { 159 struct list_head *e, *tmp; 160 161 list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) 162 kfree(list_entry(e, struct iwcm_work, free_list)); 163 } 164 165 static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) 166 { 167 struct iwcm_work *work; 168 169 BUG_ON(!list_empty(&cm_id_priv->work_free_list)); 170 while (count--) { 171 work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL); 172 if (!work) { 173 dealloc_work_entries(cm_id_priv); 174 return -ENOMEM; 175 } 176 work->cm_id = cm_id_priv; 177 INIT_LIST_HEAD(&work->list); 178 put_work(work); 179 } 180 return 0; 181 } 182 183 /* 184 * Save private data from incoming connection requests to 185 * iw_cm_event, so the low level driver doesn't have to. Adjust 186 * the event ptr to point to the local copy. 187 */ 188 static int copy_private_data(struct iw_cm_event *event) 189 { 190 void *p; 191 192 p = kmemdup(event->private_data, event->private_data_len, GFP_ATOMIC); 193 if (!p) 194 return -ENOMEM; 195 event->private_data = p; 196 return 0; 197 } 198 199 static void free_cm_id(struct iwcm_id_private *cm_id_priv) 200 { 201 dealloc_work_entries(cm_id_priv); 202 kfree(cm_id_priv); 203 } 204 205 /* 206 * Release a reference on cm_id. If the last reference is being 207 * released, free the cm_id and return 1. 208 */ 209 static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv) 210 { 211 BUG_ON(atomic_read(&cm_id_priv->refcount)==0); 212 if (atomic_dec_and_test(&cm_id_priv->refcount)) { 213 BUG_ON(!list_empty(&cm_id_priv->work_list)); 214 free_cm_id(cm_id_priv); 215 return 1; 216 } 217 218 return 0; 219 } 220 221 static void add_ref(struct iw_cm_id *cm_id) 222 { 223 struct iwcm_id_private *cm_id_priv; 224 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 225 atomic_inc(&cm_id_priv->refcount); 226 } 227 228 static void rem_ref(struct iw_cm_id *cm_id) 229 { 230 struct iwcm_id_private *cm_id_priv; 231 232 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 233 234 (void)iwcm_deref_id(cm_id_priv); 235 } 236 237 static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event); 238 239 struct iw_cm_id *iw_create_cm_id(struct ib_device *device, 240 iw_cm_handler cm_handler, 241 void *context) 242 { 243 struct iwcm_id_private *cm_id_priv; 244 245 cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL); 246 if (!cm_id_priv) 247 return ERR_PTR(-ENOMEM); 248 249 cm_id_priv->state = IW_CM_STATE_IDLE; 250 cm_id_priv->id.device = device; 251 cm_id_priv->id.cm_handler = cm_handler; 252 cm_id_priv->id.context = context; 253 cm_id_priv->id.event_handler = cm_event_handler; 254 cm_id_priv->id.add_ref = add_ref; 255 cm_id_priv->id.rem_ref = rem_ref; 256 spin_lock_init(&cm_id_priv->lock); 257 atomic_set(&cm_id_priv->refcount, 1); 258 init_waitqueue_head(&cm_id_priv->connect_wait); 259 init_completion(&cm_id_priv->destroy_comp); 260 INIT_LIST_HEAD(&cm_id_priv->work_list); 261 INIT_LIST_HEAD(&cm_id_priv->work_free_list); 262 263 return &cm_id_priv->id; 264 } 265 EXPORT_SYMBOL(iw_create_cm_id); 266 267 268 static int iwcm_modify_qp_err(struct ib_qp *qp) 269 { 270 struct ib_qp_attr qp_attr; 271 272 if (!qp) 273 return -EINVAL; 274 275 qp_attr.qp_state = IB_QPS_ERR; 276 return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); 277 } 278 279 /* 280 * This is really the RDMAC CLOSING state. It is most similar to the 281 * IB SQD QP state. 282 */ 283 static int iwcm_modify_qp_sqd(struct ib_qp *qp) 284 { 285 struct ib_qp_attr qp_attr; 286 287 BUG_ON(qp == NULL); 288 qp_attr.qp_state = IB_QPS_SQD; 289 return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); 290 } 291 292 /* 293 * CM_ID <-- CLOSING 294 * 295 * Block if a passive or active connection is currently being processed. Then 296 * process the event as follows: 297 * - If we are ESTABLISHED, move to CLOSING and modify the QP state 298 * based on the abrupt flag 299 * - If the connection is already in the CLOSING or IDLE state, the peer is 300 * disconnecting concurrently with us and we've already seen the 301 * DISCONNECT event -- ignore the request and return 0 302 * - Disconnect on a listening endpoint returns -EINVAL 303 */ 304 int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt) 305 { 306 struct iwcm_id_private *cm_id_priv; 307 unsigned long flags; 308 int ret = 0; 309 struct ib_qp *qp = NULL; 310 311 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 312 /* Wait if we're currently in a connect or accept downcall */ 313 wait_event(cm_id_priv->connect_wait, 314 !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); 315 316 spin_lock_irqsave(&cm_id_priv->lock, flags); 317 switch (cm_id_priv->state) { 318 case IW_CM_STATE_ESTABLISHED: 319 cm_id_priv->state = IW_CM_STATE_CLOSING; 320 321 /* QP could be <nul> for user-mode client */ 322 if (cm_id_priv->qp) 323 qp = cm_id_priv->qp; 324 else 325 ret = -EINVAL; 326 break; 327 case IW_CM_STATE_LISTEN: 328 ret = -EINVAL; 329 break; 330 case IW_CM_STATE_CLOSING: 331 /* remote peer closed first */ 332 case IW_CM_STATE_IDLE: 333 /* accept or connect returned !0 */ 334 break; 335 case IW_CM_STATE_CONN_RECV: 336 /* 337 * App called disconnect before/without calling accept after 338 * connect_request event delivered. 339 */ 340 break; 341 case IW_CM_STATE_CONN_SENT: 342 /* Can only get here if wait above fails */ 343 default: 344 BUG(); 345 } 346 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 347 348 if (qp) { 349 if (abrupt) 350 ret = iwcm_modify_qp_err(qp); 351 else 352 ret = iwcm_modify_qp_sqd(qp); 353 354 /* 355 * If both sides are disconnecting the QP could 356 * already be in ERR or SQD states 357 */ 358 ret = 0; 359 } 360 361 return ret; 362 } 363 EXPORT_SYMBOL(iw_cm_disconnect); 364 365 /* 366 * CM_ID <-- DESTROYING 367 * 368 * Clean up all resources associated with the connection and release 369 * the initial reference taken by iw_create_cm_id. 370 */ 371 static void destroy_cm_id(struct iw_cm_id *cm_id) 372 { 373 struct iwcm_id_private *cm_id_priv; 374 unsigned long flags; 375 376 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 377 /* 378 * Wait if we're currently in a connect or accept downcall. A 379 * listening endpoint should never block here. 380 */ 381 wait_event(cm_id_priv->connect_wait, 382 !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); 383 384 /* 385 * Since we're deleting the cm_id, drop any events that 386 * might arrive before the last dereference. 387 */ 388 set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags); 389 390 spin_lock_irqsave(&cm_id_priv->lock, flags); 391 switch (cm_id_priv->state) { 392 case IW_CM_STATE_LISTEN: 393 cm_id_priv->state = IW_CM_STATE_DESTROYING; 394 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 395 /* destroy the listening endpoint */ 396 cm_id->device->iwcm->destroy_listen(cm_id); 397 spin_lock_irqsave(&cm_id_priv->lock, flags); 398 break; 399 case IW_CM_STATE_ESTABLISHED: 400 cm_id_priv->state = IW_CM_STATE_DESTROYING; 401 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 402 /* Abrupt close of the connection */ 403 (void)iwcm_modify_qp_err(cm_id_priv->qp); 404 spin_lock_irqsave(&cm_id_priv->lock, flags); 405 break; 406 case IW_CM_STATE_IDLE: 407 case IW_CM_STATE_CLOSING: 408 cm_id_priv->state = IW_CM_STATE_DESTROYING; 409 break; 410 case IW_CM_STATE_CONN_RECV: 411 /* 412 * App called destroy before/without calling accept after 413 * receiving connection request event notification or 414 * returned non zero from the event callback function. 415 * In either case, must tell the provider to reject. 416 */ 417 cm_id_priv->state = IW_CM_STATE_DESTROYING; 418 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 419 cm_id->device->iwcm->reject(cm_id, NULL, 0); 420 spin_lock_irqsave(&cm_id_priv->lock, flags); 421 break; 422 case IW_CM_STATE_CONN_SENT: 423 case IW_CM_STATE_DESTROYING: 424 default: 425 BUG(); 426 break; 427 } 428 if (cm_id_priv->qp) { 429 cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); 430 cm_id_priv->qp = NULL; 431 } 432 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 433 434 if (cm_id->mapped) { 435 iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr); 436 iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM); 437 } 438 439 (void)iwcm_deref_id(cm_id_priv); 440 } 441 442 /* 443 * This function is only called by the application thread and cannot 444 * be called by the event thread. The function will wait for all 445 * references to be released on the cm_id and then kfree the cm_id 446 * object. 447 */ 448 void iw_destroy_cm_id(struct iw_cm_id *cm_id) 449 { 450 destroy_cm_id(cm_id); 451 } 452 EXPORT_SYMBOL(iw_destroy_cm_id); 453 454 /** 455 * iw_cm_check_wildcard - If IP address is 0 then use original 456 * @pm_addr: sockaddr containing the ip to check for wildcard 457 * @cm_addr: sockaddr containing the actual IP address 458 * @cm_outaddr: sockaddr to set IP addr which leaving port 459 * 460 * Checks the pm_addr for wildcard and then sets cm_outaddr's 461 * IP to the actual (cm_addr). 462 */ 463 static void iw_cm_check_wildcard(struct sockaddr_storage *pm_addr, 464 struct sockaddr_storage *cm_addr, 465 struct sockaddr_storage *cm_outaddr) 466 { 467 if (pm_addr->ss_family == AF_INET) { 468 struct sockaddr_in *pm4_addr = (struct sockaddr_in *)pm_addr; 469 470 if (pm4_addr->sin_addr.s_addr == htonl(INADDR_ANY)) { 471 struct sockaddr_in *cm4_addr = 472 (struct sockaddr_in *)cm_addr; 473 struct sockaddr_in *cm4_outaddr = 474 (struct sockaddr_in *)cm_outaddr; 475 476 cm4_outaddr->sin_addr = cm4_addr->sin_addr; 477 } 478 } else { 479 struct sockaddr_in6 *pm6_addr = (struct sockaddr_in6 *)pm_addr; 480 481 if (ipv6_addr_type(&pm6_addr->sin6_addr) == IPV6_ADDR_ANY) { 482 struct sockaddr_in6 *cm6_addr = 483 (struct sockaddr_in6 *)cm_addr; 484 struct sockaddr_in6 *cm6_outaddr = 485 (struct sockaddr_in6 *)cm_outaddr; 486 487 cm6_outaddr->sin6_addr = cm6_addr->sin6_addr; 488 } 489 } 490 } 491 492 /** 493 * iw_cm_map - Use portmapper to map the ports 494 * @cm_id: connection manager pointer 495 * @active: Indicates the active side when true 496 * returns nonzero for error only if iwpm_create_mapinfo() fails 497 * 498 * Tries to add a mapping for a port using the Portmapper. If 499 * successful in mapping the IP/Port it will check the remote 500 * mapped IP address for a wildcard IP address and replace the 501 * zero IP address with the remote_addr. 502 */ 503 static int iw_cm_map(struct iw_cm_id *cm_id, bool active) 504 { 505 const char *devname = dev_name(&cm_id->device->dev); 506 const char *ifname = cm_id->device->iwcm->ifname; 507 struct iwpm_dev_data pm_reg_msg; 508 struct iwpm_sa_data pm_msg; 509 int status; 510 511 if (strlen(devname) >= sizeof(pm_reg_msg.dev_name) || 512 strlen(ifname) >= sizeof(pm_reg_msg.if_name)) 513 return -EINVAL; 514 515 cm_id->m_local_addr = cm_id->local_addr; 516 cm_id->m_remote_addr = cm_id->remote_addr; 517 518 strncpy(pm_reg_msg.dev_name, devname, sizeof(pm_reg_msg.dev_name)); 519 strncpy(pm_reg_msg.if_name, ifname, sizeof(pm_reg_msg.if_name)); 520 521 if (iwpm_register_pid(&pm_reg_msg, RDMA_NL_IWCM) || 522 !iwpm_valid_pid()) 523 return 0; 524 525 cm_id->mapped = true; 526 pm_msg.loc_addr = cm_id->local_addr; 527 pm_msg.rem_addr = cm_id->remote_addr; 528 if (active) 529 status = iwpm_add_and_query_mapping(&pm_msg, 530 RDMA_NL_IWCM); 531 else 532 status = iwpm_add_mapping(&pm_msg, RDMA_NL_IWCM); 533 534 if (!status) { 535 cm_id->m_local_addr = pm_msg.mapped_loc_addr; 536 if (active) { 537 cm_id->m_remote_addr = pm_msg.mapped_rem_addr; 538 iw_cm_check_wildcard(&pm_msg.mapped_rem_addr, 539 &cm_id->remote_addr, 540 &cm_id->m_remote_addr); 541 } 542 } 543 544 return iwpm_create_mapinfo(&cm_id->local_addr, 545 &cm_id->m_local_addr, 546 RDMA_NL_IWCM); 547 } 548 549 /* 550 * CM_ID <-- LISTEN 551 * 552 * Start listening for connect requests. Generates one CONNECT_REQUEST 553 * event for each inbound connect request. 554 */ 555 int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) 556 { 557 struct iwcm_id_private *cm_id_priv; 558 unsigned long flags; 559 int ret; 560 561 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 562 563 if (!backlog) 564 backlog = default_backlog; 565 566 ret = alloc_work_entries(cm_id_priv, backlog); 567 if (ret) 568 return ret; 569 570 spin_lock_irqsave(&cm_id_priv->lock, flags); 571 switch (cm_id_priv->state) { 572 case IW_CM_STATE_IDLE: 573 cm_id_priv->state = IW_CM_STATE_LISTEN; 574 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 575 ret = iw_cm_map(cm_id, false); 576 if (!ret) 577 ret = cm_id->device->iwcm->create_listen(cm_id, backlog); 578 if (ret) 579 cm_id_priv->state = IW_CM_STATE_IDLE; 580 spin_lock_irqsave(&cm_id_priv->lock, flags); 581 break; 582 default: 583 ret = -EINVAL; 584 } 585 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 586 587 return ret; 588 } 589 EXPORT_SYMBOL(iw_cm_listen); 590 591 /* 592 * CM_ID <-- IDLE 593 * 594 * Rejects an inbound connection request. No events are generated. 595 */ 596 int iw_cm_reject(struct iw_cm_id *cm_id, 597 const void *private_data, 598 u8 private_data_len) 599 { 600 struct iwcm_id_private *cm_id_priv; 601 unsigned long flags; 602 int ret; 603 604 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 605 set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 606 607 spin_lock_irqsave(&cm_id_priv->lock, flags); 608 if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { 609 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 610 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 611 wake_up_all(&cm_id_priv->connect_wait); 612 return -EINVAL; 613 } 614 cm_id_priv->state = IW_CM_STATE_IDLE; 615 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 616 617 ret = cm_id->device->iwcm->reject(cm_id, private_data, 618 private_data_len); 619 620 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 621 wake_up_all(&cm_id_priv->connect_wait); 622 623 return ret; 624 } 625 EXPORT_SYMBOL(iw_cm_reject); 626 627 /* 628 * CM_ID <-- ESTABLISHED 629 * 630 * Accepts an inbound connection request and generates an ESTABLISHED 631 * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block 632 * until the ESTABLISHED event is received from the provider. 633 */ 634 int iw_cm_accept(struct iw_cm_id *cm_id, 635 struct iw_cm_conn_param *iw_param) 636 { 637 struct iwcm_id_private *cm_id_priv; 638 struct ib_qp *qp; 639 unsigned long flags; 640 int ret; 641 642 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 643 set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 644 645 spin_lock_irqsave(&cm_id_priv->lock, flags); 646 if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { 647 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 648 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 649 wake_up_all(&cm_id_priv->connect_wait); 650 return -EINVAL; 651 } 652 /* Get the ib_qp given the QPN */ 653 qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); 654 if (!qp) { 655 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 656 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 657 wake_up_all(&cm_id_priv->connect_wait); 658 return -EINVAL; 659 } 660 cm_id->device->iwcm->add_ref(qp); 661 cm_id_priv->qp = qp; 662 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 663 664 ret = cm_id->device->iwcm->accept(cm_id, iw_param); 665 if (ret) { 666 /* An error on accept precludes provider events */ 667 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); 668 cm_id_priv->state = IW_CM_STATE_IDLE; 669 spin_lock_irqsave(&cm_id_priv->lock, flags); 670 if (cm_id_priv->qp) { 671 cm_id->device->iwcm->rem_ref(qp); 672 cm_id_priv->qp = NULL; 673 } 674 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 675 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 676 wake_up_all(&cm_id_priv->connect_wait); 677 } 678 679 return ret; 680 } 681 EXPORT_SYMBOL(iw_cm_accept); 682 683 /* 684 * Active Side: CM_ID <-- CONN_SENT 685 * 686 * If successful, results in the generation of a CONNECT_REPLY 687 * event. iw_cm_disconnect and iw_cm_destroy will block until the 688 * CONNECT_REPLY event is received from the provider. 689 */ 690 int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) 691 { 692 struct iwcm_id_private *cm_id_priv; 693 int ret; 694 unsigned long flags; 695 struct ib_qp *qp; 696 697 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 698 699 ret = alloc_work_entries(cm_id_priv, 4); 700 if (ret) 701 return ret; 702 703 set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 704 spin_lock_irqsave(&cm_id_priv->lock, flags); 705 706 if (cm_id_priv->state != IW_CM_STATE_IDLE) { 707 ret = -EINVAL; 708 goto err; 709 } 710 711 /* Get the ib_qp given the QPN */ 712 qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn); 713 if (!qp) { 714 ret = -EINVAL; 715 goto err; 716 } 717 cm_id->device->iwcm->add_ref(qp); 718 cm_id_priv->qp = qp; 719 cm_id_priv->state = IW_CM_STATE_CONN_SENT; 720 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 721 722 ret = iw_cm_map(cm_id, true); 723 if (!ret) 724 ret = cm_id->device->iwcm->connect(cm_id, iw_param); 725 if (!ret) 726 return 0; /* success */ 727 728 spin_lock_irqsave(&cm_id_priv->lock, flags); 729 if (cm_id_priv->qp) { 730 cm_id->device->iwcm->rem_ref(qp); 731 cm_id_priv->qp = NULL; 732 } 733 cm_id_priv->state = IW_CM_STATE_IDLE; 734 err: 735 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 736 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 737 wake_up_all(&cm_id_priv->connect_wait); 738 return ret; 739 } 740 EXPORT_SYMBOL(iw_cm_connect); 741 742 /* 743 * Passive Side: new CM_ID <-- CONN_RECV 744 * 745 * Handles an inbound connect request. The function creates a new 746 * iw_cm_id to represent the new connection and inherits the client 747 * callback function and other attributes from the listening parent. 748 * 749 * The work item contains a pointer to the listen_cm_id and the event. The 750 * listen_cm_id contains the client cm_handler, context and 751 * device. These are copied when the device is cloned. The event 752 * contains the new four tuple. 753 * 754 * An error on the child should not affect the parent, so this 755 * function does not return a value. 756 */ 757 static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, 758 struct iw_cm_event *iw_event) 759 { 760 unsigned long flags; 761 struct iw_cm_id *cm_id; 762 struct iwcm_id_private *cm_id_priv; 763 int ret; 764 765 /* 766 * The provider should never generate a connection request 767 * event with a bad status. 768 */ 769 BUG_ON(iw_event->status); 770 771 cm_id = iw_create_cm_id(listen_id_priv->id.device, 772 listen_id_priv->id.cm_handler, 773 listen_id_priv->id.context); 774 /* If the cm_id could not be created, ignore the request */ 775 if (IS_ERR(cm_id)) 776 goto out; 777 778 cm_id->provider_data = iw_event->provider_data; 779 cm_id->m_local_addr = iw_event->local_addr; 780 cm_id->m_remote_addr = iw_event->remote_addr; 781 cm_id->local_addr = listen_id_priv->id.local_addr; 782 783 ret = iwpm_get_remote_info(&listen_id_priv->id.m_local_addr, 784 &iw_event->remote_addr, 785 &cm_id->remote_addr, 786 RDMA_NL_IWCM); 787 if (ret) { 788 cm_id->remote_addr = iw_event->remote_addr; 789 } else { 790 iw_cm_check_wildcard(&listen_id_priv->id.m_local_addr, 791 &iw_event->local_addr, 792 &cm_id->local_addr); 793 iw_event->local_addr = cm_id->local_addr; 794 iw_event->remote_addr = cm_id->remote_addr; 795 } 796 797 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 798 cm_id_priv->state = IW_CM_STATE_CONN_RECV; 799 800 /* 801 * We could be destroying the listening id. If so, ignore this 802 * upcall. 803 */ 804 spin_lock_irqsave(&listen_id_priv->lock, flags); 805 if (listen_id_priv->state != IW_CM_STATE_LISTEN) { 806 spin_unlock_irqrestore(&listen_id_priv->lock, flags); 807 iw_cm_reject(cm_id, NULL, 0); 808 iw_destroy_cm_id(cm_id); 809 goto out; 810 } 811 spin_unlock_irqrestore(&listen_id_priv->lock, flags); 812 813 ret = alloc_work_entries(cm_id_priv, 3); 814 if (ret) { 815 iw_cm_reject(cm_id, NULL, 0); 816 iw_destroy_cm_id(cm_id); 817 goto out; 818 } 819 820 /* Call the client CM handler */ 821 ret = cm_id->cm_handler(cm_id, iw_event); 822 if (ret) { 823 iw_cm_reject(cm_id, NULL, 0); 824 iw_destroy_cm_id(cm_id); 825 } 826 827 out: 828 if (iw_event->private_data_len) 829 kfree(iw_event->private_data); 830 } 831 832 /* 833 * Passive Side: CM_ID <-- ESTABLISHED 834 * 835 * The provider generated an ESTABLISHED event which means that 836 * the MPA negotion has completed successfully and we are now in MPA 837 * FPDU mode. 838 * 839 * This event can only be received in the CONN_RECV state. If the 840 * remote peer closed, the ESTABLISHED event would be received followed 841 * by the CLOSE event. If the app closes, it will block until we wake 842 * it up after processing this event. 843 */ 844 static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv, 845 struct iw_cm_event *iw_event) 846 { 847 unsigned long flags; 848 int ret; 849 850 spin_lock_irqsave(&cm_id_priv->lock, flags); 851 852 /* 853 * We clear the CONNECT_WAIT bit here to allow the callback 854 * function to call iw_cm_disconnect. Calling iw_destroy_cm_id 855 * from a callback handler is not allowed. 856 */ 857 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 858 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); 859 cm_id_priv->state = IW_CM_STATE_ESTABLISHED; 860 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 861 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); 862 wake_up_all(&cm_id_priv->connect_wait); 863 864 return ret; 865 } 866 867 /* 868 * Active Side: CM_ID <-- ESTABLISHED 869 * 870 * The app has called connect and is waiting for the established event to 871 * post it's requests to the server. This event will wake up anyone 872 * blocked in iw_cm_disconnect or iw_destroy_id. 873 */ 874 static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, 875 struct iw_cm_event *iw_event) 876 { 877 unsigned long flags; 878 int ret; 879 880 spin_lock_irqsave(&cm_id_priv->lock, flags); 881 /* 882 * Clear the connect wait bit so a callback function calling 883 * iw_cm_disconnect will not wait and deadlock this thread 884 */ 885 clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); 886 BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT); 887 if (iw_event->status == 0) { 888 cm_id_priv->id.m_local_addr = iw_event->local_addr; 889 cm_id_priv->id.m_remote_addr = iw_event->remote_addr; 890 iw_event->local_addr = cm_id_priv->id.local_addr; 891 iw_event->remote_addr = cm_id_priv->id.remote_addr; 892 cm_id_priv->state = IW_CM_STATE_ESTABLISHED; 893 } else { 894 /* REJECTED or RESET */ 895 cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); 896 cm_id_priv->qp = NULL; 897 cm_id_priv->state = IW_CM_STATE_IDLE; 898 } 899 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 900 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); 901 902 if (iw_event->private_data_len) 903 kfree(iw_event->private_data); 904 905 /* Wake up waiters on connect complete */ 906 wake_up_all(&cm_id_priv->connect_wait); 907 908 return ret; 909 } 910 911 /* 912 * CM_ID <-- CLOSING 913 * 914 * If in the ESTABLISHED state, move to CLOSING. 915 */ 916 static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv, 917 struct iw_cm_event *iw_event) 918 { 919 unsigned long flags; 920 921 spin_lock_irqsave(&cm_id_priv->lock, flags); 922 if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED) 923 cm_id_priv->state = IW_CM_STATE_CLOSING; 924 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 925 } 926 927 /* 928 * CM_ID <-- IDLE 929 * 930 * If in the ESTBLISHED or CLOSING states, the QP will have have been 931 * moved by the provider to the ERR state. Disassociate the CM_ID from 932 * the QP, move to IDLE, and remove the 'connected' reference. 933 * 934 * If in some other state, the cm_id was destroyed asynchronously. 935 * This is the last reference that will result in waking up 936 * the app thread blocked in iw_destroy_cm_id. 937 */ 938 static int cm_close_handler(struct iwcm_id_private *cm_id_priv, 939 struct iw_cm_event *iw_event) 940 { 941 unsigned long flags; 942 int ret = 0; 943 spin_lock_irqsave(&cm_id_priv->lock, flags); 944 945 if (cm_id_priv->qp) { 946 cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp); 947 cm_id_priv->qp = NULL; 948 } 949 switch (cm_id_priv->state) { 950 case IW_CM_STATE_ESTABLISHED: 951 case IW_CM_STATE_CLOSING: 952 cm_id_priv->state = IW_CM_STATE_IDLE; 953 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 954 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); 955 spin_lock_irqsave(&cm_id_priv->lock, flags); 956 break; 957 case IW_CM_STATE_DESTROYING: 958 break; 959 default: 960 BUG(); 961 } 962 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 963 964 return ret; 965 } 966 967 static int process_event(struct iwcm_id_private *cm_id_priv, 968 struct iw_cm_event *iw_event) 969 { 970 int ret = 0; 971 972 switch (iw_event->event) { 973 case IW_CM_EVENT_CONNECT_REQUEST: 974 cm_conn_req_handler(cm_id_priv, iw_event); 975 break; 976 case IW_CM_EVENT_CONNECT_REPLY: 977 ret = cm_conn_rep_handler(cm_id_priv, iw_event); 978 break; 979 case IW_CM_EVENT_ESTABLISHED: 980 ret = cm_conn_est_handler(cm_id_priv, iw_event); 981 break; 982 case IW_CM_EVENT_DISCONNECT: 983 cm_disconnect_handler(cm_id_priv, iw_event); 984 break; 985 case IW_CM_EVENT_CLOSE: 986 ret = cm_close_handler(cm_id_priv, iw_event); 987 break; 988 default: 989 BUG(); 990 } 991 992 return ret; 993 } 994 995 /* 996 * Process events on the work_list for the cm_id. If the callback 997 * function requests that the cm_id be deleted, a flag is set in the 998 * cm_id flags to indicate that when the last reference is 999 * removed, the cm_id is to be destroyed. This is necessary to 1000 * distinguish between an object that will be destroyed by the app 1001 * thread asleep on the destroy_comp list vs. an object destroyed 1002 * here synchronously when the last reference is removed. 1003 */ 1004 static void cm_work_handler(struct work_struct *_work) 1005 { 1006 struct iwcm_work *work = container_of(_work, struct iwcm_work, work); 1007 struct iw_cm_event levent; 1008 struct iwcm_id_private *cm_id_priv = work->cm_id; 1009 unsigned long flags; 1010 int empty; 1011 int ret = 0; 1012 1013 spin_lock_irqsave(&cm_id_priv->lock, flags); 1014 empty = list_empty(&cm_id_priv->work_list); 1015 while (!empty) { 1016 work = list_entry(cm_id_priv->work_list.next, 1017 struct iwcm_work, list); 1018 list_del_init(&work->list); 1019 empty = list_empty(&cm_id_priv->work_list); 1020 levent = work->event; 1021 put_work(work); 1022 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1023 1024 if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) { 1025 ret = process_event(cm_id_priv, &levent); 1026 if (ret) 1027 destroy_cm_id(&cm_id_priv->id); 1028 } else 1029 pr_debug("dropping event %d\n", levent.event); 1030 if (iwcm_deref_id(cm_id_priv)) 1031 return; 1032 if (empty) 1033 return; 1034 spin_lock_irqsave(&cm_id_priv->lock, flags); 1035 } 1036 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1037 } 1038 1039 /* 1040 * This function is called on interrupt context. Schedule events on 1041 * the iwcm_wq thread to allow callback functions to downcall into 1042 * the CM and/or block. Events are queued to a per-CM_ID 1043 * work_list. If this is the first event on the work_list, the work 1044 * element is also queued on the iwcm_wq thread. 1045 * 1046 * Each event holds a reference on the cm_id. Until the last posted 1047 * event has been delivered and processed, the cm_id cannot be 1048 * deleted. 1049 * 1050 * Returns: 1051 * 0 - the event was handled. 1052 * -ENOMEM - the event was not handled due to lack of resources. 1053 */ 1054 static int cm_event_handler(struct iw_cm_id *cm_id, 1055 struct iw_cm_event *iw_event) 1056 { 1057 struct iwcm_work *work; 1058 struct iwcm_id_private *cm_id_priv; 1059 unsigned long flags; 1060 int ret = 0; 1061 1062 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 1063 1064 spin_lock_irqsave(&cm_id_priv->lock, flags); 1065 work = get_work(cm_id_priv); 1066 if (!work) { 1067 ret = -ENOMEM; 1068 goto out; 1069 } 1070 1071 INIT_WORK(&work->work, cm_work_handler); 1072 work->cm_id = cm_id_priv; 1073 work->event = *iw_event; 1074 1075 if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST || 1076 work->event.event == IW_CM_EVENT_CONNECT_REPLY) && 1077 work->event.private_data_len) { 1078 ret = copy_private_data(&work->event); 1079 if (ret) { 1080 put_work(work); 1081 goto out; 1082 } 1083 } 1084 1085 atomic_inc(&cm_id_priv->refcount); 1086 if (list_empty(&cm_id_priv->work_list)) { 1087 list_add_tail(&work->list, &cm_id_priv->work_list); 1088 queue_work(iwcm_wq, &work->work); 1089 } else 1090 list_add_tail(&work->list, &cm_id_priv->work_list); 1091 out: 1092 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1093 return ret; 1094 } 1095 1096 static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv, 1097 struct ib_qp_attr *qp_attr, 1098 int *qp_attr_mask) 1099 { 1100 unsigned long flags; 1101 int ret; 1102 1103 spin_lock_irqsave(&cm_id_priv->lock, flags); 1104 switch (cm_id_priv->state) { 1105 case IW_CM_STATE_IDLE: 1106 case IW_CM_STATE_CONN_SENT: 1107 case IW_CM_STATE_CONN_RECV: 1108 case IW_CM_STATE_ESTABLISHED: 1109 *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; 1110 qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE| 1111 IB_ACCESS_REMOTE_READ; 1112 ret = 0; 1113 break; 1114 default: 1115 ret = -EINVAL; 1116 break; 1117 } 1118 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1119 return ret; 1120 } 1121 1122 static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv, 1123 struct ib_qp_attr *qp_attr, 1124 int *qp_attr_mask) 1125 { 1126 unsigned long flags; 1127 int ret; 1128 1129 spin_lock_irqsave(&cm_id_priv->lock, flags); 1130 switch (cm_id_priv->state) { 1131 case IW_CM_STATE_IDLE: 1132 case IW_CM_STATE_CONN_SENT: 1133 case IW_CM_STATE_CONN_RECV: 1134 case IW_CM_STATE_ESTABLISHED: 1135 *qp_attr_mask = 0; 1136 ret = 0; 1137 break; 1138 default: 1139 ret = -EINVAL; 1140 break; 1141 } 1142 spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1143 return ret; 1144 } 1145 1146 int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, 1147 struct ib_qp_attr *qp_attr, 1148 int *qp_attr_mask) 1149 { 1150 struct iwcm_id_private *cm_id_priv; 1151 int ret; 1152 1153 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 1154 switch (qp_attr->qp_state) { 1155 case IB_QPS_INIT: 1156 case IB_QPS_RTR: 1157 ret = iwcm_init_qp_init_attr(cm_id_priv, 1158 qp_attr, qp_attr_mask); 1159 break; 1160 case IB_QPS_RTS: 1161 ret = iwcm_init_qp_rts_attr(cm_id_priv, 1162 qp_attr, qp_attr_mask); 1163 break; 1164 default: 1165 ret = -EINVAL; 1166 break; 1167 } 1168 return ret; 1169 } 1170 EXPORT_SYMBOL(iw_cm_init_qp_attr); 1171 1172 static int __init iw_cm_init(void) 1173 { 1174 int ret; 1175 1176 ret = iwpm_init(RDMA_NL_IWCM); 1177 if (ret) 1178 pr_err("iw_cm: couldn't init iwpm\n"); 1179 else 1180 rdma_nl_register(RDMA_NL_IWCM, iwcm_nl_cb_table); 1181 iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", 0); 1182 if (!iwcm_wq) 1183 return -ENOMEM; 1184 1185 iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm", 1186 iwcm_ctl_table); 1187 if (!iwcm_ctl_table_hdr) { 1188 pr_err("iw_cm: couldn't register sysctl paths\n"); 1189 destroy_workqueue(iwcm_wq); 1190 return -ENOMEM; 1191 } 1192 1193 return 0; 1194 } 1195 1196 static void __exit iw_cm_cleanup(void) 1197 { 1198 unregister_net_sysctl_table(iwcm_ctl_table_hdr); 1199 destroy_workqueue(iwcm_wq); 1200 rdma_nl_unregister(RDMA_NL_IWCM); 1201 iwpm_exit(RDMA_NL_IWCM); 1202 } 1203 1204 MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_IWCM, 2); 1205 1206 module_init(iw_cm_init); 1207 module_exit(iw_cm_cleanup); 1208