1 /* 2 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved. 3 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. 4 * Copyright (c) 2013 Mellanox Technologies. All rights reserved. 5 * 6 * This software is available to you under a choice of one of two 7 * licenses. You may choose to be licensed under the terms of the GNU 8 * General Public License (GPL) Version 2, available from the file 9 * COPYING in the main directory of this source tree, or the 10 * OpenIB.org BSD license below: 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above 17 * copyright notice, this list of conditions and the following 18 * disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials 23 * provided with the distribution. 24 * 25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * SOFTWARE. 33 */ 34 #include <linux/kernel.h> 35 #include <linux/module.h> 36 #include <linux/slab.h> 37 #include <linux/delay.h> 38 39 #include "iscsi_iser.h" 40 41 #define ISCSI_ISER_MAX_CONN 8 42 #define ISER_MAX_RX_CQ_LEN (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN) 43 #define ISER_MAX_TX_CQ_LEN (ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN) 44 45 static void iser_cq_tasklet_fn(unsigned long data); 46 static void iser_cq_callback(struct ib_cq *cq, void *cq_context); 47 48 static void iser_cq_event_callback(struct ib_event *cause, void *context) 49 { 50 iser_err("got cq event %d \n", cause->event); 51 } 52 53 static void iser_qp_event_callback(struct ib_event *cause, void *context) 54 { 55 iser_err("got qp event %d\n",cause->event); 56 } 57 58 static void iser_event_handler(struct ib_event_handler *handler, 59 struct ib_event *event) 60 { 61 iser_err("async event %d on device %s port %d\n", event->event, 62 event->device->name, event->element.port_num); 63 } 64 65 /** 66 * iser_create_device_ib_res - creates Protection Domain (PD), Completion 67 * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with 68 * the adapator. 69 * 70 * returns 0 on success, -1 on failure 71 */ 72 static int iser_create_device_ib_res(struct iser_device *device) 73 { 74 int i, j; 75 struct iser_cq_desc *cq_desc; 76 struct ib_device_attr *dev_attr; 77 78 dev_attr = kmalloc(sizeof(*dev_attr), GFP_KERNEL); 79 if (!dev_attr) 80 return -ENOMEM; 81 82 if (ib_query_device(device->ib_device, dev_attr)) { 83 pr_warn("Query device failed for %s\n", device->ib_device->name); 84 goto dev_attr_err; 85 } 86 87 /* Assign function handles - based on FMR support */ 88 if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr && 89 device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) { 90 iser_info("FMR supported, using FMR for registration\n"); 91 device->iser_alloc_rdma_reg_res = iser_create_fmr_pool; 92 device->iser_free_rdma_reg_res = iser_free_fmr_pool; 93 device->iser_reg_rdma_mem = iser_reg_rdma_mem_fmr; 94 device->iser_unreg_rdma_mem = iser_unreg_mem_fmr; 95 } else 96 if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { 97 iser_info("FRWR supported, using FRWR for registration\n"); 98 device->iser_alloc_rdma_reg_res = iser_create_frwr_pool; 99 device->iser_free_rdma_reg_res = iser_free_frwr_pool; 100 device->iser_reg_rdma_mem = iser_reg_rdma_mem_frwr; 101 device->iser_unreg_rdma_mem = iser_unreg_mem_frwr; 102 } else { 103 iser_err("IB device does not support FMRs nor FRWRs, can't register memory\n"); 104 goto dev_attr_err; 105 } 106 107 device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors); 108 iser_info("using %d CQs, device %s supports %d vectors\n", 109 device->cqs_used, device->ib_device->name, 110 device->ib_device->num_comp_vectors); 111 112 device->cq_desc = kmalloc(sizeof(struct iser_cq_desc) * device->cqs_used, 113 GFP_KERNEL); 114 if (device->cq_desc == NULL) 115 goto cq_desc_err; 116 cq_desc = device->cq_desc; 117 118 device->pd = ib_alloc_pd(device->ib_device); 119 if (IS_ERR(device->pd)) 120 goto pd_err; 121 122 for (i = 0; i < device->cqs_used; i++) { 123 cq_desc[i].device = device; 124 cq_desc[i].cq_index = i; 125 126 device->rx_cq[i] = ib_create_cq(device->ib_device, 127 iser_cq_callback, 128 iser_cq_event_callback, 129 (void *)&cq_desc[i], 130 ISER_MAX_RX_CQ_LEN, i); 131 if (IS_ERR(device->rx_cq[i])) 132 goto cq_err; 133 134 device->tx_cq[i] = ib_create_cq(device->ib_device, 135 NULL, iser_cq_event_callback, 136 (void *)&cq_desc[i], 137 ISER_MAX_TX_CQ_LEN, i); 138 139 if (IS_ERR(device->tx_cq[i])) 140 goto cq_err; 141 142 if (ib_req_notify_cq(device->rx_cq[i], IB_CQ_NEXT_COMP)) 143 goto cq_err; 144 145 tasklet_init(&device->cq_tasklet[i], 146 iser_cq_tasklet_fn, 147 (unsigned long)&cq_desc[i]); 148 } 149 150 device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE | 151 IB_ACCESS_REMOTE_WRITE | 152 IB_ACCESS_REMOTE_READ); 153 if (IS_ERR(device->mr)) 154 goto dma_mr_err; 155 156 INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device, 157 iser_event_handler); 158 if (ib_register_event_handler(&device->event_handler)) 159 goto handler_err; 160 161 kfree(dev_attr); 162 return 0; 163 164 handler_err: 165 ib_dereg_mr(device->mr); 166 dma_mr_err: 167 for (j = 0; j < device->cqs_used; j++) 168 tasklet_kill(&device->cq_tasklet[j]); 169 cq_err: 170 for (j = 0; j < i; j++) { 171 if (device->tx_cq[j]) 172 ib_destroy_cq(device->tx_cq[j]); 173 if (device->rx_cq[j]) 174 ib_destroy_cq(device->rx_cq[j]); 175 } 176 ib_dealloc_pd(device->pd); 177 pd_err: 178 kfree(device->cq_desc); 179 cq_desc_err: 180 iser_err("failed to allocate an IB resource\n"); 181 dev_attr_err: 182 kfree(dev_attr); 183 return -1; 184 } 185 186 /** 187 * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR, 188 * CQ and PD created with the device associated with the adapator. 189 */ 190 static void iser_free_device_ib_res(struct iser_device *device) 191 { 192 int i; 193 BUG_ON(device->mr == NULL); 194 195 for (i = 0; i < device->cqs_used; i++) { 196 tasklet_kill(&device->cq_tasklet[i]); 197 (void)ib_destroy_cq(device->tx_cq[i]); 198 (void)ib_destroy_cq(device->rx_cq[i]); 199 device->tx_cq[i] = NULL; 200 device->rx_cq[i] = NULL; 201 } 202 203 (void)ib_unregister_event_handler(&device->event_handler); 204 (void)ib_dereg_mr(device->mr); 205 (void)ib_dealloc_pd(device->pd); 206 207 kfree(device->cq_desc); 208 209 device->mr = NULL; 210 device->pd = NULL; 211 } 212 213 /** 214 * iser_create_fmr_pool - Creates FMR pool and page_vector 215 * 216 * returns 0 on success, or errno code on failure 217 */ 218 int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max) 219 { 220 struct iser_device *device = ib_conn->device; 221 struct ib_fmr_pool_param params; 222 int ret = -ENOMEM; 223 224 ib_conn->fastreg.fmr.page_vec = kmalloc(sizeof(struct iser_page_vec) + 225 (sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)), 226 GFP_KERNEL); 227 if (!ib_conn->fastreg.fmr.page_vec) 228 return ret; 229 230 ib_conn->fastreg.fmr.page_vec->pages = (u64 *)(ib_conn->fastreg.fmr.page_vec + 1); 231 232 params.page_shift = SHIFT_4K; 233 /* when the first/last SG element are not start/end * 234 * page aligned, the map whould be of N+1 pages */ 235 params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1; 236 /* make the pool size twice the max number of SCSI commands * 237 * the ML is expected to queue, watermark for unmap at 50% */ 238 params.pool_size = cmds_max * 2; 239 params.dirty_watermark = cmds_max; 240 params.cache = 0; 241 params.flush_function = NULL; 242 params.access = (IB_ACCESS_LOCAL_WRITE | 243 IB_ACCESS_REMOTE_WRITE | 244 IB_ACCESS_REMOTE_READ); 245 246 ib_conn->fastreg.fmr.pool = ib_create_fmr_pool(device->pd, ¶ms); 247 if (!IS_ERR(ib_conn->fastreg.fmr.pool)) 248 return 0; 249 250 /* no FMR => no need for page_vec */ 251 kfree(ib_conn->fastreg.fmr.page_vec); 252 ib_conn->fastreg.fmr.page_vec = NULL; 253 254 ret = PTR_ERR(ib_conn->fastreg.fmr.pool); 255 ib_conn->fastreg.fmr.pool = NULL; 256 if (ret != -ENOSYS) { 257 iser_err("FMR allocation failed, err %d\n", ret); 258 return ret; 259 } else { 260 iser_warn("FMRs are not supported, using unaligned mode\n"); 261 return 0; 262 } 263 } 264 265 /** 266 * iser_free_fmr_pool - releases the FMR pool and page vec 267 */ 268 void iser_free_fmr_pool(struct iser_conn *ib_conn) 269 { 270 iser_info("freeing conn %p fmr pool %p\n", 271 ib_conn, ib_conn->fastreg.fmr.pool); 272 273 if (ib_conn->fastreg.fmr.pool != NULL) 274 ib_destroy_fmr_pool(ib_conn->fastreg.fmr.pool); 275 276 ib_conn->fastreg.fmr.pool = NULL; 277 278 kfree(ib_conn->fastreg.fmr.page_vec); 279 ib_conn->fastreg.fmr.page_vec = NULL; 280 } 281 282 /** 283 * iser_create_frwr_pool - Creates pool of fast_reg descriptors 284 * for fast registration work requests. 285 * returns 0 on success, or errno code on failure 286 */ 287 int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max) 288 { 289 struct iser_device *device = ib_conn->device; 290 struct fast_reg_descriptor *desc; 291 int i, ret; 292 293 INIT_LIST_HEAD(&ib_conn->fastreg.frwr.pool); 294 ib_conn->fastreg.frwr.pool_size = 0; 295 for (i = 0; i < cmds_max; i++) { 296 desc = kmalloc(sizeof(*desc), GFP_KERNEL); 297 if (!desc) { 298 iser_err("Failed to allocate a new fast_reg descriptor\n"); 299 ret = -ENOMEM; 300 goto err; 301 } 302 303 desc->data_frpl = ib_alloc_fast_reg_page_list(device->ib_device, 304 ISCSI_ISER_SG_TABLESIZE + 1); 305 if (IS_ERR(desc->data_frpl)) { 306 ret = PTR_ERR(desc->data_frpl); 307 iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n", ret); 308 goto fast_reg_page_failure; 309 } 310 311 desc->data_mr = ib_alloc_fast_reg_mr(device->pd, 312 ISCSI_ISER_SG_TABLESIZE + 1); 313 if (IS_ERR(desc->data_mr)) { 314 ret = PTR_ERR(desc->data_mr); 315 iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret); 316 goto fast_reg_mr_failure; 317 } 318 desc->valid = true; 319 list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool); 320 ib_conn->fastreg.frwr.pool_size++; 321 } 322 323 return 0; 324 325 fast_reg_mr_failure: 326 ib_free_fast_reg_page_list(desc->data_frpl); 327 fast_reg_page_failure: 328 kfree(desc); 329 err: 330 iser_free_frwr_pool(ib_conn); 331 return ret; 332 } 333 334 /** 335 * iser_free_frwr_pool - releases the pool of fast_reg descriptors 336 */ 337 void iser_free_frwr_pool(struct iser_conn *ib_conn) 338 { 339 struct fast_reg_descriptor *desc, *tmp; 340 int i = 0; 341 342 if (list_empty(&ib_conn->fastreg.frwr.pool)) 343 return; 344 345 iser_info("freeing conn %p frwr pool\n", ib_conn); 346 347 list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.frwr.pool, list) { 348 list_del(&desc->list); 349 ib_free_fast_reg_page_list(desc->data_frpl); 350 ib_dereg_mr(desc->data_mr); 351 kfree(desc); 352 ++i; 353 } 354 355 if (i < ib_conn->fastreg.frwr.pool_size) 356 iser_warn("pool still has %d regions registered\n", 357 ib_conn->fastreg.frwr.pool_size - i); 358 } 359 360 /** 361 * iser_create_ib_conn_res - Queue-Pair (QP) 362 * 363 * returns 0 on success, -1 on failure 364 */ 365 static int iser_create_ib_conn_res(struct iser_conn *ib_conn) 366 { 367 struct iser_device *device; 368 struct ib_qp_init_attr init_attr; 369 int ret = -ENOMEM; 370 int index, min_index = 0; 371 372 BUG_ON(ib_conn->device == NULL); 373 374 device = ib_conn->device; 375 376 memset(&init_attr, 0, sizeof init_attr); 377 378 mutex_lock(&ig.connlist_mutex); 379 /* select the CQ with the minimal number of usages */ 380 for (index = 0; index < device->cqs_used; index++) 381 if (device->cq_active_qps[index] < 382 device->cq_active_qps[min_index]) 383 min_index = index; 384 device->cq_active_qps[min_index]++; 385 mutex_unlock(&ig.connlist_mutex); 386 iser_info("cq index %d used for ib_conn %p\n", min_index, ib_conn); 387 388 init_attr.event_handler = iser_qp_event_callback; 389 init_attr.qp_context = (void *)ib_conn; 390 init_attr.send_cq = device->tx_cq[min_index]; 391 init_attr.recv_cq = device->rx_cq[min_index]; 392 init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS; 393 init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; 394 init_attr.cap.max_send_sge = 2; 395 init_attr.cap.max_recv_sge = 1; 396 init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 397 init_attr.qp_type = IB_QPT_RC; 398 399 ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr); 400 if (ret) 401 goto out_err; 402 403 ib_conn->qp = ib_conn->cma_id->qp; 404 iser_info("setting conn %p cma_id %p qp %p\n", 405 ib_conn, ib_conn->cma_id, 406 ib_conn->cma_id->qp); 407 return ret; 408 409 out_err: 410 iser_err("unable to alloc mem or create resource, err %d\n", ret); 411 return ret; 412 } 413 414 /** 415 * releases the QP objects, returns 0 on success, 416 * -1 on failure 417 */ 418 static int iser_free_ib_conn_res(struct iser_conn *ib_conn) 419 { 420 int cq_index; 421 BUG_ON(ib_conn == NULL); 422 423 iser_info("freeing conn %p cma_id %p qp %p\n", 424 ib_conn, ib_conn->cma_id, 425 ib_conn->qp); 426 427 /* qp is created only once both addr & route are resolved */ 428 429 if (ib_conn->qp != NULL) { 430 cq_index = ((struct iser_cq_desc *)ib_conn->qp->recv_cq->cq_context)->cq_index; 431 ib_conn->device->cq_active_qps[cq_index]--; 432 433 rdma_destroy_qp(ib_conn->cma_id); 434 } 435 436 ib_conn->qp = NULL; 437 438 return 0; 439 } 440 441 /** 442 * based on the resolved device node GUID see if there already allocated 443 * device for this device. If there's no such, create one. 444 */ 445 static 446 struct iser_device *iser_device_find_by_ib_device(struct rdma_cm_id *cma_id) 447 { 448 struct iser_device *device; 449 450 mutex_lock(&ig.device_list_mutex); 451 452 list_for_each_entry(device, &ig.device_list, ig_list) 453 /* find if there's a match using the node GUID */ 454 if (device->ib_device->node_guid == cma_id->device->node_guid) 455 goto inc_refcnt; 456 457 device = kzalloc(sizeof *device, GFP_KERNEL); 458 if (device == NULL) 459 goto out; 460 461 /* assign this device to the device */ 462 device->ib_device = cma_id->device; 463 /* init the device and link it into ig device list */ 464 if (iser_create_device_ib_res(device)) { 465 kfree(device); 466 device = NULL; 467 goto out; 468 } 469 list_add(&device->ig_list, &ig.device_list); 470 471 inc_refcnt: 472 device->refcount++; 473 out: 474 mutex_unlock(&ig.device_list_mutex); 475 return device; 476 } 477 478 /* if there's no demand for this device, release it */ 479 static void iser_device_try_release(struct iser_device *device) 480 { 481 mutex_lock(&ig.device_list_mutex); 482 device->refcount--; 483 iser_info("device %p refcount %d\n", device, device->refcount); 484 if (!device->refcount) { 485 iser_free_device_ib_res(device); 486 list_del(&device->ig_list); 487 kfree(device); 488 } 489 mutex_unlock(&ig.device_list_mutex); 490 } 491 492 static int iser_conn_state_comp_exch(struct iser_conn *ib_conn, 493 enum iser_ib_conn_state comp, 494 enum iser_ib_conn_state exch) 495 { 496 int ret; 497 498 spin_lock_bh(&ib_conn->lock); 499 if ((ret = (ib_conn->state == comp))) 500 ib_conn->state = exch; 501 spin_unlock_bh(&ib_conn->lock); 502 return ret; 503 } 504 505 /** 506 * Frees all conn objects and deallocs conn descriptor 507 */ 508 static void iser_conn_release(struct iser_conn *ib_conn, int can_destroy_id) 509 { 510 struct iser_device *device = ib_conn->device; 511 512 BUG_ON(ib_conn->state != ISER_CONN_DOWN); 513 514 mutex_lock(&ig.connlist_mutex); 515 list_del(&ib_conn->conn_list); 516 mutex_unlock(&ig.connlist_mutex); 517 iser_free_rx_descriptors(ib_conn); 518 iser_free_ib_conn_res(ib_conn); 519 ib_conn->device = NULL; 520 /* on EVENT_ADDR_ERROR there's no device yet for this conn */ 521 if (device != NULL) 522 iser_device_try_release(device); 523 /* if cma handler context, the caller actually destroy the id */ 524 if (ib_conn->cma_id != NULL && can_destroy_id) { 525 rdma_destroy_id(ib_conn->cma_id); 526 ib_conn->cma_id = NULL; 527 } 528 iscsi_destroy_endpoint(ib_conn->ep); 529 } 530 531 void iser_conn_get(struct iser_conn *ib_conn) 532 { 533 atomic_inc(&ib_conn->refcount); 534 } 535 536 int iser_conn_put(struct iser_conn *ib_conn, int can_destroy_id) 537 { 538 if (atomic_dec_and_test(&ib_conn->refcount)) { 539 iser_conn_release(ib_conn, can_destroy_id); 540 return 1; 541 } 542 return 0; 543 } 544 545 /** 546 * triggers start of the disconnect procedures and wait for them to be done 547 */ 548 void iser_conn_terminate(struct iser_conn *ib_conn) 549 { 550 int err = 0; 551 552 /* change the ib conn state only if the conn is UP, however always call 553 * rdma_disconnect since this is the only way to cause the CMA to change 554 * the QP state to ERROR 555 */ 556 557 iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, ISER_CONN_TERMINATING); 558 err = rdma_disconnect(ib_conn->cma_id); 559 if (err) 560 iser_err("Failed to disconnect, conn: 0x%p err %d\n", 561 ib_conn,err); 562 563 wait_event_interruptible(ib_conn->wait, 564 ib_conn->state == ISER_CONN_DOWN); 565 566 iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */ 567 } 568 569 static int iser_connect_error(struct rdma_cm_id *cma_id) 570 { 571 struct iser_conn *ib_conn; 572 ib_conn = (struct iser_conn *)cma_id->context; 573 574 ib_conn->state = ISER_CONN_DOWN; 575 wake_up_interruptible(&ib_conn->wait); 576 return iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */ 577 } 578 579 static int iser_addr_handler(struct rdma_cm_id *cma_id) 580 { 581 struct iser_device *device; 582 struct iser_conn *ib_conn; 583 int ret; 584 585 device = iser_device_find_by_ib_device(cma_id); 586 if (!device) { 587 iser_err("device lookup/creation failed\n"); 588 return iser_connect_error(cma_id); 589 } 590 591 ib_conn = (struct iser_conn *)cma_id->context; 592 ib_conn->device = device; 593 594 ret = rdma_resolve_route(cma_id, 1000); 595 if (ret) { 596 iser_err("resolve route failed: %d\n", ret); 597 return iser_connect_error(cma_id); 598 } 599 600 return 0; 601 } 602 603 static int iser_route_handler(struct rdma_cm_id *cma_id) 604 { 605 struct rdma_conn_param conn_param; 606 int ret; 607 struct iser_cm_hdr req_hdr; 608 609 ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context); 610 if (ret) 611 goto failure; 612 613 memset(&conn_param, 0, sizeof conn_param); 614 conn_param.responder_resources = 4; 615 conn_param.initiator_depth = 1; 616 conn_param.retry_count = 7; 617 conn_param.rnr_retry_count = 6; 618 619 memset(&req_hdr, 0, sizeof(req_hdr)); 620 req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED | 621 ISER_SEND_W_INV_NOT_SUPPORTED); 622 conn_param.private_data = (void *)&req_hdr; 623 conn_param.private_data_len = sizeof(struct iser_cm_hdr); 624 625 ret = rdma_connect(cma_id, &conn_param); 626 if (ret) { 627 iser_err("failure connecting: %d\n", ret); 628 goto failure; 629 } 630 631 return 0; 632 failure: 633 return iser_connect_error(cma_id); 634 } 635 636 static void iser_connected_handler(struct rdma_cm_id *cma_id) 637 { 638 struct iser_conn *ib_conn; 639 640 ib_conn = (struct iser_conn *)cma_id->context; 641 ib_conn->state = ISER_CONN_UP; 642 wake_up_interruptible(&ib_conn->wait); 643 } 644 645 static int iser_disconnected_handler(struct rdma_cm_id *cma_id) 646 { 647 struct iser_conn *ib_conn; 648 int ret; 649 650 ib_conn = (struct iser_conn *)cma_id->context; 651 652 /* getting here when the state is UP means that the conn is being * 653 * terminated asynchronously from the iSCSI layer's perspective. */ 654 if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, 655 ISER_CONN_TERMINATING)) 656 iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn, 657 ISCSI_ERR_CONN_FAILED); 658 659 /* Complete the termination process if no posts are pending */ 660 if (ib_conn->post_recv_buf_count == 0 && 661 (atomic_read(&ib_conn->post_send_buf_count) == 0)) { 662 ib_conn->state = ISER_CONN_DOWN; 663 wake_up_interruptible(&ib_conn->wait); 664 } 665 666 ret = iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */ 667 return ret; 668 } 669 670 static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) 671 { 672 int ret = 0; 673 674 iser_info("event %d status %d conn %p id %p\n", 675 event->event, event->status, cma_id->context, cma_id); 676 677 switch (event->event) { 678 case RDMA_CM_EVENT_ADDR_RESOLVED: 679 ret = iser_addr_handler(cma_id); 680 break; 681 case RDMA_CM_EVENT_ROUTE_RESOLVED: 682 ret = iser_route_handler(cma_id); 683 break; 684 case RDMA_CM_EVENT_ESTABLISHED: 685 iser_connected_handler(cma_id); 686 break; 687 case RDMA_CM_EVENT_ADDR_ERROR: 688 case RDMA_CM_EVENT_ROUTE_ERROR: 689 case RDMA_CM_EVENT_CONNECT_ERROR: 690 case RDMA_CM_EVENT_UNREACHABLE: 691 case RDMA_CM_EVENT_REJECTED: 692 ret = iser_connect_error(cma_id); 693 break; 694 case RDMA_CM_EVENT_DISCONNECTED: 695 case RDMA_CM_EVENT_DEVICE_REMOVAL: 696 case RDMA_CM_EVENT_ADDR_CHANGE: 697 ret = iser_disconnected_handler(cma_id); 698 break; 699 default: 700 iser_err("Unexpected RDMA CM event (%d)\n", event->event); 701 break; 702 } 703 return ret; 704 } 705 706 void iser_conn_init(struct iser_conn *ib_conn) 707 { 708 ib_conn->state = ISER_CONN_INIT; 709 init_waitqueue_head(&ib_conn->wait); 710 ib_conn->post_recv_buf_count = 0; 711 atomic_set(&ib_conn->post_send_buf_count, 0); 712 atomic_set(&ib_conn->refcount, 1); /* ref ib conn allocation */ 713 INIT_LIST_HEAD(&ib_conn->conn_list); 714 spin_lock_init(&ib_conn->lock); 715 } 716 717 /** 718 * starts the process of connecting to the target 719 * sleeps until the connection is established or rejected 720 */ 721 int iser_connect(struct iser_conn *ib_conn, 722 struct sockaddr_in *src_addr, 723 struct sockaddr_in *dst_addr, 724 int non_blocking) 725 { 726 struct sockaddr *src, *dst; 727 int err = 0; 728 729 sprintf(ib_conn->name, "%pI4:%d", 730 &dst_addr->sin_addr.s_addr, dst_addr->sin_port); 731 732 /* the device is known only --after-- address resolution */ 733 ib_conn->device = NULL; 734 735 iser_info("connecting to: %pI4, port 0x%x\n", 736 &dst_addr->sin_addr, dst_addr->sin_port); 737 738 ib_conn->state = ISER_CONN_PENDING; 739 740 iser_conn_get(ib_conn); /* ref ib conn's cma id */ 741 ib_conn->cma_id = rdma_create_id(iser_cma_handler, 742 (void *)ib_conn, 743 RDMA_PS_TCP, IB_QPT_RC); 744 if (IS_ERR(ib_conn->cma_id)) { 745 err = PTR_ERR(ib_conn->cma_id); 746 iser_err("rdma_create_id failed: %d\n", err); 747 goto id_failure; 748 } 749 750 src = (struct sockaddr *)src_addr; 751 dst = (struct sockaddr *)dst_addr; 752 err = rdma_resolve_addr(ib_conn->cma_id, src, dst, 1000); 753 if (err) { 754 iser_err("rdma_resolve_addr failed: %d\n", err); 755 goto addr_failure; 756 } 757 758 if (!non_blocking) { 759 wait_event_interruptible(ib_conn->wait, 760 (ib_conn->state != ISER_CONN_PENDING)); 761 762 if (ib_conn->state != ISER_CONN_UP) { 763 err = -EIO; 764 goto connect_failure; 765 } 766 } 767 768 mutex_lock(&ig.connlist_mutex); 769 list_add(&ib_conn->conn_list, &ig.connlist); 770 mutex_unlock(&ig.connlist_mutex); 771 return 0; 772 773 id_failure: 774 ib_conn->cma_id = NULL; 775 addr_failure: 776 ib_conn->state = ISER_CONN_DOWN; 777 iser_conn_put(ib_conn, 1); /* deref ib conn's cma id */ 778 connect_failure: 779 iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */ 780 return err; 781 } 782 783 /** 784 * iser_reg_page_vec - Register physical memory 785 * 786 * returns: 0 on success, errno code on failure 787 */ 788 int iser_reg_page_vec(struct iser_conn *ib_conn, 789 struct iser_page_vec *page_vec, 790 struct iser_mem_reg *mem_reg) 791 { 792 struct ib_pool_fmr *mem; 793 u64 io_addr; 794 u64 *page_list; 795 int status; 796 797 page_list = page_vec->pages; 798 io_addr = page_list[0]; 799 800 mem = ib_fmr_pool_map_phys(ib_conn->fastreg.fmr.pool, 801 page_list, 802 page_vec->length, 803 io_addr); 804 805 if (IS_ERR(mem)) { 806 status = (int)PTR_ERR(mem); 807 iser_err("ib_fmr_pool_map_phys failed: %d\n", status); 808 return status; 809 } 810 811 mem_reg->lkey = mem->fmr->lkey; 812 mem_reg->rkey = mem->fmr->rkey; 813 mem_reg->len = page_vec->length * SIZE_4K; 814 mem_reg->va = io_addr; 815 mem_reg->is_mr = 1; 816 mem_reg->mem_h = (void *)mem; 817 818 mem_reg->va += page_vec->offset; 819 mem_reg->len = page_vec->data_size; 820 821 iser_dbg("PHYSICAL Mem.register, [PHYS p_array: 0x%p, sz: %d, " 822 "entry[0]: (0x%08lx,%ld)] -> " 823 "[lkey: 0x%08X mem_h: 0x%p va: 0x%08lX sz: %ld]\n", 824 page_vec, page_vec->length, 825 (unsigned long)page_vec->pages[0], 826 (unsigned long)page_vec->data_size, 827 (unsigned int)mem_reg->lkey, mem_reg->mem_h, 828 (unsigned long)mem_reg->va, (unsigned long)mem_reg->len); 829 return 0; 830 } 831 832 /** 833 * Unregister (previosuly registered using FMR) memory. 834 * If memory is non-FMR does nothing. 835 */ 836 void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, 837 enum iser_data_dir cmd_dir) 838 { 839 struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; 840 int ret; 841 842 if (!reg->is_mr) 843 return; 844 845 iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h); 846 847 ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h); 848 if (ret) 849 iser_err("ib_fmr_pool_unmap failed %d\n", ret); 850 851 reg->mem_h = NULL; 852 } 853 854 void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task, 855 enum iser_data_dir cmd_dir) 856 { 857 struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; 858 struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn; 859 struct fast_reg_descriptor *desc = reg->mem_h; 860 861 if (!reg->is_mr) 862 return; 863 864 reg->mem_h = NULL; 865 reg->is_mr = 0; 866 spin_lock_bh(&ib_conn->lock); 867 list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool); 868 spin_unlock_bh(&ib_conn->lock); 869 } 870 871 int iser_post_recvl(struct iser_conn *ib_conn) 872 { 873 struct ib_recv_wr rx_wr, *rx_wr_failed; 874 struct ib_sge sge; 875 int ib_ret; 876 877 sge.addr = ib_conn->login_resp_dma; 878 sge.length = ISER_RX_LOGIN_SIZE; 879 sge.lkey = ib_conn->device->mr->lkey; 880 881 rx_wr.wr_id = (unsigned long)ib_conn->login_resp_buf; 882 rx_wr.sg_list = &sge; 883 rx_wr.num_sge = 1; 884 rx_wr.next = NULL; 885 886 ib_conn->post_recv_buf_count++; 887 ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed); 888 if (ib_ret) { 889 iser_err("ib_post_recv failed ret=%d\n", ib_ret); 890 ib_conn->post_recv_buf_count--; 891 } 892 return ib_ret; 893 } 894 895 int iser_post_recvm(struct iser_conn *ib_conn, int count) 896 { 897 struct ib_recv_wr *rx_wr, *rx_wr_failed; 898 int i, ib_ret; 899 unsigned int my_rx_head = ib_conn->rx_desc_head; 900 struct iser_rx_desc *rx_desc; 901 902 for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) { 903 rx_desc = &ib_conn->rx_descs[my_rx_head]; 904 rx_wr->wr_id = (unsigned long)rx_desc; 905 rx_wr->sg_list = &rx_desc->rx_sg; 906 rx_wr->num_sge = 1; 907 rx_wr->next = rx_wr + 1; 908 my_rx_head = (my_rx_head + 1) & ib_conn->qp_max_recv_dtos_mask; 909 } 910 911 rx_wr--; 912 rx_wr->next = NULL; /* mark end of work requests list */ 913 914 ib_conn->post_recv_buf_count += count; 915 ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed); 916 if (ib_ret) { 917 iser_err("ib_post_recv failed ret=%d\n", ib_ret); 918 ib_conn->post_recv_buf_count -= count; 919 } else 920 ib_conn->rx_desc_head = my_rx_head; 921 return ib_ret; 922 } 923 924 925 /** 926 * iser_start_send - Initiate a Send DTO operation 927 * 928 * returns 0 on success, -1 on failure 929 */ 930 int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc) 931 { 932 int ib_ret; 933 struct ib_send_wr send_wr, *send_wr_failed; 934 935 ib_dma_sync_single_for_device(ib_conn->device->ib_device, 936 tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); 937 938 send_wr.next = NULL; 939 send_wr.wr_id = (unsigned long)tx_desc; 940 send_wr.sg_list = tx_desc->tx_sg; 941 send_wr.num_sge = tx_desc->num_sge; 942 send_wr.opcode = IB_WR_SEND; 943 send_wr.send_flags = IB_SEND_SIGNALED; 944 945 atomic_inc(&ib_conn->post_send_buf_count); 946 947 ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed); 948 if (ib_ret) { 949 iser_err("ib_post_send failed, ret:%d\n", ib_ret); 950 atomic_dec(&ib_conn->post_send_buf_count); 951 } 952 return ib_ret; 953 } 954 955 static void iser_handle_comp_error(struct iser_tx_desc *desc, 956 struct iser_conn *ib_conn) 957 { 958 if (desc && desc->type == ISCSI_TX_DATAOUT) 959 kmem_cache_free(ig.desc_cache, desc); 960 961 if (ib_conn->post_recv_buf_count == 0 && 962 atomic_read(&ib_conn->post_send_buf_count) == 0) { 963 /* getting here when the state is UP means that the conn is * 964 * being terminated asynchronously from the iSCSI layer's * 965 * perspective. */ 966 if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, 967 ISER_CONN_TERMINATING)) 968 iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn, 969 ISCSI_ERR_CONN_FAILED); 970 971 /* no more non completed posts to the QP, complete the 972 * termination process w.o worrying on disconnect event */ 973 ib_conn->state = ISER_CONN_DOWN; 974 wake_up_interruptible(&ib_conn->wait); 975 } 976 } 977 978 static int iser_drain_tx_cq(struct iser_device *device, int cq_index) 979 { 980 struct ib_cq *cq = device->tx_cq[cq_index]; 981 struct ib_wc wc; 982 struct iser_tx_desc *tx_desc; 983 struct iser_conn *ib_conn; 984 int completed_tx = 0; 985 986 while (ib_poll_cq(cq, 1, &wc) == 1) { 987 tx_desc = (struct iser_tx_desc *) (unsigned long) wc.wr_id; 988 ib_conn = wc.qp->qp_context; 989 if (wc.status == IB_WC_SUCCESS) { 990 if (wc.opcode == IB_WC_SEND) 991 iser_snd_completion(tx_desc, ib_conn); 992 else if (wc.opcode == IB_WC_LOCAL_INV || 993 wc.opcode == IB_WC_FAST_REG_MR) { 994 atomic_dec(&ib_conn->post_send_buf_count); 995 continue; 996 } else 997 iser_err("expected opcode %d got %d\n", 998 IB_WC_SEND, wc.opcode); 999 } else { 1000 iser_err("tx id %llx status %d vend_err %x\n", 1001 wc.wr_id, wc.status, wc.vendor_err); 1002 atomic_dec(&ib_conn->post_send_buf_count); 1003 iser_handle_comp_error(tx_desc, ib_conn); 1004 } 1005 completed_tx++; 1006 } 1007 return completed_tx; 1008 } 1009 1010 1011 static void iser_cq_tasklet_fn(unsigned long data) 1012 { 1013 struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)data; 1014 struct iser_device *device = cq_desc->device; 1015 int cq_index = cq_desc->cq_index; 1016 struct ib_cq *cq = device->rx_cq[cq_index]; 1017 struct ib_wc wc; 1018 struct iser_rx_desc *desc; 1019 unsigned long xfer_len; 1020 struct iser_conn *ib_conn; 1021 int completed_tx, completed_rx; 1022 completed_tx = completed_rx = 0; 1023 1024 while (ib_poll_cq(cq, 1, &wc) == 1) { 1025 desc = (struct iser_rx_desc *) (unsigned long) wc.wr_id; 1026 BUG_ON(desc == NULL); 1027 ib_conn = wc.qp->qp_context; 1028 if (wc.status == IB_WC_SUCCESS) { 1029 if (wc.opcode == IB_WC_RECV) { 1030 xfer_len = (unsigned long)wc.byte_len; 1031 iser_rcv_completion(desc, xfer_len, ib_conn); 1032 } else 1033 iser_err("expected opcode %d got %d\n", 1034 IB_WC_RECV, wc.opcode); 1035 } else { 1036 if (wc.status != IB_WC_WR_FLUSH_ERR) 1037 iser_err("rx id %llx status %d vend_err %x\n", 1038 wc.wr_id, wc.status, wc.vendor_err); 1039 ib_conn->post_recv_buf_count--; 1040 iser_handle_comp_error(NULL, ib_conn); 1041 } 1042 completed_rx++; 1043 if (!(completed_rx & 63)) 1044 completed_tx += iser_drain_tx_cq(device, cq_index); 1045 } 1046 /* #warning "it is assumed here that arming CQ only once its empty" * 1047 * " would not cause interrupts to be missed" */ 1048 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 1049 1050 completed_tx += iser_drain_tx_cq(device, cq_index); 1051 iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx); 1052 } 1053 1054 static void iser_cq_callback(struct ib_cq *cq, void *cq_context) 1055 { 1056 struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)cq_context; 1057 struct iser_device *device = cq_desc->device; 1058 int cq_index = cq_desc->cq_index; 1059 1060 tasklet_schedule(&device->cq_tasklet[cq_index]); 1061 } 1062