1 /* 2 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved. 3 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. 4 * Copyright (c) 2013 Mellanox Technologies. All rights reserved. 5 * 6 * This software is available to you under a choice of one of two 7 * licenses. You may choose to be licensed under the terms of the GNU 8 * General Public License (GPL) Version 2, available from the file 9 * COPYING in the main directory of this source tree, or the 10 * OpenIB.org BSD license below: 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above 17 * copyright notice, this list of conditions and the following 18 * disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials 23 * provided with the distribution. 24 * 25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * SOFTWARE. 33 */ 34 #include <linux/kernel.h> 35 #include <linux/module.h> 36 #include <linux/slab.h> 37 #include <linux/delay.h> 38 39 #include "iscsi_iser.h" 40 41 #define ISCSI_ISER_MAX_CONN 8 42 #define ISER_MAX_RX_CQ_LEN (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN) 43 #define ISER_MAX_TX_CQ_LEN (ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN) 44 45 static void iser_cq_tasklet_fn(unsigned long data); 46 static void iser_cq_callback(struct ib_cq *cq, void *cq_context); 47 48 static void iser_cq_event_callback(struct ib_event *cause, void *context) 49 { 50 iser_err("got cq event %d \n", cause->event); 51 } 52 53 static void iser_qp_event_callback(struct ib_event *cause, void *context) 54 { 55 iser_err("got qp event %d\n",cause->event); 56 } 57 58 static void iser_event_handler(struct ib_event_handler *handler, 59 struct ib_event *event) 60 { 61 iser_err("async event %d on device %s port %d\n", event->event, 62 event->device->name, event->element.port_num); 63 } 64 65 /** 66 * iser_create_device_ib_res - creates Protection Domain (PD), Completion 67 * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with 68 * the adapator. 69 * 70 * returns 0 on success, -1 on failure 71 */ 72 static int iser_create_device_ib_res(struct iser_device *device) 73 { 74 int i, j; 75 struct iser_cq_desc *cq_desc; 76 struct ib_device_attr *dev_attr; 77 78 dev_attr = kmalloc(sizeof(*dev_attr), GFP_KERNEL); 79 if (!dev_attr) 80 return -ENOMEM; 81 82 if (ib_query_device(device->ib_device, dev_attr)) { 83 pr_warn("Query device failed for %s\n", device->ib_device->name); 84 goto dev_attr_err; 85 } 86 87 /* Assign function handles - based on FMR support */ 88 if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr && 89 device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) { 90 iser_info("FMR supported, using FMR for registration\n"); 91 device->iser_alloc_rdma_reg_res = iser_create_fmr_pool; 92 device->iser_free_rdma_reg_res = iser_free_fmr_pool; 93 device->iser_reg_rdma_mem = iser_reg_rdma_mem_fmr; 94 device->iser_unreg_rdma_mem = iser_unreg_mem_fmr; 95 } else 96 if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { 97 iser_info("FRWR supported, using FRWR for registration\n"); 98 device->iser_alloc_rdma_reg_res = iser_create_frwr_pool; 99 device->iser_free_rdma_reg_res = iser_free_frwr_pool; 100 device->iser_reg_rdma_mem = iser_reg_rdma_mem_frwr; 101 device->iser_unreg_rdma_mem = iser_unreg_mem_frwr; 102 } else { 103 iser_err("IB device does not support FMRs nor FRWRs, can't register memory\n"); 104 goto dev_attr_err; 105 } 106 107 device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors); 108 iser_info("using %d CQs, device %s supports %d vectors\n", 109 device->cqs_used, device->ib_device->name, 110 device->ib_device->num_comp_vectors); 111 112 device->cq_desc = kmalloc(sizeof(struct iser_cq_desc) * device->cqs_used, 113 GFP_KERNEL); 114 if (device->cq_desc == NULL) 115 goto cq_desc_err; 116 cq_desc = device->cq_desc; 117 118 device->pd = ib_alloc_pd(device->ib_device); 119 if (IS_ERR(device->pd)) 120 goto pd_err; 121 122 for (i = 0; i < device->cqs_used; i++) { 123 cq_desc[i].device = device; 124 cq_desc[i].cq_index = i; 125 126 device->rx_cq[i] = ib_create_cq(device->ib_device, 127 iser_cq_callback, 128 iser_cq_event_callback, 129 (void *)&cq_desc[i], 130 ISER_MAX_RX_CQ_LEN, i); 131 if (IS_ERR(device->rx_cq[i])) 132 goto cq_err; 133 134 device->tx_cq[i] = ib_create_cq(device->ib_device, 135 NULL, iser_cq_event_callback, 136 (void *)&cq_desc[i], 137 ISER_MAX_TX_CQ_LEN, i); 138 139 if (IS_ERR(device->tx_cq[i])) 140 goto cq_err; 141 142 if (ib_req_notify_cq(device->rx_cq[i], IB_CQ_NEXT_COMP)) 143 goto cq_err; 144 145 tasklet_init(&device->cq_tasklet[i], 146 iser_cq_tasklet_fn, 147 (unsigned long)&cq_desc[i]); 148 } 149 150 device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE | 151 IB_ACCESS_REMOTE_WRITE | 152 IB_ACCESS_REMOTE_READ); 153 if (IS_ERR(device->mr)) 154 goto dma_mr_err; 155 156 INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device, 157 iser_event_handler); 158 if (ib_register_event_handler(&device->event_handler)) 159 goto handler_err; 160 161 kfree(dev_attr); 162 return 0; 163 164 handler_err: 165 ib_dereg_mr(device->mr); 166 dma_mr_err: 167 for (j = 0; j < device->cqs_used; j++) 168 tasklet_kill(&device->cq_tasklet[j]); 169 cq_err: 170 for (j = 0; j < i; j++) { 171 if (device->tx_cq[j]) 172 ib_destroy_cq(device->tx_cq[j]); 173 if (device->rx_cq[j]) 174 ib_destroy_cq(device->rx_cq[j]); 175 } 176 ib_dealloc_pd(device->pd); 177 pd_err: 178 kfree(device->cq_desc); 179 cq_desc_err: 180 iser_err("failed to allocate an IB resource\n"); 181 dev_attr_err: 182 kfree(dev_attr); 183 return -1; 184 } 185 186 /** 187 * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR, 188 * CQ and PD created with the device associated with the adapator. 189 */ 190 static void iser_free_device_ib_res(struct iser_device *device) 191 { 192 int i; 193 BUG_ON(device->mr == NULL); 194 195 for (i = 0; i < device->cqs_used; i++) { 196 tasklet_kill(&device->cq_tasklet[i]); 197 (void)ib_destroy_cq(device->tx_cq[i]); 198 (void)ib_destroy_cq(device->rx_cq[i]); 199 device->tx_cq[i] = NULL; 200 device->rx_cq[i] = NULL; 201 } 202 203 (void)ib_unregister_event_handler(&device->event_handler); 204 (void)ib_dereg_mr(device->mr); 205 (void)ib_dealloc_pd(device->pd); 206 207 kfree(device->cq_desc); 208 209 device->mr = NULL; 210 device->pd = NULL; 211 } 212 213 /** 214 * iser_create_fmr_pool - Creates FMR pool and page_vector 215 * 216 * returns 0 on success, or errno code on failure 217 */ 218 int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max) 219 { 220 struct iser_device *device = ib_conn->device; 221 struct ib_fmr_pool_param params; 222 int ret = -ENOMEM; 223 224 ib_conn->fastreg.fmr.page_vec = kmalloc(sizeof(struct iser_page_vec) + 225 (sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)), 226 GFP_KERNEL); 227 if (!ib_conn->fastreg.fmr.page_vec) 228 return ret; 229 230 ib_conn->fastreg.fmr.page_vec->pages = (u64 *)(ib_conn->fastreg.fmr.page_vec + 1); 231 232 params.page_shift = SHIFT_4K; 233 /* when the first/last SG element are not start/end * 234 * page aligned, the map whould be of N+1 pages */ 235 params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1; 236 /* make the pool size twice the max number of SCSI commands * 237 * the ML is expected to queue, watermark for unmap at 50% */ 238 params.pool_size = cmds_max * 2; 239 params.dirty_watermark = cmds_max; 240 params.cache = 0; 241 params.flush_function = NULL; 242 params.access = (IB_ACCESS_LOCAL_WRITE | 243 IB_ACCESS_REMOTE_WRITE | 244 IB_ACCESS_REMOTE_READ); 245 246 ib_conn->fastreg.fmr.pool = ib_create_fmr_pool(device->pd, ¶ms); 247 if (!IS_ERR(ib_conn->fastreg.fmr.pool)) 248 return 0; 249 250 /* no FMR => no need for page_vec */ 251 kfree(ib_conn->fastreg.fmr.page_vec); 252 ib_conn->fastreg.fmr.page_vec = NULL; 253 254 ret = PTR_ERR(ib_conn->fastreg.fmr.pool); 255 ib_conn->fastreg.fmr.pool = NULL; 256 if (ret != -ENOSYS) { 257 iser_err("FMR allocation failed, err %d\n", ret); 258 return ret; 259 } else { 260 iser_warn("FMRs are not supported, using unaligned mode\n"); 261 return 0; 262 } 263 } 264 265 /** 266 * iser_free_fmr_pool - releases the FMR pool and page vec 267 */ 268 void iser_free_fmr_pool(struct iser_conn *ib_conn) 269 { 270 iser_info("freeing conn %p fmr pool %p\n", 271 ib_conn, ib_conn->fastreg.fmr.pool); 272 273 if (ib_conn->fastreg.fmr.pool != NULL) 274 ib_destroy_fmr_pool(ib_conn->fastreg.fmr.pool); 275 276 ib_conn->fastreg.fmr.pool = NULL; 277 278 kfree(ib_conn->fastreg.fmr.page_vec); 279 ib_conn->fastreg.fmr.page_vec = NULL; 280 } 281 282 /** 283 * iser_create_frwr_pool - Creates pool of fast_reg descriptors 284 * for fast registration work requests. 285 * returns 0 on success, or errno code on failure 286 */ 287 int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max) 288 { 289 struct iser_device *device = ib_conn->device; 290 struct fast_reg_descriptor *desc; 291 int i, ret; 292 293 INIT_LIST_HEAD(&ib_conn->fastreg.frwr.pool); 294 ib_conn->fastreg.frwr.pool_size = 0; 295 for (i = 0; i < cmds_max; i++) { 296 desc = kmalloc(sizeof(*desc), GFP_KERNEL); 297 if (!desc) { 298 iser_err("Failed to allocate a new fast_reg descriptor\n"); 299 ret = -ENOMEM; 300 goto err; 301 } 302 303 desc->data_frpl = ib_alloc_fast_reg_page_list(device->ib_device, 304 ISCSI_ISER_SG_TABLESIZE + 1); 305 if (IS_ERR(desc->data_frpl)) { 306 ret = PTR_ERR(desc->data_frpl); 307 iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n", ret); 308 goto fast_reg_page_failure; 309 } 310 311 desc->data_mr = ib_alloc_fast_reg_mr(device->pd, 312 ISCSI_ISER_SG_TABLESIZE + 1); 313 if (IS_ERR(desc->data_mr)) { 314 ret = PTR_ERR(desc->data_mr); 315 iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret); 316 goto fast_reg_mr_failure; 317 } 318 desc->valid = true; 319 list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool); 320 ib_conn->fastreg.frwr.pool_size++; 321 } 322 323 return 0; 324 325 fast_reg_mr_failure: 326 ib_free_fast_reg_page_list(desc->data_frpl); 327 fast_reg_page_failure: 328 kfree(desc); 329 err: 330 iser_free_frwr_pool(ib_conn); 331 return ret; 332 } 333 334 /** 335 * iser_free_frwr_pool - releases the pool of fast_reg descriptors 336 */ 337 void iser_free_frwr_pool(struct iser_conn *ib_conn) 338 { 339 struct fast_reg_descriptor *desc, *tmp; 340 int i = 0; 341 342 if (list_empty(&ib_conn->fastreg.frwr.pool)) 343 return; 344 345 iser_info("freeing conn %p frwr pool\n", ib_conn); 346 347 list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.frwr.pool, list) { 348 list_del(&desc->list); 349 ib_free_fast_reg_page_list(desc->data_frpl); 350 ib_dereg_mr(desc->data_mr); 351 kfree(desc); 352 ++i; 353 } 354 355 if (i < ib_conn->fastreg.frwr.pool_size) 356 iser_warn("pool still has %d regions registered\n", 357 ib_conn->fastreg.frwr.pool_size - i); 358 } 359 360 /** 361 * iser_create_ib_conn_res - Queue-Pair (QP) 362 * 363 * returns 0 on success, -1 on failure 364 */ 365 static int iser_create_ib_conn_res(struct iser_conn *ib_conn) 366 { 367 struct iser_device *device; 368 struct ib_qp_init_attr init_attr; 369 int ret = -ENOMEM; 370 int index, min_index = 0; 371 372 BUG_ON(ib_conn->device == NULL); 373 374 device = ib_conn->device; 375 376 memset(&init_attr, 0, sizeof init_attr); 377 378 mutex_lock(&ig.connlist_mutex); 379 /* select the CQ with the minimal number of usages */ 380 for (index = 0; index < device->cqs_used; index++) 381 if (device->cq_active_qps[index] < 382 device->cq_active_qps[min_index]) 383 min_index = index; 384 device->cq_active_qps[min_index]++; 385 mutex_unlock(&ig.connlist_mutex); 386 iser_info("cq index %d used for ib_conn %p\n", min_index, ib_conn); 387 388 init_attr.event_handler = iser_qp_event_callback; 389 init_attr.qp_context = (void *)ib_conn; 390 init_attr.send_cq = device->tx_cq[min_index]; 391 init_attr.recv_cq = device->rx_cq[min_index]; 392 init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS; 393 init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; 394 init_attr.cap.max_send_sge = 2; 395 init_attr.cap.max_recv_sge = 1; 396 init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 397 init_attr.qp_type = IB_QPT_RC; 398 399 ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr); 400 if (ret) 401 goto out_err; 402 403 ib_conn->qp = ib_conn->cma_id->qp; 404 iser_info("setting conn %p cma_id %p qp %p\n", 405 ib_conn, ib_conn->cma_id, 406 ib_conn->cma_id->qp); 407 return ret; 408 409 out_err: 410 iser_err("unable to alloc mem or create resource, err %d\n", ret); 411 return ret; 412 } 413 414 /** 415 * releases the QP objects, returns 0 on success, 416 * -1 on failure 417 */ 418 static int iser_free_ib_conn_res(struct iser_conn *ib_conn) 419 { 420 int cq_index; 421 BUG_ON(ib_conn == NULL); 422 423 iser_info("freeing conn %p cma_id %p qp %p\n", 424 ib_conn, ib_conn->cma_id, 425 ib_conn->qp); 426 427 /* qp is created only once both addr & route are resolved */ 428 429 if (ib_conn->qp != NULL) { 430 cq_index = ((struct iser_cq_desc *)ib_conn->qp->recv_cq->cq_context)->cq_index; 431 ib_conn->device->cq_active_qps[cq_index]--; 432 433 rdma_destroy_qp(ib_conn->cma_id); 434 } 435 436 ib_conn->qp = NULL; 437 438 return 0; 439 } 440 441 /** 442 * based on the resolved device node GUID see if there already allocated 443 * device for this device. If there's no such, create one. 444 */ 445 static 446 struct iser_device *iser_device_find_by_ib_device(struct rdma_cm_id *cma_id) 447 { 448 struct iser_device *device; 449 450 mutex_lock(&ig.device_list_mutex); 451 452 list_for_each_entry(device, &ig.device_list, ig_list) 453 /* find if there's a match using the node GUID */ 454 if (device->ib_device->node_guid == cma_id->device->node_guid) 455 goto inc_refcnt; 456 457 device = kzalloc(sizeof *device, GFP_KERNEL); 458 if (device == NULL) 459 goto out; 460 461 /* assign this device to the device */ 462 device->ib_device = cma_id->device; 463 /* init the device and link it into ig device list */ 464 if (iser_create_device_ib_res(device)) { 465 kfree(device); 466 device = NULL; 467 goto out; 468 } 469 list_add(&device->ig_list, &ig.device_list); 470 471 inc_refcnt: 472 device->refcount++; 473 out: 474 mutex_unlock(&ig.device_list_mutex); 475 return device; 476 } 477 478 /* if there's no demand for this device, release it */ 479 static void iser_device_try_release(struct iser_device *device) 480 { 481 mutex_lock(&ig.device_list_mutex); 482 device->refcount--; 483 iser_info("device %p refcount %d\n", device, device->refcount); 484 if (!device->refcount) { 485 iser_free_device_ib_res(device); 486 list_del(&device->ig_list); 487 kfree(device); 488 } 489 mutex_unlock(&ig.device_list_mutex); 490 } 491 492 static int iser_conn_state_comp_exch(struct iser_conn *ib_conn, 493 enum iser_ib_conn_state comp, 494 enum iser_ib_conn_state exch) 495 { 496 int ret; 497 498 spin_lock_bh(&ib_conn->lock); 499 if ((ret = (ib_conn->state == comp))) 500 ib_conn->state = exch; 501 spin_unlock_bh(&ib_conn->lock); 502 return ret; 503 } 504 505 /** 506 * Frees all conn objects and deallocs conn descriptor 507 */ 508 static void iser_conn_release(struct iser_conn *ib_conn, int can_destroy_id) 509 { 510 struct iser_device *device = ib_conn->device; 511 512 BUG_ON(ib_conn->state != ISER_CONN_DOWN); 513 514 mutex_lock(&ig.connlist_mutex); 515 list_del(&ib_conn->conn_list); 516 mutex_unlock(&ig.connlist_mutex); 517 iser_free_rx_descriptors(ib_conn); 518 iser_free_ib_conn_res(ib_conn); 519 ib_conn->device = NULL; 520 /* on EVENT_ADDR_ERROR there's no device yet for this conn */ 521 if (device != NULL) 522 iser_device_try_release(device); 523 /* if cma handler context, the caller actually destroy the id */ 524 if (ib_conn->cma_id != NULL && can_destroy_id) { 525 rdma_destroy_id(ib_conn->cma_id); 526 ib_conn->cma_id = NULL; 527 } 528 iscsi_destroy_endpoint(ib_conn->ep); 529 } 530 531 void iser_conn_get(struct iser_conn *ib_conn) 532 { 533 atomic_inc(&ib_conn->refcount); 534 } 535 536 int iser_conn_put(struct iser_conn *ib_conn, int can_destroy_id) 537 { 538 if (atomic_dec_and_test(&ib_conn->refcount)) { 539 iser_conn_release(ib_conn, can_destroy_id); 540 return 1; 541 } 542 return 0; 543 } 544 545 /** 546 * triggers start of the disconnect procedures and wait for them to be done 547 */ 548 void iser_conn_terminate(struct iser_conn *ib_conn) 549 { 550 int err = 0; 551 552 /* change the ib conn state only if the conn is UP, however always call 553 * rdma_disconnect since this is the only way to cause the CMA to change 554 * the QP state to ERROR 555 */ 556 557 iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, ISER_CONN_TERMINATING); 558 err = rdma_disconnect(ib_conn->cma_id); 559 if (err) 560 iser_err("Failed to disconnect, conn: 0x%p err %d\n", 561 ib_conn,err); 562 563 wait_event_interruptible(ib_conn->wait, 564 ib_conn->state == ISER_CONN_DOWN); 565 566 iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */ 567 } 568 569 static int iser_connect_error(struct rdma_cm_id *cma_id) 570 { 571 struct iser_conn *ib_conn; 572 ib_conn = (struct iser_conn *)cma_id->context; 573 574 ib_conn->state = ISER_CONN_DOWN; 575 wake_up_interruptible(&ib_conn->wait); 576 return iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */ 577 } 578 579 static int iser_addr_handler(struct rdma_cm_id *cma_id) 580 { 581 struct iser_device *device; 582 struct iser_conn *ib_conn; 583 int ret; 584 585 device = iser_device_find_by_ib_device(cma_id); 586 if (!device) { 587 iser_err("device lookup/creation failed\n"); 588 return iser_connect_error(cma_id); 589 } 590 591 ib_conn = (struct iser_conn *)cma_id->context; 592 ib_conn->device = device; 593 594 ret = rdma_resolve_route(cma_id, 1000); 595 if (ret) { 596 iser_err("resolve route failed: %d\n", ret); 597 return iser_connect_error(cma_id); 598 } 599 600 return 0; 601 } 602 603 static int iser_route_handler(struct rdma_cm_id *cma_id) 604 { 605 struct rdma_conn_param conn_param; 606 int ret; 607 struct iser_cm_hdr req_hdr; 608 609 ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context); 610 if (ret) 611 goto failure; 612 613 memset(&conn_param, 0, sizeof conn_param); 614 conn_param.responder_resources = 4; 615 conn_param.initiator_depth = 1; 616 conn_param.retry_count = 7; 617 conn_param.rnr_retry_count = 6; 618 619 memset(&req_hdr, 0, sizeof(req_hdr)); 620 req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED | 621 ISER_SEND_W_INV_NOT_SUPPORTED); 622 conn_param.private_data = (void *)&req_hdr; 623 conn_param.private_data_len = sizeof(struct iser_cm_hdr); 624 625 ret = rdma_connect(cma_id, &conn_param); 626 if (ret) { 627 iser_err("failure connecting: %d\n", ret); 628 goto failure; 629 } 630 631 return 0; 632 failure: 633 return iser_connect_error(cma_id); 634 } 635 636 static void iser_connected_handler(struct rdma_cm_id *cma_id) 637 { 638 struct iser_conn *ib_conn; 639 640 ib_conn = (struct iser_conn *)cma_id->context; 641 ib_conn->state = ISER_CONN_UP; 642 wake_up_interruptible(&ib_conn->wait); 643 } 644 645 static int iser_disconnected_handler(struct rdma_cm_id *cma_id) 646 { 647 struct iser_conn *ib_conn; 648 int ret; 649 650 ib_conn = (struct iser_conn *)cma_id->context; 651 652 /* getting here when the state is UP means that the conn is being * 653 * terminated asynchronously from the iSCSI layer's perspective. */ 654 if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, 655 ISER_CONN_TERMINATING)){ 656 if (ib_conn->iser_conn) 657 iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn, 658 ISCSI_ERR_CONN_FAILED); 659 else 660 iser_err("iscsi_iser connection isn't bound\n"); 661 } 662 663 /* Complete the termination process if no posts are pending */ 664 if (ib_conn->post_recv_buf_count == 0 && 665 (atomic_read(&ib_conn->post_send_buf_count) == 0)) { 666 ib_conn->state = ISER_CONN_DOWN; 667 wake_up_interruptible(&ib_conn->wait); 668 } 669 670 ret = iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */ 671 return ret; 672 } 673 674 static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) 675 { 676 int ret = 0; 677 678 iser_info("event %d status %d conn %p id %p\n", 679 event->event, event->status, cma_id->context, cma_id); 680 681 switch (event->event) { 682 case RDMA_CM_EVENT_ADDR_RESOLVED: 683 ret = iser_addr_handler(cma_id); 684 break; 685 case RDMA_CM_EVENT_ROUTE_RESOLVED: 686 ret = iser_route_handler(cma_id); 687 break; 688 case RDMA_CM_EVENT_ESTABLISHED: 689 iser_connected_handler(cma_id); 690 break; 691 case RDMA_CM_EVENT_ADDR_ERROR: 692 case RDMA_CM_EVENT_ROUTE_ERROR: 693 case RDMA_CM_EVENT_CONNECT_ERROR: 694 case RDMA_CM_EVENT_UNREACHABLE: 695 case RDMA_CM_EVENT_REJECTED: 696 ret = iser_connect_error(cma_id); 697 break; 698 case RDMA_CM_EVENT_DISCONNECTED: 699 case RDMA_CM_EVENT_DEVICE_REMOVAL: 700 case RDMA_CM_EVENT_ADDR_CHANGE: 701 ret = iser_disconnected_handler(cma_id); 702 break; 703 default: 704 iser_err("Unexpected RDMA CM event (%d)\n", event->event); 705 break; 706 } 707 return ret; 708 } 709 710 void iser_conn_init(struct iser_conn *ib_conn) 711 { 712 ib_conn->state = ISER_CONN_INIT; 713 init_waitqueue_head(&ib_conn->wait); 714 ib_conn->post_recv_buf_count = 0; 715 atomic_set(&ib_conn->post_send_buf_count, 0); 716 atomic_set(&ib_conn->refcount, 1); /* ref ib conn allocation */ 717 INIT_LIST_HEAD(&ib_conn->conn_list); 718 spin_lock_init(&ib_conn->lock); 719 } 720 721 /** 722 * starts the process of connecting to the target 723 * sleeps until the connection is established or rejected 724 */ 725 int iser_connect(struct iser_conn *ib_conn, 726 struct sockaddr_in *src_addr, 727 struct sockaddr_in *dst_addr, 728 int non_blocking) 729 { 730 struct sockaddr *src, *dst; 731 int err = 0; 732 733 sprintf(ib_conn->name, "%pI4:%d", 734 &dst_addr->sin_addr.s_addr, dst_addr->sin_port); 735 736 /* the device is known only --after-- address resolution */ 737 ib_conn->device = NULL; 738 739 iser_info("connecting to: %pI4, port 0x%x\n", 740 &dst_addr->sin_addr, dst_addr->sin_port); 741 742 ib_conn->state = ISER_CONN_PENDING; 743 744 iser_conn_get(ib_conn); /* ref ib conn's cma id */ 745 ib_conn->cma_id = rdma_create_id(iser_cma_handler, 746 (void *)ib_conn, 747 RDMA_PS_TCP, IB_QPT_RC); 748 if (IS_ERR(ib_conn->cma_id)) { 749 err = PTR_ERR(ib_conn->cma_id); 750 iser_err("rdma_create_id failed: %d\n", err); 751 goto id_failure; 752 } 753 754 src = (struct sockaddr *)src_addr; 755 dst = (struct sockaddr *)dst_addr; 756 err = rdma_resolve_addr(ib_conn->cma_id, src, dst, 1000); 757 if (err) { 758 iser_err("rdma_resolve_addr failed: %d\n", err); 759 goto addr_failure; 760 } 761 762 if (!non_blocking) { 763 wait_event_interruptible(ib_conn->wait, 764 (ib_conn->state != ISER_CONN_PENDING)); 765 766 if (ib_conn->state != ISER_CONN_UP) { 767 err = -EIO; 768 goto connect_failure; 769 } 770 } 771 772 mutex_lock(&ig.connlist_mutex); 773 list_add(&ib_conn->conn_list, &ig.connlist); 774 mutex_unlock(&ig.connlist_mutex); 775 return 0; 776 777 id_failure: 778 ib_conn->cma_id = NULL; 779 addr_failure: 780 ib_conn->state = ISER_CONN_DOWN; 781 iser_conn_put(ib_conn, 1); /* deref ib conn's cma id */ 782 connect_failure: 783 iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */ 784 return err; 785 } 786 787 /** 788 * iser_reg_page_vec - Register physical memory 789 * 790 * returns: 0 on success, errno code on failure 791 */ 792 int iser_reg_page_vec(struct iser_conn *ib_conn, 793 struct iser_page_vec *page_vec, 794 struct iser_mem_reg *mem_reg) 795 { 796 struct ib_pool_fmr *mem; 797 u64 io_addr; 798 u64 *page_list; 799 int status; 800 801 page_list = page_vec->pages; 802 io_addr = page_list[0]; 803 804 mem = ib_fmr_pool_map_phys(ib_conn->fastreg.fmr.pool, 805 page_list, 806 page_vec->length, 807 io_addr); 808 809 if (IS_ERR(mem)) { 810 status = (int)PTR_ERR(mem); 811 iser_err("ib_fmr_pool_map_phys failed: %d\n", status); 812 return status; 813 } 814 815 mem_reg->lkey = mem->fmr->lkey; 816 mem_reg->rkey = mem->fmr->rkey; 817 mem_reg->len = page_vec->length * SIZE_4K; 818 mem_reg->va = io_addr; 819 mem_reg->is_mr = 1; 820 mem_reg->mem_h = (void *)mem; 821 822 mem_reg->va += page_vec->offset; 823 mem_reg->len = page_vec->data_size; 824 825 iser_dbg("PHYSICAL Mem.register, [PHYS p_array: 0x%p, sz: %d, " 826 "entry[0]: (0x%08lx,%ld)] -> " 827 "[lkey: 0x%08X mem_h: 0x%p va: 0x%08lX sz: %ld]\n", 828 page_vec, page_vec->length, 829 (unsigned long)page_vec->pages[0], 830 (unsigned long)page_vec->data_size, 831 (unsigned int)mem_reg->lkey, mem_reg->mem_h, 832 (unsigned long)mem_reg->va, (unsigned long)mem_reg->len); 833 return 0; 834 } 835 836 /** 837 * Unregister (previosuly registered using FMR) memory. 838 * If memory is non-FMR does nothing. 839 */ 840 void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, 841 enum iser_data_dir cmd_dir) 842 { 843 struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; 844 int ret; 845 846 if (!reg->is_mr) 847 return; 848 849 iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h); 850 851 ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h); 852 if (ret) 853 iser_err("ib_fmr_pool_unmap failed %d\n", ret); 854 855 reg->mem_h = NULL; 856 } 857 858 void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task, 859 enum iser_data_dir cmd_dir) 860 { 861 struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; 862 struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn; 863 struct fast_reg_descriptor *desc = reg->mem_h; 864 865 if (!reg->is_mr) 866 return; 867 868 reg->mem_h = NULL; 869 reg->is_mr = 0; 870 spin_lock_bh(&ib_conn->lock); 871 list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool); 872 spin_unlock_bh(&ib_conn->lock); 873 } 874 875 int iser_post_recvl(struct iser_conn *ib_conn) 876 { 877 struct ib_recv_wr rx_wr, *rx_wr_failed; 878 struct ib_sge sge; 879 int ib_ret; 880 881 sge.addr = ib_conn->login_resp_dma; 882 sge.length = ISER_RX_LOGIN_SIZE; 883 sge.lkey = ib_conn->device->mr->lkey; 884 885 rx_wr.wr_id = (unsigned long)ib_conn->login_resp_buf; 886 rx_wr.sg_list = &sge; 887 rx_wr.num_sge = 1; 888 rx_wr.next = NULL; 889 890 ib_conn->post_recv_buf_count++; 891 ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed); 892 if (ib_ret) { 893 iser_err("ib_post_recv failed ret=%d\n", ib_ret); 894 ib_conn->post_recv_buf_count--; 895 } 896 return ib_ret; 897 } 898 899 int iser_post_recvm(struct iser_conn *ib_conn, int count) 900 { 901 struct ib_recv_wr *rx_wr, *rx_wr_failed; 902 int i, ib_ret; 903 unsigned int my_rx_head = ib_conn->rx_desc_head; 904 struct iser_rx_desc *rx_desc; 905 906 for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) { 907 rx_desc = &ib_conn->rx_descs[my_rx_head]; 908 rx_wr->wr_id = (unsigned long)rx_desc; 909 rx_wr->sg_list = &rx_desc->rx_sg; 910 rx_wr->num_sge = 1; 911 rx_wr->next = rx_wr + 1; 912 my_rx_head = (my_rx_head + 1) & ib_conn->qp_max_recv_dtos_mask; 913 } 914 915 rx_wr--; 916 rx_wr->next = NULL; /* mark end of work requests list */ 917 918 ib_conn->post_recv_buf_count += count; 919 ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed); 920 if (ib_ret) { 921 iser_err("ib_post_recv failed ret=%d\n", ib_ret); 922 ib_conn->post_recv_buf_count -= count; 923 } else 924 ib_conn->rx_desc_head = my_rx_head; 925 return ib_ret; 926 } 927 928 929 /** 930 * iser_start_send - Initiate a Send DTO operation 931 * 932 * returns 0 on success, -1 on failure 933 */ 934 int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc) 935 { 936 int ib_ret; 937 struct ib_send_wr send_wr, *send_wr_failed; 938 939 ib_dma_sync_single_for_device(ib_conn->device->ib_device, 940 tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); 941 942 send_wr.next = NULL; 943 send_wr.wr_id = (unsigned long)tx_desc; 944 send_wr.sg_list = tx_desc->tx_sg; 945 send_wr.num_sge = tx_desc->num_sge; 946 send_wr.opcode = IB_WR_SEND; 947 send_wr.send_flags = IB_SEND_SIGNALED; 948 949 atomic_inc(&ib_conn->post_send_buf_count); 950 951 ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed); 952 if (ib_ret) { 953 iser_err("ib_post_send failed, ret:%d\n", ib_ret); 954 atomic_dec(&ib_conn->post_send_buf_count); 955 } 956 return ib_ret; 957 } 958 959 static void iser_handle_comp_error(struct iser_tx_desc *desc, 960 struct iser_conn *ib_conn) 961 { 962 if (desc && desc->type == ISCSI_TX_DATAOUT) 963 kmem_cache_free(ig.desc_cache, desc); 964 965 if (ib_conn->post_recv_buf_count == 0 && 966 atomic_read(&ib_conn->post_send_buf_count) == 0) { 967 /* getting here when the state is UP means that the conn is * 968 * being terminated asynchronously from the iSCSI layer's * 969 * perspective. */ 970 if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, 971 ISER_CONN_TERMINATING)) 972 iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn, 973 ISCSI_ERR_CONN_FAILED); 974 975 /* no more non completed posts to the QP, complete the 976 * termination process w.o worrying on disconnect event */ 977 ib_conn->state = ISER_CONN_DOWN; 978 wake_up_interruptible(&ib_conn->wait); 979 } 980 } 981 982 static int iser_drain_tx_cq(struct iser_device *device, int cq_index) 983 { 984 struct ib_cq *cq = device->tx_cq[cq_index]; 985 struct ib_wc wc; 986 struct iser_tx_desc *tx_desc; 987 struct iser_conn *ib_conn; 988 int completed_tx = 0; 989 990 while (ib_poll_cq(cq, 1, &wc) == 1) { 991 tx_desc = (struct iser_tx_desc *) (unsigned long) wc.wr_id; 992 ib_conn = wc.qp->qp_context; 993 if (wc.status == IB_WC_SUCCESS) { 994 if (wc.opcode == IB_WC_SEND) 995 iser_snd_completion(tx_desc, ib_conn); 996 else if (wc.opcode == IB_WC_LOCAL_INV || 997 wc.opcode == IB_WC_FAST_REG_MR) { 998 atomic_dec(&ib_conn->post_send_buf_count); 999 continue; 1000 } else 1001 iser_err("expected opcode %d got %d\n", 1002 IB_WC_SEND, wc.opcode); 1003 } else { 1004 iser_err("tx id %llx status %d vend_err %x\n", 1005 wc.wr_id, wc.status, wc.vendor_err); 1006 atomic_dec(&ib_conn->post_send_buf_count); 1007 iser_handle_comp_error(tx_desc, ib_conn); 1008 } 1009 completed_tx++; 1010 } 1011 return completed_tx; 1012 } 1013 1014 1015 static void iser_cq_tasklet_fn(unsigned long data) 1016 { 1017 struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)data; 1018 struct iser_device *device = cq_desc->device; 1019 int cq_index = cq_desc->cq_index; 1020 struct ib_cq *cq = device->rx_cq[cq_index]; 1021 struct ib_wc wc; 1022 struct iser_rx_desc *desc; 1023 unsigned long xfer_len; 1024 struct iser_conn *ib_conn; 1025 int completed_tx, completed_rx; 1026 completed_tx = completed_rx = 0; 1027 1028 while (ib_poll_cq(cq, 1, &wc) == 1) { 1029 desc = (struct iser_rx_desc *) (unsigned long) wc.wr_id; 1030 BUG_ON(desc == NULL); 1031 ib_conn = wc.qp->qp_context; 1032 if (wc.status == IB_WC_SUCCESS) { 1033 if (wc.opcode == IB_WC_RECV) { 1034 xfer_len = (unsigned long)wc.byte_len; 1035 iser_rcv_completion(desc, xfer_len, ib_conn); 1036 } else 1037 iser_err("expected opcode %d got %d\n", 1038 IB_WC_RECV, wc.opcode); 1039 } else { 1040 if (wc.status != IB_WC_WR_FLUSH_ERR) 1041 iser_err("rx id %llx status %d vend_err %x\n", 1042 wc.wr_id, wc.status, wc.vendor_err); 1043 ib_conn->post_recv_buf_count--; 1044 iser_handle_comp_error(NULL, ib_conn); 1045 } 1046 completed_rx++; 1047 if (!(completed_rx & 63)) 1048 completed_tx += iser_drain_tx_cq(device, cq_index); 1049 } 1050 /* #warning "it is assumed here that arming CQ only once its empty" * 1051 * " would not cause interrupts to be missed" */ 1052 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 1053 1054 completed_tx += iser_drain_tx_cq(device, cq_index); 1055 iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx); 1056 } 1057 1058 static void iser_cq_callback(struct ib_cq *cq, void *cq_context) 1059 { 1060 struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)cq_context; 1061 struct iser_device *device = cq_desc->device; 1062 int cq_index = cq_desc->cq_index; 1063 1064 tasklet_schedule(&device->cq_tasklet[cq_index]); 1065 } 1066