/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
 * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/delay.h>

#include "iscsi_iser.h"

#define ISCSI_ISER_MAX_CONN	8
#define ISER_MAX_RX_CQ_LEN	(ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN)
#define ISER_MAX_TX_CQ_LEN	(ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN)

static void iser_cq_tasklet_fn(unsigned long data);
static void iser_cq_callback(struct ib_cq *cq, void *cq_context);

static void iser_cq_event_callback(struct ib_event *cause, void *context)
{
	iser_err("got cq event %d\n", cause->event);
}

static void iser_qp_event_callback(struct ib_event *cause, void *context)
{
	iser_err("got qp event %d\n", cause->event);
}

static void iser_event_handler(struct ib_event_handler *handler,
			       struct ib_event *event)
{
	iser_err("async event %d on device %s port %d\n", event->event,
		 event->device->name, event->element.port_num);
}
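/*
 * Device-level resources are shared by every connection that goes through
 * the same IB device: one PD, one DMA MR, and one RX/TX CQ pair (with a
 * polling tasklet) per completion vector, up to ISER_MAX_CQ.  RDMA memory
 * registration is reached through function pointers, so the rest of the
 * driver stays agnostic to whether the device does FMR or fast registration
 * work requests.
 */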
/**
 * iser_create_device_ib_res - creates a Protection Domain (PD), the
 * Completion Queues (CQs) and a DMA Memory Region (DMA MR) for the
 * device associated with the adapter.
 *
 * returns 0 on success, negative value on failure
 */
static int iser_create_device_ib_res(struct iser_device *device)
{
	struct iser_cq_desc *cq_desc;
	struct ib_device_attr *dev_attr = &device->dev_attr;
	int ret, i, j;

	ret = ib_query_device(device->ib_device, dev_attr);
	if (ret) {
		pr_warn("Query device failed for %s\n", device->ib_device->name);
		return ret;
	}

	/* Assign function handles - based on FMR support */
	if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr &&
	    device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) {
		iser_info("FMR supported, using FMR for registration\n");
		device->iser_alloc_rdma_reg_res = iser_create_fmr_pool;
		device->iser_free_rdma_reg_res = iser_free_fmr_pool;
		device->iser_reg_rdma_mem = iser_reg_rdma_mem_fmr;
		device->iser_unreg_rdma_mem = iser_unreg_mem_fmr;
	} else if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
		iser_info("FastReg supported, using FastReg for registration\n");
		device->iser_alloc_rdma_reg_res = iser_create_fastreg_pool;
		device->iser_free_rdma_reg_res = iser_free_fastreg_pool;
		device->iser_reg_rdma_mem = iser_reg_rdma_mem_fastreg;
		device->iser_unreg_rdma_mem = iser_unreg_mem_fastreg;
	} else {
		iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n");
		return -1;
	}

	device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors);
	iser_info("using %d CQs, device %s supports %d vectors\n",
		  device->cqs_used, device->ib_device->name,
		  device->ib_device->num_comp_vectors);

	device->cq_desc = kmalloc(sizeof(struct iser_cq_desc) * device->cqs_used,
				  GFP_KERNEL);
	if (device->cq_desc == NULL)
		goto cq_desc_err;
	cq_desc = device->cq_desc;

	device->pd = ib_alloc_pd(device->ib_device);
	if (IS_ERR(device->pd))
		goto pd_err;

	for (i = 0; i < device->cqs_used; i++) {
		cq_desc[i].device = device;
		cq_desc[i].cq_index = i;

		device->rx_cq[i] = ib_create_cq(device->ib_device,
						iser_cq_callback,
						iser_cq_event_callback,
						(void *)&cq_desc[i],
						ISER_MAX_RX_CQ_LEN, i);
		if (IS_ERR(device->rx_cq[i]))
			goto cq_err;

		device->tx_cq[i] = ib_create_cq(device->ib_device,
						NULL, iser_cq_event_callback,
						(void *)&cq_desc[i],
						ISER_MAX_TX_CQ_LEN, i);
		if (IS_ERR(device->tx_cq[i]))
			goto cq_err;

		if (ib_req_notify_cq(device->rx_cq[i], IB_CQ_NEXT_COMP))
			goto cq_err;

		tasklet_init(&device->cq_tasklet[i],
			     iser_cq_tasklet_fn,
			     (unsigned long)&cq_desc[i]);
	}

	device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
				   IB_ACCESS_REMOTE_WRITE |
				   IB_ACCESS_REMOTE_READ);
	if (IS_ERR(device->mr))
		goto dma_mr_err;

	INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
			      iser_event_handler);
	if (ib_register_event_handler(&device->event_handler))
		goto handler_err;

	return 0;

handler_err:
	ib_dereg_mr(device->mr);
dma_mr_err:
	for (j = 0; j < device->cqs_used; j++)
		tasklet_kill(&device->cq_tasklet[j]);
cq_err:
	/* the iser_device is zeroed on allocation, so entries never created
	 * are NULL; the entry that failed creation holds an ERR_PTR */
	for (j = 0; j <= i && j < device->cqs_used; j++) {
		if (device->tx_cq[j] && !IS_ERR(device->tx_cq[j]))
			ib_destroy_cq(device->tx_cq[j]);
		if (device->rx_cq[j] && !IS_ERR(device->rx_cq[j]))
			ib_destroy_cq(device->rx_cq[j]);
	}
	ib_dealloc_pd(device->pd);
pd_err:
	kfree(device->cq_desc);
cq_desc_err:
	iser_err("failed to allocate an IB resource\n");
	return -1;
}
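/*
 * Teardown mirrors creation: each polling tasklet is killed before the CQs
 * it polls are destroyed, and the async event handler, DMA MR and PD are
 * released only once no CQ can deliver further work.
 */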
/**
 * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR,
 * CQs and PD created with the device associated with the adapter.
 */
static void iser_free_device_ib_res(struct iser_device *device)
{
	int i;

	BUG_ON(device->mr == NULL);

	for (i = 0; i < device->cqs_used; i++) {
		tasklet_kill(&device->cq_tasklet[i]);
		(void)ib_destroy_cq(device->tx_cq[i]);
		(void)ib_destroy_cq(device->rx_cq[i]);
		device->tx_cq[i] = NULL;
		device->rx_cq[i] = NULL;
	}

	(void)ib_unregister_event_handler(&device->event_handler);
	(void)ib_dereg_mr(device->mr);
	(void)ib_dealloc_pd(device->pd);

	kfree(device->cq_desc);

	device->mr = NULL;
	device->pd = NULL;
}

/**
 * iser_create_fmr_pool - Creates FMR pool and page_vec
 *
 * returns 0 on success, or errno code on failure
 */
int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
{
	struct iser_device *device = ib_conn->device;
	struct ib_fmr_pool_param params;
	int ret = -ENOMEM;

	ib_conn->fmr.page_vec = kmalloc(sizeof(*ib_conn->fmr.page_vec) +
					(sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE + 1)),
					GFP_KERNEL);
	if (!ib_conn->fmr.page_vec)
		return ret;

	ib_conn->fmr.page_vec->pages = (u64 *)(ib_conn->fmr.page_vec + 1);

	params.page_shift = SHIFT_4K;
	/* when the first/last SG element are not start/end
	 * page aligned, the map would be of N+1 pages */
	params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
	/* make the pool size twice the max number of SCSI commands
	 * the ML is expected to queue, watermark for unmap at 50% */
	params.pool_size = cmds_max * 2;
	params.dirty_watermark = cmds_max;
	params.cache = 0;
	params.flush_function = NULL;
	params.access = (IB_ACCESS_LOCAL_WRITE |
			 IB_ACCESS_REMOTE_WRITE |
			 IB_ACCESS_REMOTE_READ);

	ib_conn->fmr.pool = ib_create_fmr_pool(device->pd, &params);
	if (!IS_ERR(ib_conn->fmr.pool))
		return 0;

	/* no FMR => no need for page_vec */
	kfree(ib_conn->fmr.page_vec);
	ib_conn->fmr.page_vec = NULL;

	ret = PTR_ERR(ib_conn->fmr.pool);
	ib_conn->fmr.pool = NULL;
	if (ret != -ENOSYS) {
		iser_err("FMR allocation failed, err %d\n", ret);
		return ret;
	} else {
		iser_warn("FMRs are not supported, using unaligned mode\n");
		return 0;
	}
}

/**
 * iser_free_fmr_pool - releases the FMR pool and page vec
 */
void iser_free_fmr_pool(struct iser_conn *ib_conn)
{
	iser_info("freeing conn %p fmr pool %p\n",
		  ib_conn, ib_conn->fmr.pool);

	if (ib_conn->fmr.pool != NULL)
		ib_destroy_fmr_pool(ib_conn->fmr.pool);

	ib_conn->fmr.pool = NULL;

	kfree(ib_conn->fmr.page_vec);
	ib_conn->fmr.page_vec = NULL;
}
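/*
 * A fast_reg descriptor bundles everything one in-flight command needs for
 * fast registration: a page list and MR for the data buffer and, when T10-PI
 * is enabled, a pi_ctx holding a second page list/MR for the protection
 * buffer plus a signature-enabled MR covering both.  The data page list is
 * sized ISCSI_ISER_SG_TABLESIZE + 1 to allow for unaligned first/last pages.
 */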
static int
iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd,
			 bool pi_enable, struct fast_reg_descriptor *desc)
{
	int ret;

	desc->data_frpl = ib_alloc_fast_reg_page_list(ib_device,
						      ISCSI_ISER_SG_TABLESIZE + 1);
	if (IS_ERR(desc->data_frpl)) {
		ret = PTR_ERR(desc->data_frpl);
		iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n",
			 ret);
		return ret;
	}

	desc->data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE + 1);
	if (IS_ERR(desc->data_mr)) {
		ret = PTR_ERR(desc->data_mr);
		iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
		goto fast_reg_mr_failure;
	}
	desc->reg_indicators |= ISER_DATA_KEY_VALID;

	if (pi_enable) {
		struct ib_mr_init_attr mr_init_attr = {0};
		struct iser_pi_context *pi_ctx = NULL;

		desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
		if (!desc->pi_ctx) {
			iser_err("Failed to allocate pi context\n");
			ret = -ENOMEM;
			goto pi_ctx_alloc_failure;
		}
		pi_ctx = desc->pi_ctx;

		pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device,
						ISCSI_ISER_SG_TABLESIZE);
		if (IS_ERR(pi_ctx->prot_frpl)) {
			ret = PTR_ERR(pi_ctx->prot_frpl);
			iser_err("Failed to allocate prot frpl ret=%d\n",
				 ret);
			goto prot_frpl_failure;
		}

		pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd,
						ISCSI_ISER_SG_TABLESIZE + 1);
		if (IS_ERR(pi_ctx->prot_mr)) {
			ret = PTR_ERR(pi_ctx->prot_mr);
			iser_err("Failed to allocate prot frmr ret=%d\n",
				 ret);
			goto prot_mr_failure;
		}
		desc->reg_indicators |= ISER_PROT_KEY_VALID;

		mr_init_attr.max_reg_descriptors = 2;
		mr_init_attr.flags |= IB_MR_SIGNATURE_EN;
		pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
		if (IS_ERR(pi_ctx->sig_mr)) {
			ret = PTR_ERR(pi_ctx->sig_mr);
			iser_err("Failed to allocate signature enabled mr err=%d\n",
				 ret);
			goto sig_mr_failure;
		}
		desc->reg_indicators |= ISER_SIG_KEY_VALID;
	}
	desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;

	iser_dbg("Create fr_desc %p page_list %p\n",
		 desc, desc->data_frpl->page_list);

	return 0;
sig_mr_failure:
	ib_dereg_mr(desc->pi_ctx->prot_mr);
prot_mr_failure:
	ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
prot_frpl_failure:
	kfree(desc->pi_ctx);
pi_ctx_alloc_failure:
	ib_dereg_mr(desc->data_mr);
fast_reg_mr_failure:
	ib_free_fast_reg_page_list(desc->data_frpl);

	return ret;
}
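/*
 * The connection-level pool is sized one descriptor per outstanding command,
 * so registration never has to wait for a free descriptor: the registration
 * path takes one off the list, and iser_unreg_mem_fastreg() returns it under
 * ib_conn->lock.
 */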
/**
 * iser_create_fastreg_pool - Creates pool of fast_reg descriptors
 * for fast registration work requests.
 *
 * returns 0 on success, or errno code on failure
 */
int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max)
{
	struct iser_device *device = ib_conn->device;
	struct fast_reg_descriptor *desc;
	int i, ret;

	INIT_LIST_HEAD(&ib_conn->fastreg.pool);
	ib_conn->fastreg.pool_size = 0;
	for (i = 0; i < cmds_max; i++) {
		desc = kzalloc(sizeof(*desc), GFP_KERNEL);
		if (!desc) {
			iser_err("Failed to allocate a new fast_reg descriptor\n");
			ret = -ENOMEM;
			goto err;
		}

		ret = iser_create_fastreg_desc(device->ib_device, device->pd,
					       ib_conn->pi_support, desc);
		if (ret) {
			iser_err("Failed to create fastreg descriptor err=%d\n",
				 ret);
			kfree(desc);
			goto err;
		}

		list_add_tail(&desc->list, &ib_conn->fastreg.pool);
		ib_conn->fastreg.pool_size++;
	}

	return 0;

err:
	iser_free_fastreg_pool(ib_conn);
	return ret;
}

/**
 * iser_free_fastreg_pool - releases the pool of fast_reg descriptors
 */
void iser_free_fastreg_pool(struct iser_conn *ib_conn)
{
	struct fast_reg_descriptor *desc, *tmp;
	int i = 0;

	if (list_empty(&ib_conn->fastreg.pool))
		return;

	iser_info("freeing conn %p fr pool\n", ib_conn);

	list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.pool, list) {
		list_del(&desc->list);
		ib_free_fast_reg_page_list(desc->data_frpl);
		ib_dereg_mr(desc->data_mr);
		if (desc->pi_ctx) {
			ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
			ib_dereg_mr(desc->pi_ctx->prot_mr);
			ib_destroy_mr(desc->pi_ctx->sig_mr);
			kfree(desc->pi_ctx);
		}
		kfree(desc);
		++i;
	}

	if (i < ib_conn->fastreg.pool_size)
		iser_warn("pool still has %d regions registered\n",
			  ib_conn->fastreg.pool_size - i);
}

/**
 * iser_create_ib_conn_res - creates a Queue-Pair (QP)
 *
 * returns 0 on success, errno code on failure
 */
static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
{
	struct iser_device *device;
	struct ib_qp_init_attr init_attr;
	int ret = -ENOMEM;
	int index, min_index = 0;

	BUG_ON(ib_conn->device == NULL);

	device = ib_conn->device;

	memset(&init_attr, 0, sizeof init_attr);

	mutex_lock(&ig.connlist_mutex);
	/* select the CQ with the minimal number of usages */
	for (index = 0; index < device->cqs_used; index++)
		if (device->cq_active_qps[index] <
		    device->cq_active_qps[min_index])
			min_index = index;
	device->cq_active_qps[min_index]++;
	mutex_unlock(&ig.connlist_mutex);
	iser_info("cq index %d used for ib_conn %p\n", min_index, ib_conn);

	init_attr.event_handler = iser_qp_event_callback;
	init_attr.qp_context = (void *)ib_conn;
	init_attr.send_cq = device->tx_cq[min_index];
	init_attr.recv_cq = device->rx_cq[min_index];
	init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
	init_attr.cap.max_send_sge = 2;
	init_attr.cap.max_recv_sge = 1;
	init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	init_attr.qp_type = IB_QPT_RC;
	if (ib_conn->pi_support) {
		init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS;
		init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN;
	} else {
		init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
	}

	ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
	if (ret)
		goto out_err;

	ib_conn->qp = ib_conn->cma_id->qp;
	iser_info("setting conn %p cma_id %p qp %p\n",
		  ib_conn, ib_conn->cma_id,
		  ib_conn->cma_id->qp);
	return ret;

out_err:
	iser_err("unable to alloc mem or create resource, err %d\n", ret);
	return ret;
}
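/*
 * QPs are spread over the per-vector CQ pairs by the cq_active_qps[] usage
 * counters: creation picks the least-loaded index under ig.connlist_mutex,
 * and teardown recovers that index through the recv CQ's cq_context before
 * destroying the QP.
 */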
/**
 * releases the QP object and drops the usage count of the CQ it
 * was attached to; always returns 0
 */
static int iser_free_ib_conn_res(struct iser_conn *ib_conn)
{
	int cq_index;

	BUG_ON(ib_conn == NULL);

	iser_info("freeing conn %p cma_id %p qp %p\n",
		  ib_conn, ib_conn->cma_id,
		  ib_conn->qp);

	/* qp is created only once both addr & route are resolved */

	if (ib_conn->qp != NULL) {
		cq_index = ((struct iser_cq_desc *)ib_conn->qp->recv_cq->cq_context)->cq_index;
		ib_conn->device->cq_active_qps[cq_index]--;

		rdma_destroy_qp(ib_conn->cma_id);
	}

	ib_conn->qp = NULL;

	return 0;
}

/**
 * based on the resolved device node GUID see if there is an already
 * allocated device for this IB device. If there is no such device,
 * create one.
 */
static
struct iser_device *iser_device_find_by_ib_device(struct rdma_cm_id *cma_id)
{
	struct iser_device *device;

	mutex_lock(&ig.device_list_mutex);

	list_for_each_entry(device, &ig.device_list, ig_list)
		/* find if there's a match using the node GUID */
		if (device->ib_device->node_guid == cma_id->device->node_guid)
			goto inc_refcnt;

	device = kzalloc(sizeof *device, GFP_KERNEL);
	if (device == NULL)
		goto out;

	/* couple this iser device with the IB device resolved by the CM */
	device->ib_device = cma_id->device;
	/* init the device and link it into ig device list */
	if (iser_create_device_ib_res(device)) {
		kfree(device);
		device = NULL;
		goto out;
	}
	list_add(&device->ig_list, &ig.device_list);

inc_refcnt:
	device->refcount++;
out:
	mutex_unlock(&ig.device_list_mutex);
	return device;
}

/* if there's no demand for this device, release it */
static void iser_device_try_release(struct iser_device *device)
{
	mutex_lock(&ig.device_list_mutex);
	device->refcount--;
	iser_info("device %p refcount %d\n", device, device->refcount);
	if (!device->refcount) {
		iser_free_device_ib_res(device);
		list_del(&device->ig_list);
		kfree(device);
	}
	mutex_unlock(&ig.device_list_mutex);
}

static int iser_conn_state_comp_exch(struct iser_conn *ib_conn,
				     enum iser_ib_conn_state comp,
				     enum iser_ib_conn_state exch)
{
	int ret;

	spin_lock_bh(&ib_conn->lock);
	ret = (ib_conn->state == comp);
	if (ret)
		ib_conn->state = exch;
	spin_unlock_bh(&ib_conn->lock);

	return ret;
}

void iser_release_work(struct work_struct *work)
{
	struct iser_conn *ib_conn;

	ib_conn = container_of(work, struct iser_conn, release_work);

	/* wait for .conn_stop callback */
	wait_for_completion(&ib_conn->stop_completion);

	/* wait for the QP's posted send and receive buffers to drain */
	wait_event_interruptible(ib_conn->wait,
				 ib_conn->state == ISER_CONN_DOWN);

	iser_conn_release(ib_conn);
}
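/*
 * In the normal path, release runs from the release work above, i.e. only
 * after .conn_stop has completed and the state reached ISER_CONN_DOWN, so
 * no posted buffers remain and tearing down the RX descriptors and the QP
 * is safe.
 */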
/**
 * Frees all conn objects and deallocs conn descriptor
 */
void iser_conn_release(struct iser_conn *ib_conn)
{
	struct iser_device *device = ib_conn->device;

	BUG_ON(ib_conn->state == ISER_CONN_UP);

	mutex_lock(&ig.connlist_mutex);
	list_del(&ib_conn->conn_list);
	mutex_unlock(&ig.connlist_mutex);
	iser_free_rx_descriptors(ib_conn);
	iser_free_ib_conn_res(ib_conn);
	ib_conn->device = NULL;
	/* on EVENT_ADDR_ERROR there's no device yet for this conn */
	if (device != NULL)
		iser_device_try_release(device);
	/* in the cma handler context the caller actually destroys the id */
	if (ib_conn->cma_id != NULL) {
		rdma_destroy_id(ib_conn->cma_id);
		ib_conn->cma_id = NULL;
	}
	iscsi_destroy_endpoint(ib_conn->ep);
}

/**
 * triggers start of the disconnect procedures and waits for them to be done
 */
void iser_conn_terminate(struct iser_conn *ib_conn)
{
	int err = 0;

	/* change the ib conn state only if the conn is UP, however always
	 * call rdma_disconnect since this is the only way to cause the CMA
	 * to change the QP state to ERROR
	 */
	iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, ISER_CONN_TERMINATING);
	err = rdma_disconnect(ib_conn->cma_id);
	if (err)
		iser_err("Failed to disconnect, conn: 0x%p err %d\n",
			 ib_conn, err);
}

static void iser_connect_error(struct rdma_cm_id *cma_id)
{
	struct iser_conn *ib_conn;

	ib_conn = (struct iser_conn *)cma_id->context;

	ib_conn->state = ISER_CONN_DOWN;
	wake_up_interruptible(&ib_conn->wait);
}

static void iser_addr_handler(struct rdma_cm_id *cma_id)
{
	struct iser_device *device;
	struct iser_conn *ib_conn;
	int ret;

	device = iser_device_find_by_ib_device(cma_id);
	if (!device) {
		iser_err("device lookup/creation failed\n");
		iser_connect_error(cma_id);
		return;
	}

	ib_conn = (struct iser_conn *)cma_id->context;
	ib_conn->device = device;

	/* connection T10-PI support */
	if (iser_pi_enable) {
		if (!(device->dev_attr.device_cap_flags &
		      IB_DEVICE_SIGNATURE_HANDOVER)) {
			iser_warn("T10-PI requested but not supported on %s, "
				  "continue without T10-PI\n",
				  ib_conn->device->ib_device->name);
			ib_conn->pi_support = false;
		} else {
			ib_conn->pi_support = true;
		}
	}

	ret = rdma_resolve_route(cma_id, 1000);
	if (ret) {
		iser_err("resolve route failed: %d\n", ret);
		iser_connect_error(cma_id);
		return;
	}
}

static void iser_route_handler(struct rdma_cm_id *cma_id)
{
	struct rdma_conn_param conn_param;
	int ret;
	struct iser_cm_hdr req_hdr;

	ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context);
	if (ret)
		goto failure;

	memset(&conn_param, 0, sizeof conn_param);
	conn_param.responder_resources = 4;
	conn_param.initiator_depth = 1;
	conn_param.retry_count = 7;
	conn_param.rnr_retry_count = 6;

	memset(&req_hdr, 0, sizeof(req_hdr));
	req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED |
			 ISER_SEND_W_INV_NOT_SUPPORTED);
	conn_param.private_data = (void *)&req_hdr;
	conn_param.private_data_len = sizeof(struct iser_cm_hdr);

	ret = rdma_connect(cma_id, &conn_param);
	if (ret) {
		iser_err("failure connecting: %d\n", ret);
		goto failure;
	}

	return;
failure:
	iser_connect_error(cma_id);
}

static void iser_connected_handler(struct rdma_cm_id *cma_id)
{
	struct iser_conn *ib_conn;
	struct ib_qp_attr attr;
	struct ib_qp_init_attr init_attr;

	(void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);
	iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);

	ib_conn = (struct iser_conn *)cma_id->context;
	if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_PENDING, ISER_CONN_UP))
		wake_up_interruptible(&ib_conn->wait);
}
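/*
 * A disconnect may arrive because iser_conn_terminate() called
 * rdma_disconnect(), or because the peer or the fabric went away.  Either
 * way the iSCSI layer is notified, while the transition to ISER_CONN_DOWN
 * is deferred until all posted buffers have completed or flushed (either
 * here or in iser_handle_comp_error()).
 */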
static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
{
	struct iser_conn *ib_conn;

	ib_conn = (struct iser_conn *)cma_id->context;

	/* getting here when the state is UP means that the conn is being
	 * terminated asynchronously from the iSCSI layer's perspective. */
	if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
				      ISER_CONN_TERMINATING)) {
		if (ib_conn->iscsi_conn)
			iscsi_conn_failure(ib_conn->iscsi_conn, ISCSI_ERR_CONN_FAILED);
		else
			iser_err("iscsi_iser connection isn't bound\n");
	}

	/* Complete the termination process if no posts are pending */
	if (ib_conn->post_recv_buf_count == 0 &&
	    (atomic_read(&ib_conn->post_send_buf_count) == 0)) {
		ib_conn->state = ISER_CONN_DOWN;
		wake_up_interruptible(&ib_conn->wait);
	}
}

static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
	iser_info("event %d status %d conn %p id %p\n",
		  event->event, event->status, cma_id->context, cma_id);

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		iser_addr_handler(cma_id);
		break;
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		iser_route_handler(cma_id);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		iser_connected_handler(cma_id);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
	case RDMA_CM_EVENT_ROUTE_ERROR:
	case RDMA_CM_EVENT_CONNECT_ERROR:
	case RDMA_CM_EVENT_UNREACHABLE:
	case RDMA_CM_EVENT_REJECTED:
		iser_connect_error(cma_id);
		break;
	case RDMA_CM_EVENT_DISCONNECTED:
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
	case RDMA_CM_EVENT_ADDR_CHANGE:
		iser_disconnected_handler(cma_id);
		break;
	default:
		iser_err("Unexpected RDMA CM event (%d)\n", event->event);
		break;
	}
	return 0;
}

void iser_conn_init(struct iser_conn *ib_conn)
{
	ib_conn->state = ISER_CONN_INIT;
	init_waitqueue_head(&ib_conn->wait);
	ib_conn->post_recv_buf_count = 0;
	atomic_set(&ib_conn->post_send_buf_count, 0);
	init_completion(&ib_conn->stop_completion);
	INIT_LIST_HEAD(&ib_conn->conn_list);
	spin_lock_init(&ib_conn->lock);
}
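/*
 * Note: the ep_connect path in iscsi_iser.c is the expected caller of
 * iser_connect() below.  Roughly (a sketch; the exact logic lives in the
 * ep_connect/ep_poll transport handlers, not here):
 *
 *	err = iser_connect(ib_conn, NULL, dst_addr, 1);	// non-blocking
 *	...
 *	rc = wait_event_interruptible_timeout(ib_conn->wait,
 *			ib_conn->state != ISER_CONN_PENDING,
 *			msecs_to_jiffies(timeout_ms));
 */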
/**
 * starts the process of connecting to the target;
 * unless non_blocking is set, sleeps until the connection
 * is established or rejected
 */
int iser_connect(struct iser_conn *ib_conn,
		 struct sockaddr_in *src_addr,
		 struct sockaddr_in *dst_addr,
		 int non_blocking)
{
	struct sockaddr *src, *dst;
	int err = 0;

	sprintf(ib_conn->name, "%pI4:%d",
		&dst_addr->sin_addr.s_addr, dst_addr->sin_port);

	/* the device is known only --after-- address resolution */
	ib_conn->device = NULL;

	iser_info("connecting to: %pI4, port 0x%x\n",
		  &dst_addr->sin_addr, dst_addr->sin_port);

	ib_conn->state = ISER_CONN_PENDING;

	ib_conn->cma_id = rdma_create_id(iser_cma_handler,
					 (void *)ib_conn,
					 RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(ib_conn->cma_id)) {
		err = PTR_ERR(ib_conn->cma_id);
		iser_err("rdma_create_id failed: %d\n", err);
		goto id_failure;
	}

	src = (struct sockaddr *)src_addr;
	dst = (struct sockaddr *)dst_addr;
	err = rdma_resolve_addr(ib_conn->cma_id, src, dst, 1000);
	if (err) {
		iser_err("rdma_resolve_addr failed: %d\n", err);
		goto addr_failure;
	}

	if (!non_blocking) {
		wait_event_interruptible(ib_conn->wait,
					 (ib_conn->state != ISER_CONN_PENDING));

		if (ib_conn->state != ISER_CONN_UP) {
			err = -EIO;
			goto connect_failure;
		}
	}

	mutex_lock(&ig.connlist_mutex);
	list_add(&ib_conn->conn_list, &ig.connlist);
	mutex_unlock(&ig.connlist_mutex);
	return 0;

id_failure:
	ib_conn->cma_id = NULL;
addr_failure:
	ib_conn->state = ISER_CONN_DOWN;
connect_failure:
	iser_conn_release(ib_conn);
	return err;
}

/**
 * iser_reg_page_vec - Register physical memory
 *
 * returns: 0 on success, errno code on failure
 */
int iser_reg_page_vec(struct iser_conn *ib_conn,
		      struct iser_page_vec *page_vec,
		      struct iser_mem_reg *mem_reg)
{
	struct ib_pool_fmr *mem;
	u64 io_addr;
	u64 *page_list;
	int status;

	page_list = page_vec->pages;
	io_addr = page_list[0];

	mem = ib_fmr_pool_map_phys(ib_conn->fmr.pool,
				   page_list,
				   page_vec->length,
				   io_addr);

	if (IS_ERR(mem)) {
		status = (int)PTR_ERR(mem);
		iser_err("ib_fmr_pool_map_phys failed: %d\n", status);
		return status;
	}

	mem_reg->lkey = mem->fmr->lkey;
	mem_reg->rkey = mem->fmr->rkey;
	mem_reg->len = page_vec->length * SIZE_4K;
	mem_reg->va = io_addr;
	mem_reg->is_mr = 1;
	mem_reg->mem_h = (void *)mem;

	mem_reg->va += page_vec->offset;
	mem_reg->len = page_vec->data_size;

	iser_dbg("PHYSICAL Mem.register, [PHYS p_array: 0x%p, sz: %d, "
		 "entry[0]: (0x%08lx,%ld)] -> "
		 "[lkey: 0x%08X mem_h: 0x%p va: 0x%08lX sz: %ld]\n",
		 page_vec, page_vec->length,
		 (unsigned long)page_vec->pages[0],
		 (unsigned long)page_vec->data_size,
		 (unsigned int)mem_reg->lkey, mem_reg->mem_h,
		 (unsigned long)mem_reg->va, (unsigned long)mem_reg->len);
	return 0;
}

/**
 * Unregister (previously registered using FMR) memory.
 * If memory is non-FMR does nothing.
 */
void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
			enum iser_data_dir cmd_dir)
{
	struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
	int ret;

	if (!reg->is_mr)
		return;

	iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h);

	ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
	if (ret)
		iser_err("ib_fmr_pool_unmap failed %d\n", ret);

	reg->mem_h = NULL;
}

void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
			    enum iser_data_dir cmd_dir)
{
	struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
	struct iser_conn *ib_conn = iser_task->ib_conn;
	struct fast_reg_descriptor *desc = reg->mem_h;

	if (!reg->is_mr)
		return;

	reg->mem_h = NULL;
	reg->is_mr = 0;
	spin_lock_bh(&ib_conn->lock);
	list_add_tail(&desc->list, &ib_conn->fastreg.pool);
	spin_unlock_bh(&ib_conn->lock);
}

int iser_post_recvl(struct iser_conn *ib_conn)
{
	struct ib_recv_wr rx_wr, *rx_wr_failed;
	struct ib_sge sge;
	int ib_ret;

	sge.addr = ib_conn->login_resp_dma;
	sge.length = ISER_RX_LOGIN_SIZE;
	sge.lkey = ib_conn->device->mr->lkey;

	rx_wr.wr_id = (unsigned long)ib_conn->login_resp_buf;
	rx_wr.sg_list = &sge;
	rx_wr.num_sge = 1;
	rx_wr.next = NULL;

	ib_conn->post_recv_buf_count++;
	ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
	if (ib_ret) {
		iser_err("ib_post_recv failed ret=%d\n", ib_ret);
		ib_conn->post_recv_buf_count--;
	}
	return ib_ret;
}
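/*
 * The receive ring: rx_descs is a power-of-two array indexed by
 * rx_desc_head, which wraps through qp_max_recv_dtos_mask.  A batch of WRs
 * is chained via ->next and posted with a single ib_post_recv(); the head
 * only advances if the post succeeds.
 */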
int iser_post_recvm(struct iser_conn *ib_conn, int count)
{
	struct ib_recv_wr *rx_wr, *rx_wr_failed;
	int i, ib_ret;
	unsigned int my_rx_head = ib_conn->rx_desc_head;
	struct iser_rx_desc *rx_desc;

	for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
		rx_desc = &ib_conn->rx_descs[my_rx_head];
		rx_wr->wr_id = (unsigned long)rx_desc;
		rx_wr->sg_list = &rx_desc->rx_sg;
		rx_wr->num_sge = 1;
		rx_wr->next = rx_wr + 1;
		my_rx_head = (my_rx_head + 1) & ib_conn->qp_max_recv_dtos_mask;
	}

	rx_wr--;
	rx_wr->next = NULL; /* mark end of work requests list */

	ib_conn->post_recv_buf_count += count;
	ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
	if (ib_ret) {
		iser_err("ib_post_recv failed ret=%d\n", ib_ret);
		ib_conn->post_recv_buf_count -= count;
	} else
		ib_conn->rx_desc_head = my_rx_head;
	return ib_ret;
}


/**
 * iser_post_send - Initiate a Send DTO operation
 *
 * returns 0 on success, errno code on failure
 */
int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc)
{
	int ib_ret;
	struct ib_send_wr send_wr, *send_wr_failed;

	ib_dma_sync_single_for_device(ib_conn->device->ib_device,
				      tx_desc->dma_addr, ISER_HEADERS_LEN,
				      DMA_TO_DEVICE);

	send_wr.next = NULL;
	send_wr.wr_id = (unsigned long)tx_desc;
	send_wr.sg_list = tx_desc->tx_sg;
	send_wr.num_sge = tx_desc->num_sge;
	send_wr.opcode = IB_WR_SEND;
	send_wr.send_flags = IB_SEND_SIGNALED;

	atomic_inc(&ib_conn->post_send_buf_count);

	ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed);
	if (ib_ret) {
		iser_err("ib_post_send failed, ret:%d\n", ib_ret);
		atomic_dec(&ib_conn->post_send_buf_count);
	}
	return ib_ret;
}
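/*
 * Called for every erroneous (typically flushed) work request once its post
 * counter has been dropped.  DATAOUT descriptors are the only TX descriptors
 * allocated per-WR from ig.desc_cache, so they are freed here; once both
 * post counters reach zero the connection may go DOWN even if no disconnect
 * event ever arrives.
 */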
static void iser_handle_comp_error(struct iser_tx_desc *desc,
				   struct iser_conn *ib_conn)
{
	if (desc && desc->type == ISCSI_TX_DATAOUT)
		kmem_cache_free(ig.desc_cache, desc);

	if (ib_conn->post_recv_buf_count == 0 &&
	    atomic_read(&ib_conn->post_send_buf_count) == 0) {
		/* getting here when the state is UP means that the conn is
		 * being terminated asynchronously from the iSCSI layer's
		 * perspective. */
		if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
					      ISER_CONN_TERMINATING))
			iscsi_conn_failure(ib_conn->iscsi_conn,
					   ISCSI_ERR_CONN_FAILED);

		/* no more uncompleted posts to the QP, complete the
		 * termination process without worrying about the
		 * disconnect event */
		ib_conn->state = ISER_CONN_DOWN;
		wake_up_interruptible(&ib_conn->wait);
	}
}

static int iser_drain_tx_cq(struct iser_device *device, int cq_index)
{
	struct ib_cq *cq = device->tx_cq[cq_index];
	struct ib_wc wc;
	struct iser_tx_desc *tx_desc;
	struct iser_conn *ib_conn;
	int completed_tx = 0;

	while (ib_poll_cq(cq, 1, &wc) == 1) {
		tx_desc = (struct iser_tx_desc *)(unsigned long)wc.wr_id;
		ib_conn = wc.qp->qp_context;
		if (wc.status == IB_WC_SUCCESS) {
			if (wc.opcode == IB_WC_SEND)
				iser_snd_completion(tx_desc, ib_conn);
			else
				iser_err("expected opcode %d got %d\n",
					 IB_WC_SEND, wc.opcode);
		} else {
			iser_err("tx id %llx status %d vend_err %x\n",
				 wc.wr_id, wc.status, wc.vendor_err);
			if (wc.wr_id != ISER_FASTREG_LI_WRID) {
				atomic_dec(&ib_conn->post_send_buf_count);
				iser_handle_comp_error(tx_desc, ib_conn);
			}
		}
		completed_tx++;
	}
	return completed_tx;
}
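/*
 * RX completions are processed in tasklet (softirq) context rather than in
 * the interrupt handler itself.  The TX CQ is never armed - it is drained
 * opportunistically from here, once before the RX loop and again every 64
 * RX completions, so TX completions cannot starve under heavy RX flush
 * traffic.
 */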
static void iser_cq_tasklet_fn(unsigned long data)
{
	struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)data;
	struct iser_device *device = cq_desc->device;
	int cq_index = cq_desc->cq_index;
	struct ib_cq *cq = device->rx_cq[cq_index];
	struct ib_wc wc;
	struct iser_rx_desc *desc;
	unsigned long xfer_len;
	struct iser_conn *ib_conn;
	int completed_tx, completed_rx = 0;

	/* First do tx drain, so in a case where we have rx flushes and a
	 * successful tx completion we will still go through completion
	 * error handling.
	 */
	completed_tx = iser_drain_tx_cq(device, cq_index);

	while (ib_poll_cq(cq, 1, &wc) == 1) {
		desc = (struct iser_rx_desc *)(unsigned long)wc.wr_id;
		BUG_ON(desc == NULL);
		ib_conn = wc.qp->qp_context;
		if (wc.status == IB_WC_SUCCESS) {
			if (wc.opcode == IB_WC_RECV) {
				xfer_len = (unsigned long)wc.byte_len;
				iser_rcv_completion(desc, xfer_len, ib_conn);
			} else
				iser_err("expected opcode %d got %d\n",
					 IB_WC_RECV, wc.opcode);
		} else {
			if (wc.status != IB_WC_WR_FLUSH_ERR)
				iser_err("rx id %llx status %d vend_err %x\n",
					 wc.wr_id, wc.status, wc.vendor_err);
			ib_conn->post_recv_buf_count--;
			iser_handle_comp_error(NULL, ib_conn);
		}
		completed_rx++;
		if (!(completed_rx & 63))
			completed_tx += iser_drain_tx_cq(device, cq_index);
	}
	/* it is assumed here that arming the CQ only once it is empty
	 * would not cause interrupts to be missed */
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);

	iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx);
}

static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
{
	struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)cq_context;
	struct iser_device *device = cq_desc->device;
	int cq_index = cq_desc->cq_index;

	tasklet_schedule(&device->cq_tasklet[cq_index]);
}

u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
			     enum iser_data_dir cmd_dir, sector_t *sector)
{
	struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
	struct fast_reg_descriptor *desc = reg->mem_h;
	unsigned long sector_size = iser_task->sc->device->sector_size;
	struct ib_mr_status mr_status;
	int ret;

	if (desc && desc->reg_indicators & ISER_FASTREG_PROTECTED) {
		desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;
		ret = ib_check_mr_status(desc->pi_ctx->sig_mr,
					 IB_MR_CHECK_SIG_STATUS, &mr_status);
		if (ret) {
			pr_err("ib_check_mr_status failed, ret %d\n", ret);
			goto err;
		}

		if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
			sector_t sector_off = mr_status.sig_err.sig_err_offset;

			/* each sector carries 8 bytes of protection
			 * information on the wire, hence sector_size + 8 */
			do_div(sector_off, sector_size + 8);
			*sector = scsi_get_lba(iser_task->sc) + sector_off;

			pr_err("PI error found type %d at sector %llx "
			       "expected %x vs actual %x\n",
			       mr_status.sig_err.err_type,
			       (unsigned long long)*sector,
			       mr_status.sig_err.expected,
			       mr_status.sig_err.actual);

			/* returned values are the sense ASCQ codes for
			 * guard/app tag/ref tag check failed (ASC 0x10) */
			switch (mr_status.sig_err.err_type) {
			case IB_SIG_BAD_GUARD:
				return 0x1;
			case IB_SIG_BAD_REFTAG:
				return 0x3;
			case IB_SIG_BAD_APPTAG:
				return 0x2;
			}
		}
	}

	return 0;
err:
	/* Not a lot we can do here, return ambiguous guard error */
	return 0x1;
}