1 /* 2 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved. 3 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. 4 * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved. 5 * 6 * This software is available to you under a choice of one of two 7 * licenses. You may choose to be licensed under the terms of the GNU 8 * General Public License (GPL) Version 2, available from the file 9 * COPYING in the main directory of this source tree, or the 10 * OpenIB.org BSD license below: 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above 17 * copyright notice, this list of conditions and the following 18 * disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials 23 * provided with the distribution. 24 * 25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * SOFTWARE. 
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/delay.h>

#include "iscsi_iser.h"

/*
 * CQ sizing: each CQ is created with room for the per-QP receive/send
 * work-request maximum multiplied by ISCSI_ISER_MAX_CONN.
 * NOTE(review): presumably this is the number of connections expected to
 * share one CQ - confirm against the QP/CQ assignment policy.
 */
#define ISCSI_ISER_MAX_CONN	8
#define ISER_MAX_RX_CQ_LEN	(ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN)
#define ISER_MAX_TX_CQ_LEN	(ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN)

/* Defined further down; referenced while the CQs are being created. */
static void iser_cq_tasklet_fn(unsigned long data);
static void iser_cq_callback(struct ib_cq *cq, void *cq_context);

/* Asynchronous CQ event callback: only logs the event code. */
static void iser_cq_event_callback(struct ib_event *cause, void *context)
{
	iser_err("got cq event %d \n", cause->event);
}

/* Asynchronous QP event callback: only logs the event code. */
static void iser_qp_event_callback(struct ib_event *cause, void *context)
{
	iser_err("got qp event %d\n",cause->event);
}

/* Device-wide asynchronous event handler: logs event, device and port. */
static void iser_event_handler(struct ib_event_handler *handler,
				struct ib_event *event)
{
	iser_err("async event %d on device %s port %d\n", event->event,
		event->device->name, event->element.port_num);
}

/**
 * iser_create_device_ib_res - creates Protection Domain (PD), Completion
 * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with
 * the adapter.
69 * 70 * returns 0 on success, -1 on failure 71 */ 72 static int iser_create_device_ib_res(struct iser_device *device) 73 { 74 struct iser_cq_desc *cq_desc; 75 struct ib_device_attr *dev_attr = &device->dev_attr; 76 int ret, i, j; 77 78 ret = ib_query_device(device->ib_device, dev_attr); 79 if (ret) { 80 pr_warn("Query device failed for %s\n", device->ib_device->name); 81 return ret; 82 } 83 84 /* Assign function handles - based on FMR support */ 85 if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr && 86 device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) { 87 iser_info("FMR supported, using FMR for registration\n"); 88 device->iser_alloc_rdma_reg_res = iser_create_fmr_pool; 89 device->iser_free_rdma_reg_res = iser_free_fmr_pool; 90 device->iser_reg_rdma_mem = iser_reg_rdma_mem_fmr; 91 device->iser_unreg_rdma_mem = iser_unreg_mem_fmr; 92 } else 93 if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { 94 iser_info("FastReg supported, using FastReg for registration\n"); 95 device->iser_alloc_rdma_reg_res = iser_create_fastreg_pool; 96 device->iser_free_rdma_reg_res = iser_free_fastreg_pool; 97 device->iser_reg_rdma_mem = iser_reg_rdma_mem_fastreg; 98 device->iser_unreg_rdma_mem = iser_unreg_mem_fastreg; 99 } else { 100 iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n"); 101 return -1; 102 } 103 104 device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors); 105 iser_info("using %d CQs, device %s supports %d vectors\n", 106 device->cqs_used, device->ib_device->name, 107 device->ib_device->num_comp_vectors); 108 109 device->cq_desc = kmalloc(sizeof(struct iser_cq_desc) * device->cqs_used, 110 GFP_KERNEL); 111 if (device->cq_desc == NULL) 112 goto cq_desc_err; 113 cq_desc = device->cq_desc; 114 115 device->pd = ib_alloc_pd(device->ib_device); 116 if (IS_ERR(device->pd)) 117 goto pd_err; 118 119 for (i = 0; i < device->cqs_used; i++) { 120 cq_desc[i].device = device; 121 
cq_desc[i].cq_index = i; 122 123 device->rx_cq[i] = ib_create_cq(device->ib_device, 124 iser_cq_callback, 125 iser_cq_event_callback, 126 (void *)&cq_desc[i], 127 ISER_MAX_RX_CQ_LEN, i); 128 if (IS_ERR(device->rx_cq[i])) 129 goto cq_err; 130 131 device->tx_cq[i] = ib_create_cq(device->ib_device, 132 NULL, iser_cq_event_callback, 133 (void *)&cq_desc[i], 134 ISER_MAX_TX_CQ_LEN, i); 135 136 if (IS_ERR(device->tx_cq[i])) 137 goto cq_err; 138 139 if (ib_req_notify_cq(device->rx_cq[i], IB_CQ_NEXT_COMP)) 140 goto cq_err; 141 142 tasklet_init(&device->cq_tasklet[i], 143 iser_cq_tasklet_fn, 144 (unsigned long)&cq_desc[i]); 145 } 146 147 device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE | 148 IB_ACCESS_REMOTE_WRITE | 149 IB_ACCESS_REMOTE_READ); 150 if (IS_ERR(device->mr)) 151 goto dma_mr_err; 152 153 INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device, 154 iser_event_handler); 155 if (ib_register_event_handler(&device->event_handler)) 156 goto handler_err; 157 158 return 0; 159 160 handler_err: 161 ib_dereg_mr(device->mr); 162 dma_mr_err: 163 for (j = 0; j < device->cqs_used; j++) 164 tasklet_kill(&device->cq_tasklet[j]); 165 cq_err: 166 for (j = 0; j < i; j++) { 167 if (device->tx_cq[j]) 168 ib_destroy_cq(device->tx_cq[j]); 169 if (device->rx_cq[j]) 170 ib_destroy_cq(device->rx_cq[j]); 171 } 172 ib_dealloc_pd(device->pd); 173 pd_err: 174 kfree(device->cq_desc); 175 cq_desc_err: 176 iser_err("failed to allocate an IB resource\n"); 177 return -1; 178 } 179 180 /** 181 * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR, 182 * CQ and PD created with the device associated with the adapator. 
183 */ 184 static void iser_free_device_ib_res(struct iser_device *device) 185 { 186 int i; 187 BUG_ON(device->mr == NULL); 188 189 for (i = 0; i < device->cqs_used; i++) { 190 tasklet_kill(&device->cq_tasklet[i]); 191 (void)ib_destroy_cq(device->tx_cq[i]); 192 (void)ib_destroy_cq(device->rx_cq[i]); 193 device->tx_cq[i] = NULL; 194 device->rx_cq[i] = NULL; 195 } 196 197 (void)ib_unregister_event_handler(&device->event_handler); 198 (void)ib_dereg_mr(device->mr); 199 (void)ib_dealloc_pd(device->pd); 200 201 kfree(device->cq_desc); 202 203 device->mr = NULL; 204 device->pd = NULL; 205 } 206 207 /** 208 * iser_create_fmr_pool - Creates FMR pool and page_vector 209 * 210 * returns 0 on success, or errno code on failure 211 */ 212 int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max) 213 { 214 struct iser_device *device = ib_conn->device; 215 struct ib_fmr_pool_param params; 216 int ret = -ENOMEM; 217 218 ib_conn->fmr.page_vec = kmalloc(sizeof(*ib_conn->fmr.page_vec) + 219 (sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)), 220 GFP_KERNEL); 221 if (!ib_conn->fmr.page_vec) 222 return ret; 223 224 ib_conn->fmr.page_vec->pages = (u64 *)(ib_conn->fmr.page_vec + 1); 225 226 params.page_shift = SHIFT_4K; 227 /* when the first/last SG element are not start/end * 228 * page aligned, the map whould be of N+1 pages */ 229 params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1; 230 /* make the pool size twice the max number of SCSI commands * 231 * the ML is expected to queue, watermark for unmap at 50% */ 232 params.pool_size = cmds_max * 2; 233 params.dirty_watermark = cmds_max; 234 params.cache = 0; 235 params.flush_function = NULL; 236 params.access = (IB_ACCESS_LOCAL_WRITE | 237 IB_ACCESS_REMOTE_WRITE | 238 IB_ACCESS_REMOTE_READ); 239 240 ib_conn->fmr.pool = ib_create_fmr_pool(device->pd, ¶ms); 241 if (!IS_ERR(ib_conn->fmr.pool)) 242 return 0; 243 244 /* no FMR => no need for page_vec */ 245 kfree(ib_conn->fmr.page_vec); 246 ib_conn->fmr.page_vec = NULL; 247 248 ret 
= PTR_ERR(ib_conn->fmr.pool); 249 ib_conn->fmr.pool = NULL; 250 if (ret != -ENOSYS) { 251 iser_err("FMR allocation failed, err %d\n", ret); 252 return ret; 253 } else { 254 iser_warn("FMRs are not supported, using unaligned mode\n"); 255 return 0; 256 } 257 } 258 259 /** 260 * iser_free_fmr_pool - releases the FMR pool and page vec 261 */ 262 void iser_free_fmr_pool(struct iser_conn *ib_conn) 263 { 264 iser_info("freeing conn %p fmr pool %p\n", 265 ib_conn, ib_conn->fmr.pool); 266 267 if (ib_conn->fmr.pool != NULL) 268 ib_destroy_fmr_pool(ib_conn->fmr.pool); 269 270 ib_conn->fmr.pool = NULL; 271 272 kfree(ib_conn->fmr.page_vec); 273 ib_conn->fmr.page_vec = NULL; 274 } 275 276 static int 277 iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd, 278 bool pi_enable, struct fast_reg_descriptor *desc) 279 { 280 int ret; 281 282 desc->data_frpl = ib_alloc_fast_reg_page_list(ib_device, 283 ISCSI_ISER_SG_TABLESIZE + 1); 284 if (IS_ERR(desc->data_frpl)) { 285 ret = PTR_ERR(desc->data_frpl); 286 iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n", 287 ret); 288 return PTR_ERR(desc->data_frpl); 289 } 290 291 desc->data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE + 1); 292 if (IS_ERR(desc->data_mr)) { 293 ret = PTR_ERR(desc->data_mr); 294 iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret); 295 goto fast_reg_mr_failure; 296 } 297 desc->reg_indicators |= ISER_DATA_KEY_VALID; 298 299 if (pi_enable) { 300 struct ib_mr_init_attr mr_init_attr = {0}; 301 struct iser_pi_context *pi_ctx = NULL; 302 303 desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL); 304 if (!desc->pi_ctx) { 305 iser_err("Failed to allocate pi context\n"); 306 ret = -ENOMEM; 307 goto pi_ctx_alloc_failure; 308 } 309 pi_ctx = desc->pi_ctx; 310 311 pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device, 312 ISCSI_ISER_SG_TABLESIZE); 313 if (IS_ERR(pi_ctx->prot_frpl)) { 314 ret = PTR_ERR(pi_ctx->prot_frpl); 315 iser_err("Failed to allocate prot frpl ret=%d\n", 316 
ret); 317 goto prot_frpl_failure; 318 } 319 320 pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd, 321 ISCSI_ISER_SG_TABLESIZE + 1); 322 if (IS_ERR(pi_ctx->prot_mr)) { 323 ret = PTR_ERR(pi_ctx->prot_mr); 324 iser_err("Failed to allocate prot frmr ret=%d\n", 325 ret); 326 goto prot_mr_failure; 327 } 328 desc->reg_indicators |= ISER_PROT_KEY_VALID; 329 330 mr_init_attr.max_reg_descriptors = 2; 331 mr_init_attr.flags |= IB_MR_SIGNATURE_EN; 332 pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr); 333 if (IS_ERR(pi_ctx->sig_mr)) { 334 ret = PTR_ERR(pi_ctx->sig_mr); 335 iser_err("Failed to allocate signature enabled mr err=%d\n", 336 ret); 337 goto sig_mr_failure; 338 } 339 desc->reg_indicators |= ISER_SIG_KEY_VALID; 340 } 341 desc->reg_indicators &= ~ISER_FASTREG_PROTECTED; 342 343 iser_dbg("Create fr_desc %p page_list %p\n", 344 desc, desc->data_frpl->page_list); 345 346 return 0; 347 sig_mr_failure: 348 ib_dereg_mr(desc->pi_ctx->prot_mr); 349 prot_mr_failure: 350 ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl); 351 prot_frpl_failure: 352 kfree(desc->pi_ctx); 353 pi_ctx_alloc_failure: 354 ib_dereg_mr(desc->data_mr); 355 fast_reg_mr_failure: 356 ib_free_fast_reg_page_list(desc->data_frpl); 357 358 return ret; 359 } 360 361 /** 362 * iser_create_fastreg_pool - Creates pool of fast_reg descriptors 363 * for fast registration work requests. 
364 * returns 0 on success, or errno code on failure 365 */ 366 int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max) 367 { 368 struct iser_device *device = ib_conn->device; 369 struct fast_reg_descriptor *desc; 370 int i, ret; 371 372 INIT_LIST_HEAD(&ib_conn->fastreg.pool); 373 ib_conn->fastreg.pool_size = 0; 374 for (i = 0; i < cmds_max; i++) { 375 desc = kzalloc(sizeof(*desc), GFP_KERNEL); 376 if (!desc) { 377 iser_err("Failed to allocate a new fast_reg descriptor\n"); 378 ret = -ENOMEM; 379 goto err; 380 } 381 382 ret = iser_create_fastreg_desc(device->ib_device, device->pd, 383 ib_conn->pi_support, desc); 384 if (ret) { 385 iser_err("Failed to create fastreg descriptor err=%d\n", 386 ret); 387 kfree(desc); 388 goto err; 389 } 390 391 list_add_tail(&desc->list, &ib_conn->fastreg.pool); 392 ib_conn->fastreg.pool_size++; 393 } 394 395 return 0; 396 397 err: 398 iser_free_fastreg_pool(ib_conn); 399 return ret; 400 } 401 402 /** 403 * iser_free_fastreg_pool - releases the pool of fast_reg descriptors 404 */ 405 void iser_free_fastreg_pool(struct iser_conn *ib_conn) 406 { 407 struct fast_reg_descriptor *desc, *tmp; 408 int i = 0; 409 410 if (list_empty(&ib_conn->fastreg.pool)) 411 return; 412 413 iser_info("freeing conn %p fr pool\n", ib_conn); 414 415 list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.pool, list) { 416 list_del(&desc->list); 417 ib_free_fast_reg_page_list(desc->data_frpl); 418 ib_dereg_mr(desc->data_mr); 419 if (desc->pi_ctx) { 420 ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl); 421 ib_dereg_mr(desc->pi_ctx->prot_mr); 422 ib_destroy_mr(desc->pi_ctx->sig_mr); 423 kfree(desc->pi_ctx); 424 } 425 kfree(desc); 426 ++i; 427 } 428 429 if (i < ib_conn->fastreg.pool_size) 430 iser_warn("pool still has %d regions registered\n", 431 ib_conn->fastreg.pool_size - i); 432 } 433 434 /** 435 * iser_create_ib_conn_res - Queue-Pair (QP) 436 * 437 * returns 0 on success, -1 on failure 438 */ 439 static int iser_create_ib_conn_res(struct 
iser_conn *ib_conn) 440 { 441 struct iser_device *device; 442 struct ib_qp_init_attr init_attr; 443 int ret = -ENOMEM; 444 int index, min_index = 0; 445 446 BUG_ON(ib_conn->device == NULL); 447 448 device = ib_conn->device; 449 450 memset(&init_attr, 0, sizeof init_attr); 451 452 mutex_lock(&ig.connlist_mutex); 453 /* select the CQ with the minimal number of usages */ 454 for (index = 0; index < device->cqs_used; index++) 455 if (device->cq_active_qps[index] < 456 device->cq_active_qps[min_index]) 457 min_index = index; 458 device->cq_active_qps[min_index]++; 459 mutex_unlock(&ig.connlist_mutex); 460 iser_info("cq index %d used for ib_conn %p\n", min_index, ib_conn); 461 462 init_attr.event_handler = iser_qp_event_callback; 463 init_attr.qp_context = (void *)ib_conn; 464 init_attr.send_cq = device->tx_cq[min_index]; 465 init_attr.recv_cq = device->rx_cq[min_index]; 466 init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; 467 init_attr.cap.max_send_sge = 2; 468 init_attr.cap.max_recv_sge = 1; 469 init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 470 init_attr.qp_type = IB_QPT_RC; 471 if (ib_conn->pi_support) { 472 init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS; 473 init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN; 474 } else { 475 init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS; 476 } 477 478 ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr); 479 if (ret) 480 goto out_err; 481 482 ib_conn->qp = ib_conn->cma_id->qp; 483 iser_info("setting conn %p cma_id %p qp %p\n", 484 ib_conn, ib_conn->cma_id, 485 ib_conn->cma_id->qp); 486 return ret; 487 488 out_err: 489 iser_err("unable to alloc mem or create resource, err %d\n", ret); 490 return ret; 491 } 492 493 /** 494 * releases the QP objects, returns 0 on success, 495 * -1 on failure 496 */ 497 static int iser_free_ib_conn_res(struct iser_conn *ib_conn) 498 { 499 int cq_index; 500 BUG_ON(ib_conn == NULL); 501 502 iser_info("freeing conn %p cma_id %p qp %p\n", 503 ib_conn, ib_conn->cma_id, 504 ib_conn->qp); 505 
506 /* qp is created only once both addr & route are resolved */ 507 508 if (ib_conn->qp != NULL) { 509 cq_index = ((struct iser_cq_desc *)ib_conn->qp->recv_cq->cq_context)->cq_index; 510 ib_conn->device->cq_active_qps[cq_index]--; 511 512 rdma_destroy_qp(ib_conn->cma_id); 513 } 514 515 ib_conn->qp = NULL; 516 517 return 0; 518 } 519 520 /** 521 * based on the resolved device node GUID see if there already allocated 522 * device for this device. If there's no such, create one. 523 */ 524 static 525 struct iser_device *iser_device_find_by_ib_device(struct rdma_cm_id *cma_id) 526 { 527 struct iser_device *device; 528 529 mutex_lock(&ig.device_list_mutex); 530 531 list_for_each_entry(device, &ig.device_list, ig_list) 532 /* find if there's a match using the node GUID */ 533 if (device->ib_device->node_guid == cma_id->device->node_guid) 534 goto inc_refcnt; 535 536 device = kzalloc(sizeof *device, GFP_KERNEL); 537 if (device == NULL) 538 goto out; 539 540 /* assign this device to the device */ 541 device->ib_device = cma_id->device; 542 /* init the device and link it into ig device list */ 543 if (iser_create_device_ib_res(device)) { 544 kfree(device); 545 device = NULL; 546 goto out; 547 } 548 list_add(&device->ig_list, &ig.device_list); 549 550 inc_refcnt: 551 device->refcount++; 552 out: 553 mutex_unlock(&ig.device_list_mutex); 554 return device; 555 } 556 557 /* if there's no demand for this device, release it */ 558 static void iser_device_try_release(struct iser_device *device) 559 { 560 mutex_lock(&ig.device_list_mutex); 561 device->refcount--; 562 iser_info("device %p refcount %d\n", device, device->refcount); 563 if (!device->refcount) { 564 iser_free_device_ib_res(device); 565 list_del(&device->ig_list); 566 kfree(device); 567 } 568 mutex_unlock(&ig.device_list_mutex); 569 } 570 571 static int iser_conn_state_comp_exch(struct iser_conn *ib_conn, 572 enum iser_ib_conn_state comp, 573 enum iser_ib_conn_state exch) 574 { 575 int ret; 576 577 
spin_lock_bh(&ib_conn->lock); 578 if ((ret = (ib_conn->state == comp))) 579 ib_conn->state = exch; 580 spin_unlock_bh(&ib_conn->lock); 581 return ret; 582 } 583 584 /** 585 * Frees all conn objects and deallocs conn descriptor 586 */ 587 static void iser_conn_release(struct iser_conn *ib_conn, int can_destroy_id) 588 { 589 struct iser_device *device = ib_conn->device; 590 591 BUG_ON(ib_conn->state != ISER_CONN_DOWN); 592 593 mutex_lock(&ig.connlist_mutex); 594 list_del(&ib_conn->conn_list); 595 mutex_unlock(&ig.connlist_mutex); 596 iser_free_rx_descriptors(ib_conn); 597 iser_free_ib_conn_res(ib_conn); 598 ib_conn->device = NULL; 599 /* on EVENT_ADDR_ERROR there's no device yet for this conn */ 600 if (device != NULL) 601 iser_device_try_release(device); 602 /* if cma handler context, the caller actually destroy the id */ 603 if (ib_conn->cma_id != NULL && can_destroy_id) { 604 rdma_destroy_id(ib_conn->cma_id); 605 ib_conn->cma_id = NULL; 606 } 607 iscsi_destroy_endpoint(ib_conn->ep); 608 } 609 610 void iser_conn_get(struct iser_conn *ib_conn) 611 { 612 atomic_inc(&ib_conn->refcount); 613 } 614 615 int iser_conn_put(struct iser_conn *ib_conn, int can_destroy_id) 616 { 617 if (atomic_dec_and_test(&ib_conn->refcount)) { 618 iser_conn_release(ib_conn, can_destroy_id); 619 return 1; 620 } 621 return 0; 622 } 623 624 /** 625 * triggers start of the disconnect procedures and wait for them to be done 626 */ 627 void iser_conn_terminate(struct iser_conn *ib_conn) 628 { 629 int err = 0; 630 631 /* change the ib conn state only if the conn is UP, however always call 632 * rdma_disconnect since this is the only way to cause the CMA to change 633 * the QP state to ERROR 634 */ 635 636 iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, ISER_CONN_TERMINATING); 637 err = rdma_disconnect(ib_conn->cma_id); 638 if (err) 639 iser_err("Failed to disconnect, conn: 0x%p err %d\n", 640 ib_conn,err); 641 642 wait_event_interruptible(ib_conn->wait, 643 ib_conn->state == ISER_CONN_DOWN); 644 
645 iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */ 646 } 647 648 static int iser_connect_error(struct rdma_cm_id *cma_id) 649 { 650 struct iser_conn *ib_conn; 651 ib_conn = (struct iser_conn *)cma_id->context; 652 653 ib_conn->state = ISER_CONN_DOWN; 654 wake_up_interruptible(&ib_conn->wait); 655 return iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */ 656 } 657 658 static int iser_addr_handler(struct rdma_cm_id *cma_id) 659 { 660 struct iser_device *device; 661 struct iser_conn *ib_conn; 662 int ret; 663 664 device = iser_device_find_by_ib_device(cma_id); 665 if (!device) { 666 iser_err("device lookup/creation failed\n"); 667 return iser_connect_error(cma_id); 668 } 669 670 ib_conn = (struct iser_conn *)cma_id->context; 671 ib_conn->device = device; 672 673 /* connection T10-PI support */ 674 if (iser_pi_enable) { 675 if (!(device->dev_attr.device_cap_flags & 676 IB_DEVICE_SIGNATURE_HANDOVER)) { 677 iser_warn("T10-PI requested but not supported on %s, " 678 "continue without T10-PI\n", 679 ib_conn->device->ib_device->name); 680 ib_conn->pi_support = false; 681 } else { 682 ib_conn->pi_support = true; 683 } 684 } 685 686 ret = rdma_resolve_route(cma_id, 1000); 687 if (ret) { 688 iser_err("resolve route failed: %d\n", ret); 689 return iser_connect_error(cma_id); 690 } 691 692 return 0; 693 } 694 695 static int iser_route_handler(struct rdma_cm_id *cma_id) 696 { 697 struct rdma_conn_param conn_param; 698 int ret; 699 struct iser_cm_hdr req_hdr; 700 701 ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context); 702 if (ret) 703 goto failure; 704 705 memset(&conn_param, 0, sizeof conn_param); 706 conn_param.responder_resources = 4; 707 conn_param.initiator_depth = 1; 708 conn_param.retry_count = 7; 709 conn_param.rnr_retry_count = 6; 710 711 memset(&req_hdr, 0, sizeof(req_hdr)); 712 req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED | 713 ISER_SEND_W_INV_NOT_SUPPORTED); 714 conn_param.private_data = (void *)&req_hdr; 715 conn_param.private_data_len = 
sizeof(struct iser_cm_hdr); 716 717 ret = rdma_connect(cma_id, &conn_param); 718 if (ret) { 719 iser_err("failure connecting: %d\n", ret); 720 goto failure; 721 } 722 723 return 0; 724 failure: 725 return iser_connect_error(cma_id); 726 } 727 728 static void iser_connected_handler(struct rdma_cm_id *cma_id) 729 { 730 struct iser_conn *ib_conn; 731 struct ib_qp_attr attr; 732 struct ib_qp_init_attr init_attr; 733 734 (void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr); 735 iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num); 736 737 ib_conn = (struct iser_conn *)cma_id->context; 738 ib_conn->state = ISER_CONN_UP; 739 wake_up_interruptible(&ib_conn->wait); 740 } 741 742 static int iser_disconnected_handler(struct rdma_cm_id *cma_id) 743 { 744 struct iser_conn *ib_conn; 745 int ret; 746 747 ib_conn = (struct iser_conn *)cma_id->context; 748 749 /* getting here when the state is UP means that the conn is being * 750 * terminated asynchronously from the iSCSI layer's perspective. 
*/ 751 if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, 752 ISER_CONN_TERMINATING)){ 753 if (ib_conn->iscsi_conn) 754 iscsi_conn_failure(ib_conn->iscsi_conn, ISCSI_ERR_CONN_FAILED); 755 else 756 iser_err("iscsi_iser connection isn't bound\n"); 757 } 758 759 /* Complete the termination process if no posts are pending */ 760 if (ib_conn->post_recv_buf_count == 0 && 761 (atomic_read(&ib_conn->post_send_buf_count) == 0)) { 762 ib_conn->state = ISER_CONN_DOWN; 763 wake_up_interruptible(&ib_conn->wait); 764 } 765 766 ret = iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */ 767 return ret; 768 } 769 770 static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) 771 { 772 int ret = 0; 773 774 iser_info("event %d status %d conn %p id %p\n", 775 event->event, event->status, cma_id->context, cma_id); 776 777 switch (event->event) { 778 case RDMA_CM_EVENT_ADDR_RESOLVED: 779 ret = iser_addr_handler(cma_id); 780 break; 781 case RDMA_CM_EVENT_ROUTE_RESOLVED: 782 ret = iser_route_handler(cma_id); 783 break; 784 case RDMA_CM_EVENT_ESTABLISHED: 785 iser_connected_handler(cma_id); 786 break; 787 case RDMA_CM_EVENT_ADDR_ERROR: 788 case RDMA_CM_EVENT_ROUTE_ERROR: 789 case RDMA_CM_EVENT_CONNECT_ERROR: 790 case RDMA_CM_EVENT_UNREACHABLE: 791 case RDMA_CM_EVENT_REJECTED: 792 ret = iser_connect_error(cma_id); 793 break; 794 case RDMA_CM_EVENT_DISCONNECTED: 795 case RDMA_CM_EVENT_DEVICE_REMOVAL: 796 case RDMA_CM_EVENT_ADDR_CHANGE: 797 ret = iser_disconnected_handler(cma_id); 798 break; 799 default: 800 iser_err("Unexpected RDMA CM event (%d)\n", event->event); 801 break; 802 } 803 return ret; 804 } 805 806 void iser_conn_init(struct iser_conn *ib_conn) 807 { 808 ib_conn->state = ISER_CONN_INIT; 809 init_waitqueue_head(&ib_conn->wait); 810 ib_conn->post_recv_buf_count = 0; 811 atomic_set(&ib_conn->post_send_buf_count, 0); 812 atomic_set(&ib_conn->refcount, 1); /* ref ib conn allocation */ 813 INIT_LIST_HEAD(&ib_conn->conn_list); 814 
spin_lock_init(&ib_conn->lock); 815 } 816 817 /** 818 * starts the process of connecting to the target 819 * sleeps until the connection is established or rejected 820 */ 821 int iser_connect(struct iser_conn *ib_conn, 822 struct sockaddr_in *src_addr, 823 struct sockaddr_in *dst_addr, 824 int non_blocking) 825 { 826 struct sockaddr *src, *dst; 827 int err = 0; 828 829 sprintf(ib_conn->name, "%pI4:%d", 830 &dst_addr->sin_addr.s_addr, dst_addr->sin_port); 831 832 /* the device is known only --after-- address resolution */ 833 ib_conn->device = NULL; 834 835 iser_info("connecting to: %pI4, port 0x%x\n", 836 &dst_addr->sin_addr, dst_addr->sin_port); 837 838 ib_conn->state = ISER_CONN_PENDING; 839 840 iser_conn_get(ib_conn); /* ref ib conn's cma id */ 841 ib_conn->cma_id = rdma_create_id(iser_cma_handler, 842 (void *)ib_conn, 843 RDMA_PS_TCP, IB_QPT_RC); 844 if (IS_ERR(ib_conn->cma_id)) { 845 err = PTR_ERR(ib_conn->cma_id); 846 iser_err("rdma_create_id failed: %d\n", err); 847 goto id_failure; 848 } 849 850 src = (struct sockaddr *)src_addr; 851 dst = (struct sockaddr *)dst_addr; 852 err = rdma_resolve_addr(ib_conn->cma_id, src, dst, 1000); 853 if (err) { 854 iser_err("rdma_resolve_addr failed: %d\n", err); 855 goto addr_failure; 856 } 857 858 if (!non_blocking) { 859 wait_event_interruptible(ib_conn->wait, 860 (ib_conn->state != ISER_CONN_PENDING)); 861 862 if (ib_conn->state != ISER_CONN_UP) { 863 err = -EIO; 864 goto connect_failure; 865 } 866 } 867 868 mutex_lock(&ig.connlist_mutex); 869 list_add(&ib_conn->conn_list, &ig.connlist); 870 mutex_unlock(&ig.connlist_mutex); 871 return 0; 872 873 id_failure: 874 ib_conn->cma_id = NULL; 875 addr_failure: 876 ib_conn->state = ISER_CONN_DOWN; 877 iser_conn_put(ib_conn, 1); /* deref ib conn's cma id */ 878 connect_failure: 879 iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */ 880 return err; 881 } 882 883 /** 884 * iser_reg_page_vec - Register physical memory 885 * 886 * returns: 0 on success, errno code on failure 
887 */ 888 int iser_reg_page_vec(struct iser_conn *ib_conn, 889 struct iser_page_vec *page_vec, 890 struct iser_mem_reg *mem_reg) 891 { 892 struct ib_pool_fmr *mem; 893 u64 io_addr; 894 u64 *page_list; 895 int status; 896 897 page_list = page_vec->pages; 898 io_addr = page_list[0]; 899 900 mem = ib_fmr_pool_map_phys(ib_conn->fmr.pool, 901 page_list, 902 page_vec->length, 903 io_addr); 904 905 if (IS_ERR(mem)) { 906 status = (int)PTR_ERR(mem); 907 iser_err("ib_fmr_pool_map_phys failed: %d\n", status); 908 return status; 909 } 910 911 mem_reg->lkey = mem->fmr->lkey; 912 mem_reg->rkey = mem->fmr->rkey; 913 mem_reg->len = page_vec->length * SIZE_4K; 914 mem_reg->va = io_addr; 915 mem_reg->is_mr = 1; 916 mem_reg->mem_h = (void *)mem; 917 918 mem_reg->va += page_vec->offset; 919 mem_reg->len = page_vec->data_size; 920 921 iser_dbg("PHYSICAL Mem.register, [PHYS p_array: 0x%p, sz: %d, " 922 "entry[0]: (0x%08lx,%ld)] -> " 923 "[lkey: 0x%08X mem_h: 0x%p va: 0x%08lX sz: %ld]\n", 924 page_vec, page_vec->length, 925 (unsigned long)page_vec->pages[0], 926 (unsigned long)page_vec->data_size, 927 (unsigned int)mem_reg->lkey, mem_reg->mem_h, 928 (unsigned long)mem_reg->va, (unsigned long)mem_reg->len); 929 return 0; 930 } 931 932 /** 933 * Unregister (previosuly registered using FMR) memory. 934 * If memory is non-FMR does nothing. 
935 */ 936 void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, 937 enum iser_data_dir cmd_dir) 938 { 939 struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; 940 int ret; 941 942 if (!reg->is_mr) 943 return; 944 945 iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h); 946 947 ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h); 948 if (ret) 949 iser_err("ib_fmr_pool_unmap failed %d\n", ret); 950 951 reg->mem_h = NULL; 952 } 953 954 void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, 955 enum iser_data_dir cmd_dir) 956 { 957 struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; 958 struct iser_conn *ib_conn = iser_task->ib_conn; 959 struct fast_reg_descriptor *desc = reg->mem_h; 960 961 if (!reg->is_mr) 962 return; 963 964 reg->mem_h = NULL; 965 reg->is_mr = 0; 966 spin_lock_bh(&ib_conn->lock); 967 list_add_tail(&desc->list, &ib_conn->fastreg.pool); 968 spin_unlock_bh(&ib_conn->lock); 969 } 970 971 int iser_post_recvl(struct iser_conn *ib_conn) 972 { 973 struct ib_recv_wr rx_wr, *rx_wr_failed; 974 struct ib_sge sge; 975 int ib_ret; 976 977 sge.addr = ib_conn->login_resp_dma; 978 sge.length = ISER_RX_LOGIN_SIZE; 979 sge.lkey = ib_conn->device->mr->lkey; 980 981 rx_wr.wr_id = (unsigned long)ib_conn->login_resp_buf; 982 rx_wr.sg_list = &sge; 983 rx_wr.num_sge = 1; 984 rx_wr.next = NULL; 985 986 ib_conn->post_recv_buf_count++; 987 ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed); 988 if (ib_ret) { 989 iser_err("ib_post_recv failed ret=%d\n", ib_ret); 990 ib_conn->post_recv_buf_count--; 991 } 992 return ib_ret; 993 } 994 995 int iser_post_recvm(struct iser_conn *ib_conn, int count) 996 { 997 struct ib_recv_wr *rx_wr, *rx_wr_failed; 998 int i, ib_ret; 999 unsigned int my_rx_head = ib_conn->rx_desc_head; 1000 struct iser_rx_desc *rx_desc; 1001 1002 for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) { 1003 rx_desc = &ib_conn->rx_descs[my_rx_head]; 1004 rx_wr->wr_id = (unsigned long)rx_desc; 1005 
rx_wr->sg_list = &rx_desc->rx_sg; 1006 rx_wr->num_sge = 1; 1007 rx_wr->next = rx_wr + 1; 1008 my_rx_head = (my_rx_head + 1) & ib_conn->qp_max_recv_dtos_mask; 1009 } 1010 1011 rx_wr--; 1012 rx_wr->next = NULL; /* mark end of work requests list */ 1013 1014 ib_conn->post_recv_buf_count += count; 1015 ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed); 1016 if (ib_ret) { 1017 iser_err("ib_post_recv failed ret=%d\n", ib_ret); 1018 ib_conn->post_recv_buf_count -= count; 1019 } else 1020 ib_conn->rx_desc_head = my_rx_head; 1021 return ib_ret; 1022 } 1023 1024 1025 /** 1026 * iser_start_send - Initiate a Send DTO operation 1027 * 1028 * returns 0 on success, -1 on failure 1029 */ 1030 int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc) 1031 { 1032 int ib_ret; 1033 struct ib_send_wr send_wr, *send_wr_failed; 1034 1035 ib_dma_sync_single_for_device(ib_conn->device->ib_device, 1036 tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); 1037 1038 send_wr.next = NULL; 1039 send_wr.wr_id = (unsigned long)tx_desc; 1040 send_wr.sg_list = tx_desc->tx_sg; 1041 send_wr.num_sge = tx_desc->num_sge; 1042 send_wr.opcode = IB_WR_SEND; 1043 send_wr.send_flags = IB_SEND_SIGNALED; 1044 1045 atomic_inc(&ib_conn->post_send_buf_count); 1046 1047 ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed); 1048 if (ib_ret) { 1049 iser_err("ib_post_send failed, ret:%d\n", ib_ret); 1050 atomic_dec(&ib_conn->post_send_buf_count); 1051 } 1052 return ib_ret; 1053 } 1054 1055 static void iser_handle_comp_error(struct iser_tx_desc *desc, 1056 struct iser_conn *ib_conn) 1057 { 1058 if (desc && desc->type == ISCSI_TX_DATAOUT) 1059 kmem_cache_free(ig.desc_cache, desc); 1060 1061 if (ib_conn->post_recv_buf_count == 0 && 1062 atomic_read(&ib_conn->post_send_buf_count) == 0) { 1063 /* getting here when the state is UP means that the conn is * 1064 * being terminated asynchronously from the iSCSI layer's * 1065 * perspective. 
*/ 1066 if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, 1067 ISER_CONN_TERMINATING)) 1068 iscsi_conn_failure(ib_conn->iscsi_conn, 1069 ISCSI_ERR_CONN_FAILED); 1070 1071 /* no more non completed posts to the QP, complete the 1072 * termination process w.o worrying on disconnect event */ 1073 ib_conn->state = ISER_CONN_DOWN; 1074 wake_up_interruptible(&ib_conn->wait); 1075 } 1076 } 1077 1078 static int iser_drain_tx_cq(struct iser_device *device, int cq_index) 1079 { 1080 struct ib_cq *cq = device->tx_cq[cq_index]; 1081 struct ib_wc wc; 1082 struct iser_tx_desc *tx_desc; 1083 struct iser_conn *ib_conn; 1084 int completed_tx = 0; 1085 1086 while (ib_poll_cq(cq, 1, &wc) == 1) { 1087 tx_desc = (struct iser_tx_desc *) (unsigned long) wc.wr_id; 1088 ib_conn = wc.qp->qp_context; 1089 if (wc.status == IB_WC_SUCCESS) { 1090 if (wc.opcode == IB_WC_SEND) 1091 iser_snd_completion(tx_desc, ib_conn); 1092 else 1093 iser_err("expected opcode %d got %d\n", 1094 IB_WC_SEND, wc.opcode); 1095 } else { 1096 iser_err("tx id %llx status %d vend_err %x\n", 1097 wc.wr_id, wc.status, wc.vendor_err); 1098 if (wc.wr_id != ISER_FASTREG_LI_WRID) { 1099 atomic_dec(&ib_conn->post_send_buf_count); 1100 iser_handle_comp_error(tx_desc, ib_conn); 1101 } 1102 } 1103 completed_tx++; 1104 } 1105 return completed_tx; 1106 } 1107 1108 1109 static void iser_cq_tasklet_fn(unsigned long data) 1110 { 1111 struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)data; 1112 struct iser_device *device = cq_desc->device; 1113 int cq_index = cq_desc->cq_index; 1114 struct ib_cq *cq = device->rx_cq[cq_index]; 1115 struct ib_wc wc; 1116 struct iser_rx_desc *desc; 1117 unsigned long xfer_len; 1118 struct iser_conn *ib_conn; 1119 int completed_tx, completed_rx = 0; 1120 1121 /* First do tx drain, so in a case where we have rx flushes and a successful 1122 * tx completion we will still go through completion error handling. 
1123 */ 1124 completed_tx = iser_drain_tx_cq(device, cq_index); 1125 1126 while (ib_poll_cq(cq, 1, &wc) == 1) { 1127 desc = (struct iser_rx_desc *) (unsigned long) wc.wr_id; 1128 BUG_ON(desc == NULL); 1129 ib_conn = wc.qp->qp_context; 1130 if (wc.status == IB_WC_SUCCESS) { 1131 if (wc.opcode == IB_WC_RECV) { 1132 xfer_len = (unsigned long)wc.byte_len; 1133 iser_rcv_completion(desc, xfer_len, ib_conn); 1134 } else 1135 iser_err("expected opcode %d got %d\n", 1136 IB_WC_RECV, wc.opcode); 1137 } else { 1138 if (wc.status != IB_WC_WR_FLUSH_ERR) 1139 iser_err("rx id %llx status %d vend_err %x\n", 1140 wc.wr_id, wc.status, wc.vendor_err); 1141 ib_conn->post_recv_buf_count--; 1142 iser_handle_comp_error(NULL, ib_conn); 1143 } 1144 completed_rx++; 1145 if (!(completed_rx & 63)) 1146 completed_tx += iser_drain_tx_cq(device, cq_index); 1147 } 1148 /* #warning "it is assumed here that arming CQ only once its empty" * 1149 * " would not cause interrupts to be missed" */ 1150 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 1151 1152 iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx); 1153 } 1154 1155 static void iser_cq_callback(struct ib_cq *cq, void *cq_context) 1156 { 1157 struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)cq_context; 1158 struct iser_device *device = cq_desc->device; 1159 int cq_index = cq_desc->cq_index; 1160 1161 tasklet_schedule(&device->cq_tasklet[cq_index]); 1162 } 1163 1164 u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, 1165 enum iser_data_dir cmd_dir, sector_t *sector) 1166 { 1167 struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg; 1168 struct fast_reg_descriptor *desc = reg->mem_h; 1169 unsigned long sector_size = iser_task->sc->device->sector_size; 1170 struct ib_mr_status mr_status; 1171 int ret; 1172 1173 if (desc && desc->reg_indicators & ISER_FASTREG_PROTECTED) { 1174 desc->reg_indicators &= ~ISER_FASTREG_PROTECTED; 1175 ret = ib_check_mr_status(desc->pi_ctx->sig_mr, 1176 IB_MR_CHECK_SIG_STATUS, 
&mr_status); 1177 if (ret) { 1178 pr_err("ib_check_mr_status failed, ret %d\n", ret); 1179 goto err; 1180 } 1181 1182 if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) { 1183 sector_t sector_off = mr_status.sig_err.sig_err_offset; 1184 1185 do_div(sector_off, sector_size + 8); 1186 *sector = scsi_get_lba(iser_task->sc) + sector_off; 1187 1188 pr_err("PI error found type %d at sector %llx " 1189 "expected %x vs actual %x\n", 1190 mr_status.sig_err.err_type, 1191 (unsigned long long)*sector, 1192 mr_status.sig_err.expected, 1193 mr_status.sig_err.actual); 1194 1195 switch (mr_status.sig_err.err_type) { 1196 case IB_SIG_BAD_GUARD: 1197 return 0x1; 1198 case IB_SIG_BAD_REFTAG: 1199 return 0x3; 1200 case IB_SIG_BAD_APPTAG: 1201 return 0x2; 1202 } 1203 } 1204 } 1205 1206 return 0; 1207 err: 1208 /* Not alot we can do here, return ambiguous guard error */ 1209 return 0x1; 1210 } 1211