/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
 * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/delay.h>

#include "iscsi_iser.h"

#define ISCSI_ISER_MAX_CONN	8
#define ISER_MAX_RX_LEN		(ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN)
#define ISER_MAX_TX_LEN		(ISER_QP_MAX_REQ_DTOS  * ISCSI_ISER_MAX_CONN)
#define ISER_MAX_CQ_LEN		(ISER_MAX_RX_LEN + ISER_MAX_TX_LEN + \
				 ISCSI_ISER_MAX_CONN)

static void iser_qp_event_callback(struct ib_event *cause, void *context)
{
	iser_err("qp event %s (%d)\n",
		 ib_event_msg(cause->event), cause->event);
}

static void iser_event_handler(struct ib_event_handler *handler,
			       struct ib_event *event)
{
	iser_err("async event %s (%d) on device %s port %d\n",
		 ib_event_msg(event->event), event->event,
		 dev_name(&event->device->dev), event->element.port_num);
}

/*
 * iser_create_device_ib_res - creates Protection Domain (PD) and Completion
 * Queues (CQs) with the device associated with the adaptor, and registers
 * an async event handler.
 *
 * Return: 0 on success, -1 on failure
 */
static int iser_create_device_ib_res(struct iser_device *device)
{
	struct ib_device *ib_dev = device->ib_device;
	int i, max_cqe;

	if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) {
		iser_err("IB device does not support memory registrations\n");
		return -1;
	}

	device->comps_used = min_t(int, num_online_cpus(),
				   ib_dev->num_comp_vectors);

	device->comps = kcalloc(device->comps_used, sizeof(*device->comps),
				GFP_KERNEL);
	if (!device->comps)
		goto comps_err;

	max_cqe = min(ISER_MAX_CQ_LEN, ib_dev->attrs.max_cqe);

	iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n",
		  device->comps_used, dev_name(&ib_dev->dev),
		  ib_dev->num_comp_vectors, max_cqe);

	device->pd = ib_alloc_pd(ib_dev,
				 iser_always_reg ?
				 0 : IB_PD_UNSAFE_GLOBAL_RKEY);
	if (IS_ERR(device->pd))
		goto pd_err;

	for (i = 0; i < device->comps_used; i++) {
		struct iser_comp *comp = &device->comps[i];

		comp->cq = ib_alloc_cq(ib_dev, comp, max_cqe, i,
				       IB_POLL_SOFTIRQ);
		if (IS_ERR(comp->cq)) {
			comp->cq = NULL;
			goto cq_err;
		}
	}

	INIT_IB_EVENT_HANDLER(&device->event_handler, ib_dev,
			      iser_event_handler);
	ib_register_event_handler(&device->event_handler);
	return 0;

cq_err:
	for (i = 0; i < device->comps_used; i++) {
		struct iser_comp *comp = &device->comps[i];

		if (comp->cq)
			ib_free_cq(comp->cq);
	}
	ib_dealloc_pd(device->pd);
pd_err:
	kfree(device->comps);
comps_err:
	iser_err("failed to allocate an IB resource\n");
	return -1;
}

/*
 * iser_free_device_ib_res - destroys the CQs and PD created with the
 * device associated with the adaptor, and unregisters the event handler.
 */
static void iser_free_device_ib_res(struct iser_device *device)
{
	int i;

	for (i = 0; i < device->comps_used; i++) {
		struct iser_comp *comp = &device->comps[i];

		ib_free_cq(comp->cq);
		comp->cq = NULL;
	}

	ib_unregister_event_handler(&device->event_handler);
	ib_dealloc_pd(device->pd);

	kfree(device->comps);
	device->comps = NULL;
	device->pd = NULL;
}

static struct iser_fr_desc *
iser_create_fastreg_desc(struct iser_device *device,
			 struct ib_pd *pd,
			 bool pi_enable,
			 unsigned int size)
{
	struct iser_fr_desc *desc;
	struct ib_device *ib_dev = device->ib_device;
	enum ib_mr_type mr_type;
	int ret;

	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
	if (!desc)
		return ERR_PTR(-ENOMEM);

	if (ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
		mr_type = IB_MR_TYPE_SG_GAPS;
	else
		mr_type = IB_MR_TYPE_MEM_REG;

	desc->rsc.mr = ib_alloc_mr(pd, mr_type, size);
	if (IS_ERR(desc->rsc.mr)) {
		ret = PTR_ERR(desc->rsc.mr);
		iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
		goto err_alloc_mr;
	}

	if (pi_enable) {
		desc->rsc.sig_mr = ib_alloc_mr_integrity(pd, size, size);
		if (IS_ERR(desc->rsc.sig_mr)) {
			ret = PTR_ERR(desc->rsc.sig_mr);
			iser_err("Failed to allocate sig_mr err=%d\n", ret);
			goto err_alloc_mr_integrity;
		}
	}
	desc->rsc.mr_valid = 0;

	return desc;

err_alloc_mr_integrity:
	ib_dereg_mr(desc->rsc.mr);
err_alloc_mr:
	kfree(desc);

	return ERR_PTR(ret);
}

static void iser_destroy_fastreg_desc(struct iser_fr_desc *desc)
{
	struct iser_reg_resources *res = &desc->rsc;

	ib_dereg_mr(res->mr);
	if (res->sig_mr) {
		ib_dereg_mr(res->sig_mr);
		res->sig_mr = NULL;
	}
	kfree(desc);
}

/**
 * iser_alloc_fastreg_pool - Creates pool of fast_reg descriptors
 * for fast registration work requests.
 * @ib_conn: connection RDMA resources
 * @cmds_max: max number of SCSI commands for this connection
 * @size: max number of pages per map request
 *
 * Return: 0 on success, or errno code on failure
 */
int iser_alloc_fastreg_pool(struct ib_conn *ib_conn,
			    unsigned cmds_max,
			    unsigned int size)
{
	struct iser_device *device = ib_conn->device;
	struct iser_fr_pool *fr_pool = &ib_conn->fr_pool;
	struct iser_fr_desc *desc;
	int i, ret;

	INIT_LIST_HEAD(&fr_pool->list);
	INIT_LIST_HEAD(&fr_pool->all_list);
	spin_lock_init(&fr_pool->lock);
	fr_pool->size = 0;
	for (i = 0; i < cmds_max; i++) {
		desc = iser_create_fastreg_desc(device, device->pd,
						ib_conn->pi_support, size);
		if (IS_ERR(desc)) {
			ret = PTR_ERR(desc);
			goto err;
		}

		list_add_tail(&desc->list, &fr_pool->list);
		list_add_tail(&desc->all_list, &fr_pool->all_list);
		fr_pool->size++;
	}

	return 0;

err:
	iser_free_fastreg_pool(ib_conn);
	return ret;
}

/**
 * iser_free_fastreg_pool - releases the pool of fast_reg descriptors
 * @ib_conn: connection RDMA resources
 */
void iser_free_fastreg_pool(struct ib_conn *ib_conn)
{
	struct iser_fr_pool *fr_pool = &ib_conn->fr_pool;
	struct iser_fr_desc *desc, *tmp;
	int i = 0;

	if (list_empty(&fr_pool->all_list))
		return;

	iser_info("freeing conn %p fr pool\n", ib_conn);

	list_for_each_entry_safe(desc, tmp, &fr_pool->all_list, all_list) {
		list_del(&desc->all_list);
		iser_destroy_fastreg_desc(desc);
		++i;
	}

	if (i < fr_pool->size)
		iser_warn("pool still has %d regions registered\n",
			  fr_pool->size - i);
}

/*
 * iser_create_ib_conn_res - creates a Queue-Pair (QP) for the connection
 *
 * Return: 0 on success, or errno code on failure
 */
static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
{
	struct iser_conn *iser_conn = to_iser_conn(ib_conn);
	struct iser_device *device;
	struct ib_device *ib_dev;
	struct ib_qp_init_attr init_attr;
	int ret = -ENOMEM;
	int index, min_index = 0;

	BUG_ON(ib_conn->device == NULL);

	device = ib_conn->device;
	ib_dev = device->ib_device;

	memset(&init_attr, 0, sizeof init_attr);

	mutex_lock(&ig.connlist_mutex);
	/* select the CQ with the minimal number of usages */
	for (index = 0; index < device->comps_used; index++) {
		if (device->comps[index].active_qps <
		    device->comps[min_index].active_qps)
			min_index = index;
	}
	ib_conn->comp = &device->comps[min_index];
	ib_conn->comp->active_qps++;
	mutex_unlock(&ig.connlist_mutex);
	iser_info("cq index %d used for ib_conn %p\n", min_index, ib_conn);

	init_attr.event_handler = iser_qp_event_callback;
	init_attr.qp_context = (void *)ib_conn;
	init_attr.send_cq = ib_conn->comp->cq;
	init_attr.recv_cq = ib_conn->comp->cq;
	init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
	init_attr.cap.max_send_sge = 2;
	init_attr.cap.max_recv_sge = 1;
	init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	init_attr.qp_type = IB_QPT_RC;
	if (ib_conn->pi_support) {
		init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1;
		init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;
		iser_conn->max_cmds =
			ISER_GET_MAX_XMIT_CMDS(ISER_QP_SIG_MAX_REQ_DTOS);
	} else {
		if (ib_dev->attrs.max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
			init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS + 1;
			iser_conn->max_cmds =
				ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS);
		} else {
			init_attr.cap.max_send_wr = ib_dev->attrs.max_qp_wr;
			iser_conn->max_cmds =
				ISER_GET_MAX_XMIT_CMDS(ib_dev->attrs.max_qp_wr);
			iser_dbg("device %s supports max_send_wr %d\n",
				 dev_name(&device->ib_device->dev),
				 ib_dev->attrs.max_qp_wr);
		}
	}

	ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
	if (ret)
		goto out_err;

	ib_conn->qp = ib_conn->cma_id->qp;
	iser_info("setting conn %p cma_id %p qp %p\n",
		  ib_conn, ib_conn->cma_id,
		  ib_conn->cma_id->qp);
	return ret;

out_err:
	mutex_lock(&ig.connlist_mutex);
	ib_conn->comp->active_qps--;
	mutex_unlock(&ig.connlist_mutex);
	iser_err("unable to alloc mem or create resource, err %d\n", ret);

	return ret;
}

/*
 * Based on the resolved device node GUID, see if there is already an
 * allocated device for this device. If there is no such device, create one.
 */
static
struct iser_device *iser_device_find_by_ib_device(struct rdma_cm_id *cma_id)
{
	struct iser_device *device;

	mutex_lock(&ig.device_list_mutex);

	list_for_each_entry(device, &ig.device_list, ig_list)
		/* find if there's a match using the node GUID */
		if (device->ib_device->node_guid == cma_id->device->node_guid)
			goto inc_refcnt;

	device = kzalloc(sizeof *device, GFP_KERNEL);
	if (device == NULL)
		goto out;

	/* assign the IB device to the iser device */
	device->ib_device = cma_id->device;
	/* init the device and link it into ig device list */
	if (iser_create_device_ib_res(device)) {
		kfree(device);
		device = NULL;
		goto out;
	}
	list_add(&device->ig_list, &ig.device_list);

inc_refcnt:
	device->refcount++;
out:
	mutex_unlock(&ig.device_list_mutex);
	return device;
}

/* if there's no demand for this device, release it */
static void iser_device_try_release(struct iser_device *device)
{
	mutex_lock(&ig.device_list_mutex);
	device->refcount--;
	iser_info("device %p refcount %d\n", device, device->refcount);
	if (!device->refcount) {
		iser_free_device_ib_res(device);
		list_del(&device->ig_list);
		kfree(device);
	}
	mutex_unlock(&ig.device_list_mutex);
}

/*
 * Called with state mutex held
 */
static int iser_conn_state_comp_exch(struct iser_conn *iser_conn,
				     enum iser_conn_state comp,
				     enum iser_conn_state exch)
{
	int ret;

	ret = (iser_conn->state == comp);
	if (ret)
		iser_conn->state = exch;

	return ret;
}

void iser_release_work(struct work_struct *work)
{
	struct iser_conn *iser_conn;

	iser_conn = container_of(work, struct iser_conn, release_work);

	/* Wait for conn_stop to complete */
	wait_for_completion(&iser_conn->stop_completion);
	/* Wait for IB resources cleanup to complete */
	wait_for_completion(&iser_conn->ib_completion);

	mutex_lock(&iser_conn->state_mutex);
	iser_conn->state = ISER_CONN_DOWN;
	mutex_unlock(&iser_conn->state_mutex);

	iser_conn_release(iser_conn);
}

/**
 * iser_free_ib_conn_res - release IB related resources
 * @iser_conn: iser connection struct
 * @destroy: indicator if we need to try to release the
 *           iser device and memory regions pool (only iscsi
 *           shutdown and DEVICE_REMOVAL will use this).
 *
 * This routine is called with the iser state mutex held
 * so the cm_id removal is out of here.
 * It is safe to be invoked multiple times.
 */
static void iser_free_ib_conn_res(struct iser_conn *iser_conn,
				  bool destroy)
{
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;

	iser_info("freeing conn %p cma_id %p qp %p\n",
		  iser_conn, ib_conn->cma_id, ib_conn->qp);

	if (ib_conn->qp != NULL) {
		mutex_lock(&ig.connlist_mutex);
		ib_conn->comp->active_qps--;
		mutex_unlock(&ig.connlist_mutex);
		rdma_destroy_qp(ib_conn->cma_id);
		ib_conn->qp = NULL;
	}

	if (destroy) {
		if (iser_conn->rx_descs)
			iser_free_rx_descriptors(iser_conn);

		if (device != NULL) {
			iser_device_try_release(device);
			ib_conn->device = NULL;
		}
	}
}

/**
 * iser_conn_release - Frees all conn objects and deallocs conn descriptor
 * @iser_conn: iSER connection context
 */
void iser_conn_release(struct iser_conn *iser_conn)
{
	struct ib_conn *ib_conn = &iser_conn->ib_conn;

	mutex_lock(&ig.connlist_mutex);
	list_del(&iser_conn->conn_list);
	mutex_unlock(&ig.connlist_mutex);

	mutex_lock(&iser_conn->state_mutex);
	/* In case we end up here without ep_disconnect being invoked. */
	if (iser_conn->state != ISER_CONN_DOWN) {
		iser_warn("iser conn %p state %d, expected state down.\n",
			  iser_conn, iser_conn->state);
		iscsi_destroy_endpoint(iser_conn->ep);
		iser_conn->state = ISER_CONN_DOWN;
	}
	/*
	 * In case we never got to bind stage, we still need to
	 * release IB resources (which is safe to call more than once).
	 */
	iser_free_ib_conn_res(iser_conn, true);
	mutex_unlock(&iser_conn->state_mutex);

	if (ib_conn->cma_id != NULL) {
		rdma_destroy_id(ib_conn->cma_id);
		ib_conn->cma_id = NULL;
	}

	kfree(iser_conn);
}

/**
 * iser_conn_terminate - triggers start of the disconnect procedures and
 *                       waits for them to be done
 * @iser_conn: iSER connection context
 *
 * Called with state mutex held
 */
int iser_conn_terminate(struct iser_conn *iser_conn)
{
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	int err = 0;

	/* terminate the iser conn only if the conn state is UP */
	if (!iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP,
				       ISER_CONN_TERMINATING))
		return 0;

	iser_info("iser_conn %p state %d\n", iser_conn, iser_conn->state);

	/* suspend queuing of new iscsi commands */
	if (iser_conn->iscsi_conn)
		iscsi_suspend_queue(iser_conn->iscsi_conn);

	/*
	 * In case we didn't already clean up the cma_id (peer initiated
	 * a disconnection), we need to cause the CMA to change the QP
	 * state to ERROR.
	 */
	if (ib_conn->cma_id) {
		err = rdma_disconnect(ib_conn->cma_id);
		if (err)
			iser_err("Failed to disconnect, conn: 0x%p err %d\n",
				 iser_conn, err);

		/* block until all flush errors are consumed */
		ib_drain_sq(ib_conn->qp);
	}

	return 1;
}

/*
 * Called with state mutex held
 */
static void iser_connect_error(struct rdma_cm_id *cma_id)
{
	struct iser_conn *iser_conn;

	iser_conn = (struct iser_conn *)cma_id->context;
	iser_conn->state = ISER_CONN_TERMINATING;
}

static void
iser_calc_scsi_params(struct iser_conn *iser_conn,
		      unsigned int max_sectors)
{
	struct iser_device *device = iser_conn->ib_conn.device;
	struct ib_device_attr *attr = &device->ib_device->attrs;
	unsigned short sg_tablesize, sup_sg_tablesize;
	unsigned short reserved_mr_pages;
	u32 max_num_sg;

	/*
	 * FRs without SG_GAPS can only map up to a (device) page per entry,
	 * but if the first entry is misaligned we'll end up using two entries
	 * (head and tail) for a single page worth of data, so one additional
	 * entry is required.
	 */
	if (attr->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
		reserved_mr_pages = 0;
	else
		reserved_mr_pages = 1;

	if (iser_conn->ib_conn.pi_support)
		max_num_sg = attr->max_pi_fast_reg_page_list_len;
	else
		max_num_sg = attr->max_fast_reg_page_list_len;

	sg_tablesize = DIV_ROUND_UP(max_sectors * SECTOR_SIZE, SZ_4K);
	sup_sg_tablesize = min_t(uint, ISCSI_ISER_MAX_SG_TABLESIZE,
				 max_num_sg - reserved_mr_pages);
	iser_conn->scsi_sg_tablesize = min(sg_tablesize, sup_sg_tablesize);
	iser_conn->pages_per_mr =
		iser_conn->scsi_sg_tablesize + reserved_mr_pages;
}

/*
 * Called with state mutex held
 */
static void iser_addr_handler(struct rdma_cm_id *cma_id)
{
	struct iser_device *device;
	struct iser_conn *iser_conn;
	struct ib_conn *ib_conn;
	int ret;

	iser_conn = (struct iser_conn *)cma_id->context;
	if (iser_conn->state != ISER_CONN_PENDING)
		/* bailout */
		return;

	ib_conn = &iser_conn->ib_conn;
	device = iser_device_find_by_ib_device(cma_id);
	if (!device) {
		iser_err("device lookup/creation failed\n");
		iser_connect_error(cma_id);
		return;
	}

	ib_conn->device = device;

	/* connection T10-PI support */
	if (iser_pi_enable) {
		if (!(device->ib_device->attrs.device_cap_flags &
		      IB_DEVICE_INTEGRITY_HANDOVER)) {
			iser_warn("T10-PI requested but not supported on %s, "
				  "continue without T10-PI\n",
				  dev_name(&ib_conn->device->ib_device->dev));
			ib_conn->pi_support = false;
		} else {
			ib_conn->pi_support = true;
		}
	}

	iser_calc_scsi_params(iser_conn, iser_max_sectors);

	ret = rdma_resolve_route(cma_id, 1000);
	if (ret) {
		iser_err("resolve route failed: %d\n", ret);
		iser_connect_error(cma_id);
		return;
	}
}

/*
 * Called with state mutex held
 */
static void iser_route_handler(struct rdma_cm_id *cma_id)
{
	struct rdma_conn_param conn_param;
	int ret;
	struct iser_cm_hdr req_hdr;
	struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context;
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct ib_device *ib_dev = ib_conn->device->ib_device;

	if (iser_conn->state != ISER_CONN_PENDING)
		/* bailout */
		return;

	ret = iser_create_ib_conn_res(ib_conn);
	if (ret)
		goto failure;

	memset(&conn_param,
	       0, sizeof conn_param);
	conn_param.responder_resources = ib_dev->attrs.max_qp_rd_atom;
	conn_param.initiator_depth = 1;
	conn_param.retry_count = 7;
	conn_param.rnr_retry_count = 6;

	memset(&req_hdr, 0, sizeof(req_hdr));
	req_hdr.flags = ISER_ZBVA_NOT_SUP;
	if (!iser_always_reg)
		req_hdr.flags |= ISER_SEND_W_INV_NOT_SUP;
	conn_param.private_data = (void *)&req_hdr;
	conn_param.private_data_len = sizeof(struct iser_cm_hdr);

	ret = rdma_connect(cma_id, &conn_param);
	if (ret) {
		iser_err("failure connecting: %d\n", ret);
		goto failure;
	}

	return;
failure:
	iser_connect_error(cma_id);
}

static void iser_connected_handler(struct rdma_cm_id *cma_id,
				   const void *private_data)
{
	struct iser_conn *iser_conn;
	struct ib_qp_attr attr;
	struct ib_qp_init_attr init_attr;

	iser_conn = (struct iser_conn *)cma_id->context;
	if (iser_conn->state != ISER_CONN_PENDING)
		/* bailout */
		return;

	(void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);
	iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);

	if (private_data) {
		u8 flags = *(u8 *)private_data;

		iser_conn->snd_w_inv = !(flags & ISER_SEND_W_INV_NOT_SUP);
	}

	iser_info("conn %p: negotiated %s invalidation\n",
		  iser_conn, iser_conn->snd_w_inv ? "remote" : "local");

	iser_conn->state = ISER_CONN_UP;
	complete(&iser_conn->up_completion);
}

static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
{
	struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context;

	if (iser_conn_terminate(iser_conn)) {
		if (iser_conn->iscsi_conn)
			iscsi_conn_failure(iser_conn->iscsi_conn,
					   ISCSI_ERR_CONN_FAILED);
		else
			iser_err("iscsi_iser connection isn't bound\n");
	}
}

static void iser_cleanup_handler(struct rdma_cm_id *cma_id,
				 bool destroy)
{
	struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context;

	/*
	 * We are not guaranteed that we visited disconnected_handler
	 * by now, call it here to be safe that we handle CM drep
	 * and flush errors.
	 */
	iser_disconnected_handler(cma_id);
	iser_free_ib_conn_res(iser_conn, destroy);
	complete(&iser_conn->ib_completion);
}

static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
	struct iser_conn *iser_conn;
	int ret = 0;

	iser_conn = (struct iser_conn *)cma_id->context;
	iser_info("%s (%d): status %d conn %p id %p\n",
		  rdma_event_msg(event->event), event->event,
		  event->status, cma_id->context, cma_id);

	mutex_lock(&iser_conn->state_mutex);
	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		iser_addr_handler(cma_id);
		break;
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		iser_route_handler(cma_id);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		iser_connected_handler(cma_id, event->param.conn.private_data);
		break;
	case RDMA_CM_EVENT_REJECTED:
		iser_info("Connection rejected: %s\n",
			  rdma_reject_msg(cma_id, event->status));
		/* FALLTHROUGH */
	case RDMA_CM_EVENT_ADDR_ERROR:
	case RDMA_CM_EVENT_ROUTE_ERROR:
	case RDMA_CM_EVENT_CONNECT_ERROR:
	case RDMA_CM_EVENT_UNREACHABLE:
		iser_connect_error(cma_id);
		break;
	case RDMA_CM_EVENT_DISCONNECTED:
	case RDMA_CM_EVENT_ADDR_CHANGE:
	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
		iser_cleanup_handler(cma_id, false);
		break;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		/*
		 * we *must* destroy the device as we cannot rely
		 * on iscsid to be around to initiate error handling.
		 * also if we are not in state DOWN implicitly destroy
		 * the cma_id.
		 */
		iser_cleanup_handler(cma_id, true);
		if (iser_conn->state != ISER_CONN_DOWN) {
			iser_conn->ib_conn.cma_id = NULL;
			ret = 1;
		}
		break;
	default:
		iser_err("Unexpected RDMA CM event: %s (%d)\n",
			 rdma_event_msg(event->event), event->event);
		break;
	}
	mutex_unlock(&iser_conn->state_mutex);

	return ret;
}

void iser_conn_init(struct iser_conn *iser_conn)
{
	struct ib_conn *ib_conn = &iser_conn->ib_conn;

	iser_conn->state = ISER_CONN_INIT;
	init_completion(&iser_conn->stop_completion);
	init_completion(&iser_conn->ib_completion);
	init_completion(&iser_conn->up_completion);
	INIT_LIST_HEAD(&iser_conn->conn_list);
	mutex_init(&iser_conn->state_mutex);

	ib_conn->post_recv_buf_count = 0;
	ib_conn->reg_cqe.done = iser_reg_comp;
}

/*
 * Starts the process of connecting to the target and sleeps until the
 * connection is established or rejected.
 */
int iser_connect(struct iser_conn *iser_conn,
		 struct sockaddr *src_addr,
		 struct sockaddr *dst_addr,
		 int non_blocking)
{
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	int err = 0;

	mutex_lock(&iser_conn->state_mutex);

	sprintf(iser_conn->name, "%pISp", dst_addr);

	iser_info("connecting to: %s\n", iser_conn->name);

	/* the device is known only --after-- address resolution */
	ib_conn->device = NULL;

	iser_conn->state = ISER_CONN_PENDING;

	ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler,
					 (void *)iser_conn,
					 RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(ib_conn->cma_id)) {
		err = PTR_ERR(ib_conn->cma_id);
		iser_err("rdma_create_id failed: %d\n", err);
		goto id_failure;
	}

	err = rdma_resolve_addr(ib_conn->cma_id, src_addr, dst_addr, 1000);
	if (err) {
		iser_err("rdma_resolve_addr failed: %d\n", err);
		goto addr_failure;
	}

	if (!non_blocking) {
		wait_for_completion_interruptible(&iser_conn->up_completion);

		if (iser_conn->state != ISER_CONN_UP) {
			err = -EIO;
			goto connect_failure;
		}
	}
	mutex_unlock(&iser_conn->state_mutex);

	mutex_lock(&ig.connlist_mutex);
	list_add(&iser_conn->conn_list, &ig.connlist);
	mutex_unlock(&ig.connlist_mutex);
	return 0;

id_failure:
	ib_conn->cma_id = NULL;
addr_failure:
	iser_conn->state = ISER_CONN_DOWN;
connect_failure:
	mutex_unlock(&iser_conn->state_mutex);
	iser_conn_release(iser_conn);
	return err;
}

int iser_post_recvl(struct iser_conn *iser_conn)
{
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_login_desc *desc = &iser_conn->login_desc;
	struct ib_recv_wr wr;
	int ib_ret;

	desc->sge.addr = desc->rsp_dma;
	desc->sge.length = ISER_RX_LOGIN_SIZE;
	desc->sge.lkey = ib_conn->device->pd->local_dma_lkey;

	desc->cqe.done = iser_login_rsp;
	wr.wr_cqe = &desc->cqe;
	wr.sg_list = &desc->sge;
	wr.num_sge = 1;
	wr.next = NULL;

	ib_conn->post_recv_buf_count++;
	ib_ret = ib_post_recv(ib_conn->qp, &wr, NULL);
	if (ib_ret) {
		iser_err("ib_post_recv failed ret=%d\n", ib_ret);
		ib_conn->post_recv_buf_count--;
	}

	return ib_ret;
}

int iser_post_recvm(struct iser_conn *iser_conn, int count)
{
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	unsigned int my_rx_head = iser_conn->rx_desc_head;
	struct iser_rx_desc *rx_desc;
	struct ib_recv_wr *wr;
	int i, ib_ret;

	for (wr = ib_conn->rx_wr, i = 0; i < count; i++, wr++) {
		rx_desc = &iser_conn->rx_descs[my_rx_head];
		rx_desc->cqe.done = iser_task_rsp;
		wr->wr_cqe = &rx_desc->cqe;
		wr->sg_list = &rx_desc->rx_sg;
		wr->num_sge = 1;
		wr->next = wr + 1;
		my_rx_head = (my_rx_head + 1) & iser_conn->qp_max_recv_dtos_mask;
	}

	wr--;
	wr->next = NULL; /* mark end of work requests list */

	ib_conn->post_recv_buf_count += count;
	ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, NULL);
	if (unlikely(ib_ret)) {
		iser_err("ib_post_recv failed ret=%d\n", ib_ret);
		ib_conn->post_recv_buf_count -= count;
	} else
		iser_conn->rx_desc_head = my_rx_head;

	return ib_ret;
}


/**
 * iser_post_send - Initiate a Send DTO operation
 * @ib_conn: connection RDMA resources
 * @tx_desc: iSER TX descriptor
 * @signal: true to send work request as SIGNALED
 *
 * Return: 0 on success, or errno code on failure
 */
int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
		   bool signal)
{
	struct ib_send_wr *wr = &tx_desc->send_wr;
	struct ib_send_wr *first_wr;
	int ib_ret;

	ib_dma_sync_single_for_device(ib_conn->device->ib_device,
				      tx_desc->dma_addr, ISER_HEADERS_LEN,
				      DMA_TO_DEVICE);

	wr->next = NULL;
	wr->wr_cqe = &tx_desc->cqe;
	wr->sg_list = tx_desc->tx_sg;
	wr->num_sge = tx_desc->num_sge;
	wr->opcode = IB_WR_SEND;
	wr->send_flags = signal ?
			 IB_SEND_SIGNALED : 0;

	if (tx_desc->inv_wr.next)
		first_wr = &tx_desc->inv_wr;
	else if (tx_desc->reg_wr.wr.next)
		first_wr = &tx_desc->reg_wr.wr;
	else
		first_wr = wr;

	ib_ret = ib_post_send(ib_conn->qp, first_wr, NULL);
	if (unlikely(ib_ret))
		iser_err("ib_post_send failed, ret:%d opcode:%d\n",
			 ib_ret, wr->opcode);

	return ib_ret;
}

u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
			     enum iser_data_dir cmd_dir, sector_t *sector)
{
	struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
	struct iser_fr_desc *desc = reg->mem_h;
	unsigned long sector_size = iser_task->sc->device->sector_size;
	struct ib_mr_status mr_status;
	int ret;

	if (desc && desc->sig_protected) {
		desc->sig_protected = false;
		ret = ib_check_mr_status(desc->rsc.sig_mr,
					 IB_MR_CHECK_SIG_STATUS, &mr_status);
		if (ret) {
			iser_err("ib_check_mr_status failed, ret %d\n", ret);
			/* Not a lot we can do, return ambiguous guard error */
			*sector = 0;
			return 0x1;
		}

		if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
			sector_t sector_off = mr_status.sig_err.sig_err_offset;

			sector_div(sector_off, sector_size + 8);
			*sector = scsi_get_lba(iser_task->sc) + sector_off;

			iser_err("PI error found type %d at sector %llx "
				 "expected %x vs actual %x\n",
				 mr_status.sig_err.err_type,
				 (unsigned long long)*sector,
				 mr_status.sig_err.expected,
				 mr_status.sig_err.actual);

			switch (mr_status.sig_err.err_type) {
			case IB_SIG_BAD_GUARD:
				return 0x1;
			case IB_SIG_BAD_REFTAG:
				return 0x3;
			case IB_SIG_BAD_APPTAG:
				return 0x2;
			}
		}
	}

	return 0;
}

void iser_err_comp(struct ib_wc *wc, const char *type)
{
	if (wc->status != IB_WC_WR_FLUSH_ERR) {
		struct iser_conn *iser_conn = to_iser_conn(wc->qp->qp_context);

		iser_err("%s failure: %s (%d) vend_err %#x\n", type,
			 ib_wc_status_msg(wc->status), wc->status,
			 wc->vendor_err);

		if (iser_conn->iscsi_conn)
			iscsi_conn_failure(iser_conn->iscsi_conn,
					   ISCSI_ERR_CONN_FAILED);
	} else {
		iser_dbg("%s failure: %s (%d)\n", type,
			 ib_wc_status_msg(wc->status), wc->status);
	}
}