/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
 * Copyright (c) 2013 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/delay.h>

#include "iscsi_iser.h"

#define ISCSI_ISER_MAX_CONN	8
#define ISER_MAX_RX_CQ_LEN	(ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN)
#define ISER_MAX_TX_CQ_LEN	(ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN)

static void iser_cq_tasklet_fn(unsigned long data);
static void iser_cq_callback(struct ib_cq *cq, void *cq_context);

static void iser_cq_event_callback(struct ib_event *cause, void *context)
{
	iser_err("got cq event %d\n", cause->event);
}

static void iser_qp_event_callback(struct ib_event *cause, void *context)
{
	iser_err("got qp event %d\n", cause->event);
}

static void iser_event_handler(struct ib_event_handler *handler,
			       struct ib_event *event)
{
	iser_err("async event %d on device %s port %d\n", event->event,
		 event->device->name, event->element.port_num);
}

/**
 * iser_create_device_ib_res - creates Protection Domain (PD), Completion
 * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with
 * the adapter.
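 * It also registers an asynchronous event handler for the IB device.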
 *
 * returns 0 on success, -1 on failure
 */
static int iser_create_device_ib_res(struct iser_device *device)
{
	int i, j;
	struct iser_cq_desc *cq_desc;

	device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors);
	iser_info("using %d CQs, device %s supports %d vectors\n",
		  device->cqs_used, device->ib_device->name,
		  device->ib_device->num_comp_vectors);

	device->cq_desc = kmalloc(sizeof(struct iser_cq_desc) * device->cqs_used,
				  GFP_KERNEL);
	if (device->cq_desc == NULL)
		goto cq_desc_err;
	cq_desc = device->cq_desc;

	device->pd = ib_alloc_pd(device->ib_device);
	if (IS_ERR(device->pd))
		goto pd_err;

	for (i = 0; i < device->cqs_used; i++) {
		cq_desc[i].device = device;
		cq_desc[i].cq_index = i;

		device->rx_cq[i] = ib_create_cq(device->ib_device,
						iser_cq_callback,
						iser_cq_event_callback,
						(void *)&cq_desc[i],
						ISER_MAX_RX_CQ_LEN, i);
		if (IS_ERR(device->rx_cq[i]))
			goto cq_err;

		device->tx_cq[i] = ib_create_cq(device->ib_device,
						NULL, iser_cq_event_callback,
						(void *)&cq_desc[i],
						ISER_MAX_TX_CQ_LEN, i);

		if (IS_ERR(device->tx_cq[i]))
			goto cq_err;

		if (ib_req_notify_cq(device->rx_cq[i], IB_CQ_NEXT_COMP))
			goto cq_err;

		tasklet_init(&device->cq_tasklet[i],
			     iser_cq_tasklet_fn,
			     (unsigned long)&cq_desc[i]);
	}

	device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
				   IB_ACCESS_REMOTE_WRITE |
				   IB_ACCESS_REMOTE_READ);
	if (IS_ERR(device->mr))
		goto dma_mr_err;

	INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
			      iser_event_handler);
	if (ib_register_event_handler(&device->event_handler))
		goto handler_err;

	return 0;

handler_err:
	ib_dereg_mr(device->mr);
dma_mr_err:
	for (j = 0; j < device->cqs_used; j++)
		tasklet_kill(&device->cq_tasklet[j]);
cq_err:
	for (j = 0; j < i; j++) {
		if (device->tx_cq[j])
			ib_destroy_cq(device->tx_cq[j]);
		if (device->rx_cq[j])
			ib_destroy_cq(device->rx_cq[j]);
	}
	ib_dealloc_pd(device->pd);
pd_err:
	kfree(device->cq_desc);
cq_desc_err:
	iser_err("failed to allocate an IB resource\n");
	return -1;
}

/**
 * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR,
 * CQ and PD created with the device associated with the adapter.
 */
static void iser_free_device_ib_res(struct iser_device *device)
{
	int i;
	BUG_ON(device->mr == NULL);

	for (i = 0; i < device->cqs_used; i++) {
		tasklet_kill(&device->cq_tasklet[i]);
		(void)ib_destroy_cq(device->tx_cq[i]);
		(void)ib_destroy_cq(device->rx_cq[i]);
		device->tx_cq[i] = NULL;
		device->rx_cq[i] = NULL;
	}

	(void)ib_unregister_event_handler(&device->event_handler);
	(void)ib_dereg_mr(device->mr);
	(void)ib_dealloc_pd(device->pd);

	kfree(device->cq_desc);

	device->mr = NULL;
	device->pd = NULL;
}

/**
 * iser_create_ib_conn_res - Creates FMR pool and Queue-Pair (QP)
 *
 * returns 0 on success, -1 on failure
 */
static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
{
	struct iser_device *device;
	struct ib_qp_init_attr init_attr;
	int req_err, resp_err, ret = -ENOMEM;
	struct ib_fmr_pool_param params;
	int index, min_index = 0;

	BUG_ON(ib_conn->device == NULL);

	device = ib_conn->device;

	ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
				     ISER_RX_LOGIN_SIZE, GFP_KERNEL);
	if (!ib_conn->login_buf)
		goto out_err;

	ib_conn->login_req_buf = ib_conn->login_buf;
	ib_conn->login_resp_buf = ib_conn->login_buf + ISCSI_DEF_MAX_RECV_SEG_LEN;

	ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device,
						   (void *)ib_conn->login_req_buf,
						   ISCSI_DEF_MAX_RECV_SEG_LEN,
						   DMA_TO_DEVICE);

	ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device,
						    (void *)ib_conn->login_resp_buf,
						    ISER_RX_LOGIN_SIZE,
						    DMA_FROM_DEVICE);

	req_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_req_dma);
	resp_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_resp_dma);

	if (req_err || resp_err) {
		if (req_err)
			ib_conn->login_req_dma = 0;
		if (resp_err)
			ib_conn->login_resp_dma = 0;
		goto out_err;
	}

	ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
				    (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE + 1)),
				    GFP_KERNEL);
	if (!ib_conn->page_vec)
		goto out_err;

	ib_conn->page_vec->pages = (u64 *)(ib_conn->page_vec + 1);

	params.page_shift = SHIFT_4K;
	/* when the first/last SG element are not start/end
	 * page aligned, the map would be of N+1 pages */
	params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
	/* make the pool size twice the max number of SCSI commands
	 * the ML is expected to queue, watermark for unmap at 50% */
	params.pool_size = ISCSI_DEF_XMIT_CMDS_MAX * 2;
	params.dirty_watermark = ISCSI_DEF_XMIT_CMDS_MAX;
	params.cache = 0;
	params.flush_function = NULL;
	params.access = (IB_ACCESS_LOCAL_WRITE |
			 IB_ACCESS_REMOTE_WRITE |
			 IB_ACCESS_REMOTE_READ);

	ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, &params);
	ret = PTR_ERR(ib_conn->fmr_pool);
	if (IS_ERR(ib_conn->fmr_pool) && ret != -ENOSYS) {
		ib_conn->fmr_pool = NULL;
		goto out_err;
	} else if (ret == -ENOSYS) {
		ib_conn->fmr_pool = NULL;
		iser_warn("FMRs are not supported, using unaligned mode\n");
		ret = 0;
	}

	memset(&init_attr, 0, sizeof init_attr);

	mutex_lock(&ig.connlist_mutex);
	/* select the CQ with the minimal number of usages */
	for (index = 0; index < device->cqs_used; index++)
		if (device->cq_active_qps[index] <
		    device->cq_active_qps[min_index])
			min_index = index;
	device->cq_active_qps[min_index]++;
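	/* the usage count taken here is dropped again in iser_free_ib_conn_res() */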
	mutex_unlock(&ig.connlist_mutex);
	iser_info("cq index %d used for ib_conn %p\n", min_index, ib_conn);

	init_attr.event_handler = iser_qp_event_callback;
	init_attr.qp_context = (void *)ib_conn;
	init_attr.send_cq = device->tx_cq[min_index];
	init_attr.recv_cq = device->rx_cq[min_index];
	init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
	init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
	init_attr.cap.max_send_sge = 2;
	init_attr.cap.max_recv_sge = 1;
	init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	init_attr.qp_type = IB_QPT_RC;

	ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
	if (ret)
		goto out_err;

	ib_conn->qp = ib_conn->cma_id->qp;
	iser_info("setting conn %p cma_id %p: fmr_pool %p qp %p\n",
		  ib_conn, ib_conn->cma_id,
		  ib_conn->fmr_pool, ib_conn->cma_id->qp);
	return ret;

out_err:
	iser_err("unable to alloc mem or create resource, err %d\n", ret);
	return ret;
}

/**
 * releases the FMR pool and QP objects, returns 0 on success,
 * -1 on failure
 */
static int iser_free_ib_conn_res(struct iser_conn *ib_conn)
{
	int cq_index;
	BUG_ON(ib_conn == NULL);

	iser_info("freeing conn %p cma_id %p fmr pool %p qp %p\n",
		  ib_conn, ib_conn->cma_id,
		  ib_conn->fmr_pool, ib_conn->qp);

	/* qp is created only once both addr & route are resolved */
	if (ib_conn->fmr_pool != NULL)
		ib_destroy_fmr_pool(ib_conn->fmr_pool);

	if (ib_conn->qp != NULL) {
		cq_index = ((struct iser_cq_desc *)ib_conn->qp->recv_cq->cq_context)->cq_index;
		ib_conn->device->cq_active_qps[cq_index]--;

		rdma_destroy_qp(ib_conn->cma_id);
	}

	ib_conn->fmr_pool = NULL;
	ib_conn->qp = NULL;
	kfree(ib_conn->page_vec);

	if (ib_conn->login_buf) {
		if (ib_conn->login_req_dma)
			ib_dma_unmap_single(ib_conn->device->ib_device,
					    ib_conn->login_req_dma,
					    ISCSI_DEF_MAX_RECV_SEG_LEN,
					    DMA_TO_DEVICE);
		if (ib_conn->login_resp_dma)
			ib_dma_unmap_single(ib_conn->device->ib_device,
					    ib_conn->login_resp_dma,
					    ISER_RX_LOGIN_SIZE,
					    DMA_FROM_DEVICE);
		kfree(ib_conn->login_buf);
	}

	return 0;
}

/**
 * Based on the resolved device node GUID, see if there is already an
 * allocated iser device for this IB device. If not, allocate and
 * initialize one.
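 * Lookup and creation are serialized by ig.device_list_mutex.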
341 */ 342 static 343 struct iser_device *iser_device_find_by_ib_device(struct rdma_cm_id *cma_id) 344 { 345 struct iser_device *device; 346 347 mutex_lock(&ig.device_list_mutex); 348 349 list_for_each_entry(device, &ig.device_list, ig_list) 350 /* find if there's a match using the node GUID */ 351 if (device->ib_device->node_guid == cma_id->device->node_guid) 352 goto inc_refcnt; 353 354 device = kzalloc(sizeof *device, GFP_KERNEL); 355 if (device == NULL) 356 goto out; 357 358 /* assign this device to the device */ 359 device->ib_device = cma_id->device; 360 /* init the device and link it into ig device list */ 361 if (iser_create_device_ib_res(device)) { 362 kfree(device); 363 device = NULL; 364 goto out; 365 } 366 list_add(&device->ig_list, &ig.device_list); 367 368 inc_refcnt: 369 device->refcount++; 370 out: 371 mutex_unlock(&ig.device_list_mutex); 372 return device; 373 } 374 375 /* if there's no demand for this device, release it */ 376 static void iser_device_try_release(struct iser_device *device) 377 { 378 mutex_lock(&ig.device_list_mutex); 379 device->refcount--; 380 iser_info("device %p refcount %d\n", device, device->refcount); 381 if (!device->refcount) { 382 iser_free_device_ib_res(device); 383 list_del(&device->ig_list); 384 kfree(device); 385 } 386 mutex_unlock(&ig.device_list_mutex); 387 } 388 389 static int iser_conn_state_comp_exch(struct iser_conn *ib_conn, 390 enum iser_ib_conn_state comp, 391 enum iser_ib_conn_state exch) 392 { 393 int ret; 394 395 spin_lock_bh(&ib_conn->lock); 396 if ((ret = (ib_conn->state == comp))) 397 ib_conn->state = exch; 398 spin_unlock_bh(&ib_conn->lock); 399 return ret; 400 } 401 402 /** 403 * Frees all conn objects and deallocs conn descriptor 404 */ 405 static void iser_conn_release(struct iser_conn *ib_conn, int can_destroy_id) 406 { 407 struct iser_device *device = ib_conn->device; 408 409 BUG_ON(ib_conn->state != ISER_CONN_DOWN); 410 411 mutex_lock(&ig.connlist_mutex); 412 list_del(&ib_conn->conn_list); 413 mutex_unlock(&ig.connlist_mutex); 414 iser_free_rx_descriptors(ib_conn); 415 iser_free_ib_conn_res(ib_conn); 416 ib_conn->device = NULL; 417 /* on EVENT_ADDR_ERROR there's no device yet for this conn */ 418 if (device != NULL) 419 iser_device_try_release(device); 420 /* if cma handler context, the caller actually destroy the id */ 421 if (ib_conn->cma_id != NULL && can_destroy_id) { 422 rdma_destroy_id(ib_conn->cma_id); 423 ib_conn->cma_id = NULL; 424 } 425 iscsi_destroy_endpoint(ib_conn->ep); 426 } 427 428 void iser_conn_get(struct iser_conn *ib_conn) 429 { 430 atomic_inc(&ib_conn->refcount); 431 } 432 433 int iser_conn_put(struct iser_conn *ib_conn, int can_destroy_id) 434 { 435 if (atomic_dec_and_test(&ib_conn->refcount)) { 436 iser_conn_release(ib_conn, can_destroy_id); 437 return 1; 438 } 439 return 0; 440 } 441 442 /** 443 * triggers start of the disconnect procedures and wait for them to be done 444 */ 445 void iser_conn_terminate(struct iser_conn *ib_conn) 446 { 447 int err = 0; 448 449 /* change the ib conn state only if the conn is UP, however always call 450 * rdma_disconnect since this is the only way to cause the CMA to change 451 * the QP state to ERROR 452 */ 453 454 iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, ISER_CONN_TERMINATING); 455 err = rdma_disconnect(ib_conn->cma_id); 456 if (err) 457 iser_err("Failed to disconnect, conn: 0x%p err %d\n", 458 ib_conn,err); 459 460 wait_event_interruptible(ib_conn->wait, 461 ib_conn->state == ISER_CONN_DOWN); 462 463 iser_conn_put(ib_conn, 1); /* deref ib conn deallocate 
}

static int iser_connect_error(struct rdma_cm_id *cma_id)
{
	struct iser_conn *ib_conn;
	ib_conn = (struct iser_conn *)cma_id->context;

	ib_conn->state = ISER_CONN_DOWN;
	wake_up_interruptible(&ib_conn->wait);
	return iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */
}

static int iser_addr_handler(struct rdma_cm_id *cma_id)
{
	struct iser_device *device;
	struct iser_conn *ib_conn;
	int ret;

	device = iser_device_find_by_ib_device(cma_id);
	if (!device) {
		iser_err("device lookup/creation failed\n");
		return iser_connect_error(cma_id);
	}

	ib_conn = (struct iser_conn *)cma_id->context;
	ib_conn->device = device;

	ret = rdma_resolve_route(cma_id, 1000);
	if (ret) {
		iser_err("resolve route failed: %d\n", ret);
		return iser_connect_error(cma_id);
	}

	return 0;
}

static int iser_route_handler(struct rdma_cm_id *cma_id)
{
	struct rdma_conn_param conn_param;
	int ret;
	struct iser_cm_hdr req_hdr;

	ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context);
	if (ret)
		goto failure;

	memset(&conn_param, 0, sizeof conn_param);
	conn_param.responder_resources = 4;
	conn_param.initiator_depth = 1;
	conn_param.retry_count = 7;
	conn_param.rnr_retry_count = 6;

	memset(&req_hdr, 0, sizeof(req_hdr));
	req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED |
			 ISER_SEND_W_INV_NOT_SUPPORTED);
	conn_param.private_data = (void *)&req_hdr;
	conn_param.private_data_len = sizeof(struct iser_cm_hdr);

	ret = rdma_connect(cma_id, &conn_param);
	if (ret) {
		iser_err("failure connecting: %d\n", ret);
		goto failure;
	}

	return 0;
failure:
	return iser_connect_error(cma_id);
}

static void iser_connected_handler(struct rdma_cm_id *cma_id)
{
	struct iser_conn *ib_conn;

	ib_conn = (struct iser_conn *)cma_id->context;
	ib_conn->state = ISER_CONN_UP;
	wake_up_interruptible(&ib_conn->wait);
}

static int iser_disconnected_handler(struct rdma_cm_id *cma_id)
{
	struct iser_conn *ib_conn;
	int ret;

	ib_conn = (struct iser_conn *)cma_id->context;

	/* getting here when the state is UP means that the conn is being
	 * terminated asynchronously from the iSCSI layer's perspective. */
	if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
				      ISER_CONN_TERMINATING))
		iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn,
				   ISCSI_ERR_CONN_FAILED);

	/* Complete the termination process if no posts are pending */
	if (ib_conn->post_recv_buf_count == 0 &&
	    (atomic_read(&ib_conn->post_send_buf_count) == 0)) {
		ib_conn->state = ISER_CONN_DOWN;
		wake_up_interruptible(&ib_conn->wait);
	}

	ret = iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */
	return ret;
}

static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
	int ret = 0;

	iser_info("event %d status %d conn %p id %p\n",
		  event->event, event->status, cma_id->context, cma_id);

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		ret = iser_addr_handler(cma_id);
		break;
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ret = iser_route_handler(cma_id);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		iser_connected_handler(cma_id);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
	case RDMA_CM_EVENT_ROUTE_ERROR:
	case RDMA_CM_EVENT_CONNECT_ERROR:
	case RDMA_CM_EVENT_UNREACHABLE:
	case RDMA_CM_EVENT_REJECTED:
		ret = iser_connect_error(cma_id);
		break;
	case RDMA_CM_EVENT_DISCONNECTED:
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
	case RDMA_CM_EVENT_ADDR_CHANGE:
		ret = iser_disconnected_handler(cma_id);
		break;
	default:
		iser_err("Unexpected RDMA CM event (%d)\n", event->event);
		break;
	}
	return ret;
}

void iser_conn_init(struct iser_conn *ib_conn)
{
	ib_conn->state = ISER_CONN_INIT;
	init_waitqueue_head(&ib_conn->wait);
	ib_conn->post_recv_buf_count = 0;
	atomic_set(&ib_conn->post_send_buf_count, 0);
	atomic_set(&ib_conn->refcount, 1); /* ref ib conn allocation */
	INIT_LIST_HEAD(&ib_conn->conn_list);
	spin_lock_init(&ib_conn->lock);
}

/**
 * starts the process of connecting to the target;
 * unless non_blocking is set, sleeps until the connection is established or rejected
 */
int iser_connect(struct iser_conn *ib_conn,
		 struct sockaddr_in *src_addr,
		 struct sockaddr_in *dst_addr,
		 int non_blocking)
{
	struct sockaddr *src, *dst;
	int err = 0;

	sprintf(ib_conn->name, "%pI4:%d",
		&dst_addr->sin_addr.s_addr, dst_addr->sin_port);

	/* the device is known only --after-- address resolution */
	ib_conn->device = NULL;

	iser_info("connecting to: %pI4, port 0x%x\n",
		  &dst_addr->sin_addr, dst_addr->sin_port);

	ib_conn->state = ISER_CONN_PENDING;

	iser_conn_get(ib_conn); /* ref ib conn's cma id */
	ib_conn->cma_id = rdma_create_id(iser_cma_handler,
					 (void *)ib_conn,
					 RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(ib_conn->cma_id)) {
		err = PTR_ERR(ib_conn->cma_id);
		iser_err("rdma_create_id failed: %d\n", err);
		goto id_failure;
	}

	src = (struct sockaddr *)src_addr;
	dst = (struct sockaddr *)dst_addr;
	err = rdma_resolve_addr(ib_conn->cma_id, src, dst, 1000);
	if (err) {
		iser_err("rdma_resolve_addr failed: %d\n", err);
		goto addr_failure;
	}

	if (!non_blocking) {
		wait_event_interruptible(ib_conn->wait,
					 (ib_conn->state != ISER_CONN_PENDING));

		if (ib_conn->state != ISER_CONN_UP) {
			err = -EIO;
			goto connect_failure;
		}
	}

	mutex_lock(&ig.connlist_mutex);
	list_add(&ib_conn->conn_list, &ig.connlist);
	mutex_unlock(&ig.connlist_mutex);
	return 0;

id_failure:
	ib_conn->cma_id = NULL;
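	/* fall through to drop both the cma_id reference and the initial allocation reference */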
addr_failure:
	ib_conn->state = ISER_CONN_DOWN;
	iser_conn_put(ib_conn, 1); /* deref ib conn's cma id */
connect_failure:
	iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */
	return err;
}

/**
 * iser_reg_page_vec - Register physical memory
 *
 * returns: 0 on success, errno code on failure
 */
int iser_reg_page_vec(struct iser_conn *ib_conn,
		      struct iser_page_vec *page_vec,
		      struct iser_mem_reg *mem_reg)
{
	struct ib_pool_fmr *mem;
	u64 io_addr;
	u64 *page_list;
	int status;

	page_list = page_vec->pages;
	io_addr = page_list[0];

	mem = ib_fmr_pool_map_phys(ib_conn->fmr_pool,
				   page_list,
				   page_vec->length,
				   io_addr);

	if (IS_ERR(mem)) {
		status = (int)PTR_ERR(mem);
		iser_err("ib_fmr_pool_map_phys failed: %d\n", status);
		return status;
	}

	mem_reg->lkey = mem->fmr->lkey;
	mem_reg->rkey = mem->fmr->rkey;
	mem_reg->len = page_vec->length * SIZE_4K;
	mem_reg->va = io_addr;
	mem_reg->is_fmr = 1;
	mem_reg->mem_h = (void *)mem;

	mem_reg->va += page_vec->offset;
	mem_reg->len = page_vec->data_size;

	iser_dbg("PHYSICAL Mem.register, [PHYS p_array: 0x%p, sz: %d, "
		 "entry[0]: (0x%08lx,%ld)] -> "
		 "[lkey: 0x%08X mem_h: 0x%p va: 0x%08lX sz: %ld]\n",
		 page_vec, page_vec->length,
		 (unsigned long)page_vec->pages[0],
		 (unsigned long)page_vec->data_size,
		 (unsigned int)mem_reg->lkey, mem_reg->mem_h,
		 (unsigned long)mem_reg->va, (unsigned long)mem_reg->len);
	return 0;
}

/**
 * Unregister (previously registered) memory.
 */
void iser_unreg_mem(struct iser_mem_reg *reg)
{
	int ret;

	iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h);

	ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
	if (ret)
		iser_err("ib_fmr_pool_unmap failed %d\n", ret);

	reg->mem_h = NULL;
}

int iser_post_recvl(struct iser_conn *ib_conn)
{
	struct ib_recv_wr rx_wr, *rx_wr_failed;
	struct ib_sge sge;
	int ib_ret;

	sge.addr = ib_conn->login_resp_dma;
	sge.length = ISER_RX_LOGIN_SIZE;
	sge.lkey = ib_conn->device->mr->lkey;

	rx_wr.wr_id = (unsigned long)ib_conn->login_resp_buf;
	rx_wr.sg_list = &sge;
	rx_wr.num_sge = 1;
	rx_wr.next = NULL;

	ib_conn->post_recv_buf_count++;
	ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
	if (ib_ret) {
		iser_err("ib_post_recv failed ret=%d\n", ib_ret);
		ib_conn->post_recv_buf_count--;
	}
	return ib_ret;
}

int iser_post_recvm(struct iser_conn *ib_conn, int count)
{
	struct ib_recv_wr *rx_wr, *rx_wr_failed;
	int i, ib_ret;
	unsigned int my_rx_head = ib_conn->rx_desc_head;
	struct iser_rx_desc *rx_desc;

	for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
		rx_desc = &ib_conn->rx_descs[my_rx_head];
		rx_wr->wr_id = (unsigned long)rx_desc;
		rx_wr->sg_list = &rx_desc->rx_sg;
		rx_wr->num_sge = 1;
		rx_wr->next = rx_wr + 1;
		my_rx_head = (my_rx_head + 1) & (ISER_QP_MAX_RECV_DTOS - 1);
	}

	rx_wr--;
	rx_wr->next = NULL; /* mark end of work requests list */

	ib_conn->post_recv_buf_count += count;
	ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
	if (ib_ret) {
		iser_err("ib_post_recv failed ret=%d\n", ib_ret);
		ib_conn->post_recv_buf_count -= count;
	} else
		ib_conn->rx_desc_head = my_rx_head;
	return ib_ret;
}


/**
 * iser_post_send - Initiate a Send DTO operation
 *
 * returns 0 on success, -1 on failure
 */
int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc)
{
	int ib_ret;
	struct ib_send_wr send_wr, *send_wr_failed;

	ib_dma_sync_single_for_device(ib_conn->device->ib_device,
				      tx_desc->dma_addr, ISER_HEADERS_LEN,
				      DMA_TO_DEVICE);

	send_wr.next = NULL;
	send_wr.wr_id = (unsigned long)tx_desc;
	send_wr.sg_list = tx_desc->tx_sg;
	send_wr.num_sge = tx_desc->num_sge;
	send_wr.opcode = IB_WR_SEND;
	send_wr.send_flags = IB_SEND_SIGNALED;

	atomic_inc(&ib_conn->post_send_buf_count);

	ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed);
	if (ib_ret) {
		iser_err("ib_post_send failed, ret:%d\n", ib_ret);
		atomic_dec(&ib_conn->post_send_buf_count);
	}
	return ib_ret;
}

static void iser_handle_comp_error(struct iser_tx_desc *desc,
				   struct iser_conn *ib_conn)
{
	if (desc && desc->type == ISCSI_TX_DATAOUT)
		kmem_cache_free(ig.desc_cache, desc);

	if (ib_conn->post_recv_buf_count == 0 &&
	    atomic_read(&ib_conn->post_send_buf_count) == 0) {
		/* getting here when the state is UP means that the conn is
		 * being terminated asynchronously from the iSCSI layer's
		 * perspective. */
		if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP,
					      ISER_CONN_TERMINATING))
			iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn,
					   ISCSI_ERR_CONN_FAILED);

		/* no more outstanding posts on the QP, complete the
		 * termination process without waiting for a disconnect event */
		ib_conn->state = ISER_CONN_DOWN;
		wake_up_interruptible(&ib_conn->wait);
	}
}

static int iser_drain_tx_cq(struct iser_device *device, int cq_index)
{
	struct ib_cq *cq = device->tx_cq[cq_index];
	struct ib_wc wc;
	struct iser_tx_desc *tx_desc;
	struct iser_conn *ib_conn;
	int completed_tx = 0;

	while (ib_poll_cq(cq, 1, &wc) == 1) {
		tx_desc = (struct iser_tx_desc *)(unsigned long)wc.wr_id;
		ib_conn = wc.qp->qp_context;
		if (wc.status == IB_WC_SUCCESS) {
			if (wc.opcode == IB_WC_SEND)
				iser_snd_completion(tx_desc, ib_conn);
			else
				iser_err("expected opcode %d got %d\n",
					 IB_WC_SEND, wc.opcode);
		} else {
			iser_err("tx id %llx status %d vend_err %x\n",
				 wc.wr_id, wc.status, wc.vendor_err);
			atomic_dec(&ib_conn->post_send_buf_count);
			iser_handle_comp_error(tx_desc, ib_conn);
		}
		completed_tx++;
	}
	return completed_tx;
}


static void iser_cq_tasklet_fn(unsigned long data)
{
	struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)data;
	struct iser_device *device = cq_desc->device;
	int cq_index = cq_desc->cq_index;
	struct ib_cq *cq = device->rx_cq[cq_index];
	struct ib_wc wc;
	struct iser_rx_desc *desc;
	unsigned long xfer_len;
	struct iser_conn *ib_conn;
	int completed_tx, completed_rx;
	completed_tx = completed_rx = 0;

	while (ib_poll_cq(cq, 1, &wc) == 1) {
		desc = (struct iser_rx_desc *)(unsigned long)wc.wr_id;
		BUG_ON(desc == NULL);
		ib_conn = wc.qp->qp_context;
		if (wc.status == IB_WC_SUCCESS) {
			if (wc.opcode == IB_WC_RECV) {
				xfer_len = (unsigned long)wc.byte_len;
				iser_rcv_completion(desc, xfer_len, ib_conn);
			} else
				iser_err("expected opcode %d got %d\n",
					 IB_WC_RECV, wc.opcode);
		} else {
			if (wc.status != IB_WC_WR_FLUSH_ERR)
				iser_err("rx id %llx status %d vend_err %x\n",
					 wc.wr_id, wc.status, wc.vendor_err);
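			/* the failed completion still consumed a posted receive buffer */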
			ib_conn->post_recv_buf_count--;
			iser_handle_comp_error(NULL, ib_conn);
		}
		completed_rx++;
		if (!(completed_rx & 63))
			completed_tx += iser_drain_tx_cq(device, cq_index);
	}
	/* it is assumed here that arming the CQ only once it is empty
	 * would not cause interrupts to be missed */
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);

	completed_tx += iser_drain_tx_cq(device, cq_index);
	iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx);
}

static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
{
	struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)cq_context;
	struct iser_device *device = cq_desc->device;
	int cq_index = cq_desc->cq_index;

	tasklet_schedule(&device->cq_tasklet[cq_index]);
}