/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2013 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <linux/kfifo.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_host.h>

#include "iscsi_iser.h"

/* Register user buffer memory and initialize passive rdma
 * dto descriptor. Total data size is stored in
 * iser_task->data[ISER_DIR_IN].data_len
 */
static int iser_prepare_read_cmd(struct iscsi_task *task,
                                 unsigned int edtl)
{
        struct iscsi_iser_task *iser_task = task->dd_data;
        struct iser_regd_buf *regd_buf;
        int err;
        struct iser_hdr *hdr = &iser_task->desc.iser_header;
        struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN];

        err = iser_dma_map_task_data(iser_task,
                                     buf_in,
                                     ISER_DIR_IN,
                                     DMA_FROM_DEVICE);
        if (err)
                return err;

        if (edtl > iser_task->data[ISER_DIR_IN].data_len) {
                iser_err("Total data length: %ld, less than EDTL: "
                         "%d, in READ cmd BHS itt: %d, conn: 0x%p\n",
                         iser_task->data[ISER_DIR_IN].data_len, edtl,
                         task->itt, iser_task->iser_conn);
                return -EINVAL;
        }

        err = iser_reg_rdma_mem(iser_task, ISER_DIR_IN);
        if (err) {
                iser_err("Failed to set up Data-IN RDMA\n");
                return err;
        }
        regd_buf = &iser_task->rdma_regd[ISER_DIR_IN];

        hdr->flags |= ISER_RSV;
        hdr->read_stag = cpu_to_be32(regd_buf->reg.rkey);
        hdr->read_va = cpu_to_be64(regd_buf->reg.va);

        iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n",
                 task->itt, regd_buf->reg.rkey,
                 (unsigned long long)regd_buf->reg.va);

        return 0;
}

/* Register user buffer memory and initialize passive rdma
 * dto descriptor. Total data size is stored in
 * task->data[ISER_DIR_OUT].data_len
 */
static int
iser_prepare_write_cmd(struct iscsi_task *task,
                       unsigned int imm_sz,
                       unsigned int unsol_sz,
                       unsigned int edtl)
{
        struct iscsi_iser_task *iser_task = task->dd_data;
        struct iser_regd_buf *regd_buf;
        int err;
        struct iser_hdr *hdr = &iser_task->desc.iser_header;
        struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT];
        struct ib_sge *tx_dsg = &iser_task->desc.tx_sg[1];

        err = iser_dma_map_task_data(iser_task,
                                     buf_out,
                                     ISER_DIR_OUT,
                                     DMA_TO_DEVICE);
        if (err)
                return err;

        if (edtl > iser_task->data[ISER_DIR_OUT].data_len) {
                iser_err("Total data length: %ld, less than EDTL: %d, "
                         "in WRITE cmd BHS itt: %d, conn: 0x%p\n",
                         iser_task->data[ISER_DIR_OUT].data_len,
                         edtl, task->itt, task->conn);
                return -EINVAL;
        }

        err = iser_reg_rdma_mem(iser_task, ISER_DIR_OUT);
        if (err != 0) {
                iser_err("Failed to register write cmd RDMA mem\n");
                return err;
        }

        regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT];

        if (unsol_sz < edtl) {
                hdr->flags |= ISER_WSV;
                hdr->write_stag = cpu_to_be32(regd_buf->reg.rkey);
                hdr->write_va = cpu_to_be64(regd_buf->reg.va + unsol_sz);

                iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X "
                         "VA:%#llX + unsol:%d\n",
                         task->itt, regd_buf->reg.rkey,
                         (unsigned long long)regd_buf->reg.va, unsol_sz);
        }

        if (imm_sz > 0) {
                iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n",
                         task->itt, imm_sz);
                tx_dsg->addr = regd_buf->reg.va;
                tx_dsg->length = imm_sz;
                tx_dsg->lkey = regd_buf->reg.lkey;
                iser_task->desc.num_sge = 2;
        }

        return 0;
}

/* creates a new tx descriptor and adds header regd buffer */
static void iser_create_send_desc(struct iser_conn *ib_conn,
                                  struct iser_tx_desc *tx_desc)
{
        struct iser_device *device = ib_conn->device;

        ib_dma_sync_single_for_cpu(device->ib_device,
                tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);

        memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
        tx_desc->iser_header.flags = ISER_VER;

        tx_desc->num_sge = 1;

        if (tx_desc->tx_sg[0].lkey != device->mr->lkey) {
                tx_desc->tx_sg[0].lkey = device->mr->lkey;
                iser_dbg("sdesc %p lkey mismatch, fixing\n", tx_desc);
        }
}

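/* allocate the connection's RX descriptors, DMA-map each one and set up its
 * receive SGE; on a mapping failure, unmap whatever was mapped and free the array
 */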
int iser_alloc_rx_descriptors(struct iser_conn *ib_conn)
{
        int i, j;
        u64 dma_addr;
        struct iser_rx_desc *rx_desc;
        struct ib_sge *rx_sg;
        struct iser_device *device = ib_conn->device;

        ib_conn->rx_descs = kmalloc(ISER_QP_MAX_RECV_DTOS *
                                    sizeof(struct iser_rx_desc), GFP_KERNEL);
        if (!ib_conn->rx_descs)
                goto rx_desc_alloc_fail;

        rx_desc = ib_conn->rx_descs;

        for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++) {
                dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
                                             ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
                if (ib_dma_mapping_error(device->ib_device, dma_addr))
                        goto rx_desc_dma_map_failed;

                rx_desc->dma_addr = dma_addr;

                rx_sg = &rx_desc->rx_sg;
                rx_sg->addr = rx_desc->dma_addr;
                rx_sg->length = ISER_RX_PAYLOAD_SIZE;
                rx_sg->lkey = device->mr->lkey;
        }

        ib_conn->rx_desc_head = 0;
        return 0;

rx_desc_dma_map_failed:
        rx_desc = ib_conn->rx_descs;
        for (j = 0; j < i; j++, rx_desc++)
                ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
                                    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
        kfree(ib_conn->rx_descs);
        ib_conn->rx_descs = NULL;
rx_desc_alloc_fail:
        iser_err("failed allocating rx descriptors / data buffers\n");
        return -ENOMEM;
}

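/* DMA-unmap and free the connection's RX descriptors, if any were allocated */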
void iser_free_rx_descriptors(struct iser_conn *ib_conn)
{
        int i;
        struct iser_rx_desc *rx_desc;
        struct iser_device *device = ib_conn->device;

        if (!ib_conn->rx_descs)
                return;

        rx_desc = ib_conn->rx_descs;
        for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++)
                ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
                                    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
        kfree(ib_conn->rx_descs);
}

static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
{
        struct iscsi_iser_conn *iser_conn = conn->dd_data;

        iser_dbg("req op %x flags %x\n", req->opcode, req->flags);
        /* check if this is the last login - going to full feature phase */
        if ((req->flags & ISCSI_FULL_FEATURE_PHASE) != ISCSI_FULL_FEATURE_PHASE)
                return 0;

        /*
         * Check that there is one posted recv buffer (for the last login
         * response) and no posted send buffers left - they must have been
         * consumed during previous login phases.
         */
        WARN_ON(iser_conn->ib_conn->post_recv_buf_count != 1);
        WARN_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0);

        iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX);
        /* Initial post receive buffers */
        if (iser_post_recvm(iser_conn->ib_conn, ISER_MIN_POSTED_RX))
                return -ENOMEM;

        return 0;
}

/**
 * iser_send_command - send command PDU
 */
int iser_send_command(struct iscsi_conn *conn,
                      struct iscsi_task *task)
{
        struct iscsi_iser_conn *iser_conn = conn->dd_data;
        struct iscsi_iser_task *iser_task = task->dd_data;
        unsigned long edtl;
        int err;
        struct iser_data_buf *data_buf;
        struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;
        struct scsi_cmnd *sc = task->sc;
        struct iser_tx_desc *tx_desc = &iser_task->desc;

        edtl = ntohl(hdr->data_length);

        /* build the tx desc regd header and add it to the tx desc dto */
        tx_desc->type = ISCSI_TX_SCSI_COMMAND;
        iser_create_send_desc(iser_conn->ib_conn, tx_desc);

        if (hdr->flags & ISCSI_FLAG_CMD_READ)
                data_buf = &iser_task->data[ISER_DIR_IN];
        else
                data_buf = &iser_task->data[ISER_DIR_OUT];

        if (scsi_sg_count(sc)) { /* using a scatter list */
                data_buf->buf = scsi_sglist(sc);
                data_buf->size = scsi_sg_count(sc);
        }

        data_buf->data_len = scsi_bufflen(sc);

        if (hdr->flags & ISCSI_FLAG_CMD_READ) {
                err = iser_prepare_read_cmd(task, edtl);
                if (err)
                        goto send_command_error;
        }
        if (hdr->flags & ISCSI_FLAG_CMD_WRITE) {
                err = iser_prepare_write_cmd(task,
                                             task->imm_count,
                                             task->imm_count +
                                             task->unsol_r2t.data_length,
                                             edtl);
                if (err)
                        goto send_command_error;
        }

        iser_task->status = ISER_TASK_STATUS_STARTED;

        err = iser_post_send(iser_conn->ib_conn, tx_desc);
        if (!err)
                return 0;

send_command_error:
        iser_err("conn %p failed task->itt %d err %d\n", conn, task->itt, err);
        return err;
}

/**
 * iser_send_data_out - send data out PDU
 */
int iser_send_data_out(struct iscsi_conn *conn,
                       struct iscsi_task *task,
                       struct iscsi_data *hdr)
{
        struct iscsi_iser_conn *iser_conn = conn->dd_data;
        struct iscsi_iser_task *iser_task = task->dd_data;
        struct iser_tx_desc *tx_desc = NULL;
        struct iser_regd_buf *regd_buf;
        unsigned long buf_offset;
        unsigned long data_seg_len;
        uint32_t itt;
        int err = 0;
        struct ib_sge *tx_dsg;

        itt = (__force uint32_t)hdr->itt;
        data_seg_len = ntoh24(hdr->dlength);
        buf_offset = ntohl(hdr->offset);

        iser_dbg("%s itt %d dseg_len %d offset %d\n",
                 __func__, (int)itt, (int)data_seg_len, (int)buf_offset);

        tx_desc = kmem_cache_zalloc(ig.desc_cache, GFP_ATOMIC);
        if (tx_desc == NULL) {
                iser_err("Failed to alloc desc for post dataout\n");
                return -ENOMEM;
        }

        tx_desc->type = ISCSI_TX_DATAOUT;
        tx_desc->iser_header.flags = ISER_VER;
        memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr));

        /* build the tx desc */
        iser_initialize_task_headers(task, tx_desc);

        regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT];
        tx_dsg = &tx_desc->tx_sg[1];
        tx_dsg->addr = regd_buf->reg.va + buf_offset;
        tx_dsg->length = data_seg_len;
        tx_dsg->lkey = regd_buf->reg.lkey;
        tx_desc->num_sge = 2;

        if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) {
                iser_err("Offset:%ld & DSL:%ld in Data-Out "
                         "inconsistent with total len:%ld, itt:%d\n",
                         buf_offset, data_seg_len,
                         iser_task->data[ISER_DIR_OUT].data_len, itt);
                err = -EINVAL;
                goto send_data_out_error;
        }
        iser_dbg("data-out itt: %d, offset: %ld, sz: %ld\n",
                 itt, buf_offset, data_seg_len);

        err = iser_post_send(iser_conn->ib_conn, tx_desc);
        if (!err)
                return 0;

send_data_out_error:
        kmem_cache_free(ig.desc_cache, tx_desc);
        iser_err("conn %p failed err %d\n", conn, err);
        return err;
}

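/**
 * iser_send_control - send control PDU
 */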
int iser_send_control(struct iscsi_conn *conn,
                      struct iscsi_task *task)
{
        struct iscsi_iser_conn *iser_conn = conn->dd_data;
        struct iscsi_iser_task *iser_task = task->dd_data;
        struct iser_tx_desc *mdesc = &iser_task->desc;
        unsigned long data_seg_len;
        int err = 0;
        struct iser_device *device;
        struct iser_conn *ib_conn = iser_conn->ib_conn;

        /* build the tx desc regd header and add it to the tx desc dto */
        mdesc->type = ISCSI_TX_CONTROL;
        iser_create_send_desc(iser_conn->ib_conn, mdesc);

        device = iser_conn->ib_conn->device;

        data_seg_len = ntoh24(task->hdr->dlength);

        if (data_seg_len > 0) {
                struct ib_sge *tx_dsg = &mdesc->tx_sg[1];
                if (task != conn->login_task) {
                        iser_err("data present on non login task!!!\n");
                        goto send_control_error;
                }

                ib_dma_sync_single_for_cpu(device->ib_device,
                        ib_conn->login_req_dma, task->data_count,
                        DMA_TO_DEVICE);

                memcpy(iser_conn->ib_conn->login_req_buf, task->data,
                       task->data_count);

                ib_dma_sync_single_for_device(device->ib_device,
                        ib_conn->login_req_dma, task->data_count,
                        DMA_TO_DEVICE);

                tx_dsg->addr = iser_conn->ib_conn->login_req_dma;
                tx_dsg->length = task->data_count;
                tx_dsg->lkey = device->mr->lkey;
                mdesc->num_sge = 2;
        }

        if (task == conn->login_task) {
                err = iser_post_recvl(iser_conn->ib_conn);
                if (err)
                        goto send_control_error;
                err = iser_post_rx_bufs(conn, task->hdr);
                if (err)
                        goto send_control_error;
        }

        err = iser_post_send(iser_conn->ib_conn, mdesc);
        if (!err)
                return 0;

send_control_error:
        iser_err("conn %p failed err %d\n", conn, err);
        return err;
}

/**
 * iser_rcv_completion - recv DTO completion
 */
void iser_rcv_completion(struct iser_rx_desc *rx_desc,
                         unsigned long rx_xfer_len,
                         struct iser_conn *ib_conn)
{
        struct iscsi_iser_conn *conn = ib_conn->iser_conn;
        struct iscsi_hdr *hdr;
        u64 rx_dma;
        int rx_buflen, outstanding, count, err;

        /* differentiate between the login response and all other PDUs */
        if ((char *)rx_desc == ib_conn->login_resp_buf) {
                rx_dma = ib_conn->login_resp_dma;
                rx_buflen = ISER_RX_LOGIN_SIZE;
        } else {
                rx_dma = rx_desc->dma_addr;
                rx_buflen = ISER_RX_PAYLOAD_SIZE;
        }

        ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
                                   rx_buflen, DMA_FROM_DEVICE);

        hdr = &rx_desc->iscsi_header;

        iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
                 hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN));

        iscsi_iser_recv(conn->iscsi_conn, hdr,
                        rx_desc->data, rx_xfer_len - ISER_HEADERS_LEN);

        ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
                                      rx_buflen, DMA_FROM_DEVICE);

        /* decrementing conn->post_recv_buf_count only --after-- freeing the
         * task eliminates the need to worry about tasks which are completed in
         * parallel to the execution of iser_conn_term. So the code that waits
         * for the posted rx bufs refcount to become zero handles everything.
         */
        conn->ib_conn->post_recv_buf_count--;

        if (rx_dma == ib_conn->login_resp_dma)
                return;

        outstanding = ib_conn->post_recv_buf_count;
        if (outstanding + ISER_MIN_POSTED_RX <= ISER_QP_MAX_RECV_DTOS) {
                count = min(ISER_QP_MAX_RECV_DTOS - outstanding,
                            ISER_MIN_POSTED_RX);
                err = iser_post_recvm(ib_conn, count);
                if (err)
                        iser_err("posting %d rx bufs err %d\n", count, err);
        }
}

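/* handle a send completion: free dataout descriptors and, for control PDUs
 * carrying a reserved ITT, drop the task reference
 */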
void iser_snd_completion(struct iser_tx_desc *tx_desc,
                         struct iser_conn *ib_conn)
{
        struct iscsi_task *task;
        struct iser_device *device = ib_conn->device;

        if (tx_desc->type == ISCSI_TX_DATAOUT) {
                ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
                                    ISER_HEADERS_LEN, DMA_TO_DEVICE);
                kmem_cache_free(ig.desc_cache, tx_desc);
        }

        atomic_dec(&ib_conn->post_send_buf_count);

        if (tx_desc->type == ISCSI_TX_CONTROL) {
                /* this arithmetic is legal by libiscsi dd_data allocation */
                task = (void *) ((long)(void *)tx_desc -
                                 sizeof(struct iscsi_task));
                if (task->hdr->itt == RESERVED_ITT)
                        iscsi_put_task(task);
        }
}

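/* reset the per-task RDMA state before command processing starts */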
void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
{
        iser_task->status = ISER_TASK_STATUS_INIT;

        iser_task->dir[ISER_DIR_IN] = 0;
        iser_task->dir[ISER_DIR_OUT] = 0;

        iser_task->data[ISER_DIR_IN].data_len = 0;
        iser_task->data[ISER_DIR_OUT].data_len = 0;

        memset(&iser_task->rdma_regd[ISER_DIR_IN], 0,
               sizeof(struct iser_regd_buf));
        memset(&iser_task->rdma_regd[ISER_DIR_OUT], 0,
               sizeof(struct iser_regd_buf));
}

/* release per-task RDMA resources: undo unaligned-copy bounce buffers,
 * unregister FMR-registered memory and DMA-unmap the task data
 */
void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
{
        int is_rdma_aligned = 1;
        struct iser_regd_buf *regd;

        /* if we were reading, copy back to the unaligned sglist;
         * in any case dma_unmap and free the copy
         */
        if (iser_task->data_copy[ISER_DIR_IN].copy_buf != NULL) {
                is_rdma_aligned = 0;
                iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_IN);
        }
        if (iser_task->data_copy[ISER_DIR_OUT].copy_buf != NULL) {
                is_rdma_aligned = 0;
                iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_OUT);
        }

        if (iser_task->dir[ISER_DIR_IN]) {
                regd = &iser_task->rdma_regd[ISER_DIR_IN];
                if (regd->reg.is_fmr)
                        iser_unreg_mem(&regd->reg);
        }

        if (iser_task->dir[ISER_DIR_OUT]) {
                regd = &iser_task->rdma_regd[ISER_DIR_OUT];
                if (regd->reg.is_fmr)
                        iser_unreg_mem(&regd->reg);
        }

        /* if the data was unaligned, it was already unmapped and then copied */
        if (is_rdma_aligned)
                iser_dma_unmap_task_data(iser_task);
}