/*
 * Copyright (c) 2005 Cisco Systems.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/parser.h>
#include <linux/random.h>
#include <linux/jiffies.h>
#include <rdma/ib_cache.h>

#include <linux/atomic.h>

#include <scsi/scsi.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_dbg.h>
#include <scsi/scsi_tcq.h>
#include <scsi/srp.h>
#include <scsi/scsi_transport_srp.h>

#include "ib_srp.h"

#define DRV_NAME	"ib_srp"
#define PFX		DRV_NAME ": "
#define DRV_VERSION	"2.0"
#define DRV_RELDATE	"July 26, 2015"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
MODULE_INFO(release_date, DRV_RELDATE);

static unsigned int srp_sg_tablesize;
static unsigned int cmd_sg_entries;
static unsigned int indirect_sg_entries;
static bool allow_ext_sg;
static bool prefer_fr = true;
static bool register_always = true;
static bool never_register;
static int topspin_workarounds = 1;

module_param(srp_sg_tablesize, uint, 0444);
MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");

module_param(cmd_sg_entries, uint, 0444);
MODULE_PARM_DESC(cmd_sg_entries,
		 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");

module_param(indirect_sg_entries, uint, 0444);
MODULE_PARM_DESC(indirect_sg_entries,
		 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");

module_param(allow_ext_sg, bool, 0444);
MODULE_PARM_DESC(allow_ext_sg,
		 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");

module_param(topspin_workarounds, int, 0444);
MODULE_PARM_DESC(topspin_workarounds,
		 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
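/*
 * The next three parameters control how this initiator registers memory for
 * RDMA: prefer_fr selects fast registration over FMR when the HCA supports
 * both, register_always forces registration even for buffers that map to a
 * single contiguous region, and never_register disables memory registration
 * entirely.
 */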
module_param(prefer_fr, bool, 0444);
MODULE_PARM_DESC(prefer_fr,
		 "Whether to use fast registration if both FMR and fast registration are supported");

module_param(register_always, bool, 0444);
MODULE_PARM_DESC(register_always,
		 "Use memory registration even for contiguous memory regions");

module_param(never_register, bool, 0444);
MODULE_PARM_DESC(never_register, "Never register memory");

static const struct kernel_param_ops srp_tmo_ops;

static int srp_reconnect_delay = 10;
module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");

static int srp_fast_io_fail_tmo = 15;
module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(fast_io_fail_tmo,
		 "Number of seconds between the observation of a transport"
		 " layer error and failing all I/O. \"off\" means that this"
		 " functionality is disabled.");

static int srp_dev_loss_tmo = 600;
module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dev_loss_tmo,
		 "Maximum number of seconds that the SRP transport should"
		 " insulate transport layer errors. After this time has been"
		 " exceeded the SCSI host is removed. Should be"
		 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
		 " if fast_io_fail_tmo has not been set. \"off\" means that"
		 " this functionality is disabled.");

static unsigned ch_count;
module_param(ch_count, uint, 0444);
MODULE_PARM_DESC(ch_count,
		 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
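/*
 * Example, for illustration only (the values shown are arbitrary): the
 * parameters above can be set at load time, e.g. "modprobe ib_srp ch_count=2".
 * reconnect_delay, fast_io_fail_tmo and dev_loss_tmo are additionally
 * writable at runtime through /sys/module/ib_srp/parameters/ because they
 * are registered with module_param_cb() and S_IWUSR; srp_tmo_set() below
 * validates every new value against the other two timeouts before it is
 * accepted.
 */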
static void srp_add_one(struct ib_device *device);
static void srp_remove_one(struct ib_device *device, void *client_data);
static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
		const char *opname);
static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);

static struct scsi_transport_template *ib_srp_transport_template;
static struct workqueue_struct *srp_remove_wq;

static struct ib_client srp_client = {
	.name   = "srp",
	.add    = srp_add_one,
	.remove = srp_remove_one
};

static struct ib_sa_client srp_sa_client;

static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
{
	int tmo = *(int *)kp->arg;

	if (tmo >= 0)
		return sprintf(buffer, "%d", tmo);
	else
		return sprintf(buffer, "off");
}

static int srp_tmo_set(const char *val, const struct kernel_param *kp)
{
	int tmo, res;

	res = srp_parse_tmo(&tmo, val);
	if (res)
		goto out;

	if (kp->arg == &srp_reconnect_delay)
		res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
				    srp_dev_loss_tmo);
	else if (kp->arg == &srp_fast_io_fail_tmo)
		res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
	else
		res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
				    tmo);
	if (res)
		goto out;
	*(int *)kp->arg = tmo;

out:
	return res;
}

static const struct kernel_param_ops srp_tmo_ops = {
	.get = srp_tmo_get,
	.set = srp_tmo_set,
};

static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
{
	return (struct srp_target_port *) host->hostdata;
}

static const char *srp_target_info(struct Scsi_Host *host)
{
	return host_to_target(host)->target_name;
}

static int srp_target_is_topspin(struct srp_target_port *target)
{
	static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
	static const u8 cisco_oui[3]   = { 0x00, 0x1b, 0x0d };

	return topspin_workarounds &&
		(!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
		 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
}

static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
				   gfp_t gfp_mask,
				   enum dma_data_direction direction)
{
	struct srp_iu *iu;

	iu = kmalloc(sizeof *iu, gfp_mask);
	if (!iu)
		goto out;

	iu->buf = kzalloc(size, gfp_mask);
	if (!iu->buf)
		goto out_free_iu;

	iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
				    direction);
	if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
		goto out_free_buf;

	iu->size      = size;
	iu->direction = direction;

	return iu;

out_free_buf:
	kfree(iu->buf);
out_free_iu:
	kfree(iu);
out:
	return NULL;
}

static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
{
	if (!iu)
		return;

	ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
			    iu->direction);
	kfree(iu->buf);
	kfree(iu);
}

static void srp_qp_event(struct ib_event *event, void *context)
{
	pr_debug("QP event %s (%d)\n",
		 ib_event_msg(event->event), event->event);
}
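/*
 * Transition a freshly created queue pair to the INIT state: look up the
 * P_Key index for the target port, enable remote read/write access and bind
 * the QP to the local HCA port.
 */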
static int srp_init_qp(struct srp_target_port *target,
		       struct ib_qp *qp)
{
	struct ib_qp_attr *attr;
	int ret;

	attr = kmalloc(sizeof *attr, GFP_KERNEL);
	if (!attr)
		return -ENOMEM;

	ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
				  target->srp_host->port,
				  be16_to_cpu(target->pkey),
				  &attr->pkey_index);
	if (ret)
		goto out;

	attr->qp_state        = IB_QPS_INIT;
	attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
				 IB_ACCESS_REMOTE_WRITE);
	attr->port_num        = target->srp_host->port;

	ret = ib_modify_qp(qp, attr,
			   IB_QP_STATE		|
			   IB_QP_PKEY_INDEX	|
			   IB_QP_ACCESS_FLAGS	|
			   IB_QP_PORT);

out:
	kfree(attr);
	return ret;
}

static int srp_new_cm_id(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	struct ib_cm_id *new_cm_id;

	new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
				    srp_cm_handler, ch);
	if (IS_ERR(new_cm_id))
		return PTR_ERR(new_cm_id);

	if (ch->cm_id)
		ib_destroy_cm_id(ch->cm_id);
	ch->cm_id = new_cm_id;
	ch->path.sgid = target->sgid;
	ch->path.dgid = target->orig_dgid;
	ch->path.pkey = target->pkey;
	ch->path.service_id = target->service_id;

	return 0;
}

static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
{
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_fmr_pool_param fmr_param;

	memset(&fmr_param, 0, sizeof(fmr_param));
	fmr_param.pool_size	    = target->mr_pool_size;
	fmr_param.dirty_watermark   = fmr_param.pool_size / 4;
	fmr_param.cache		    = 1;
	fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
	fmr_param.page_shift	    = ilog2(dev->mr_page_size);
	fmr_param.access	    = (IB_ACCESS_LOCAL_WRITE |
				       IB_ACCESS_REMOTE_WRITE |
				       IB_ACCESS_REMOTE_READ);

	return ib_create_fmr_pool(dev->pd, &fmr_param);
}

/**
 * srp_destroy_fr_pool() - free the resources owned by a pool
 * @pool: Fast registration pool to be destroyed.
 */
static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
{
	int i;
	struct srp_fr_desc *d;

	if (!pool)
		return;

	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
		if (d->mr)
			ib_dereg_mr(d->mr);
	}
	kfree(pool);
}

/**
 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
 * @device:            IB device to allocate fast registration descriptors for.
 * @pd:                Protection domain associated with the FR descriptors.
 * @pool_size:         Number of descriptors to allocate.
 * @max_page_list_len: Maximum fast registration work request page list length.
 */
static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
					      struct ib_pd *pd, int pool_size,
					      int max_page_list_len)
{
	struct srp_fr_pool *pool;
	struct srp_fr_desc *d;
	struct ib_mr *mr;
	int i, ret = -EINVAL;

	if (pool_size <= 0)
		goto err;
	ret = -ENOMEM;
	pool = kzalloc(sizeof(struct srp_fr_pool) +
		       pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
	if (!pool)
		goto err;
	pool->size = pool_size;
	pool->max_page_list_len = max_page_list_len;
	spin_lock_init(&pool->lock);
	INIT_LIST_HEAD(&pool->free_list);

	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
		mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
				 max_page_list_len);
		if (IS_ERR(mr)) {
			ret = PTR_ERR(mr);
			goto destroy_pool;
		}
		d->mr = mr;
		list_add_tail(&d->entry, &pool->free_list);
	}

out:
	return pool;

destroy_pool:
	srp_destroy_fr_pool(pool);

err:
	pool = ERR_PTR(ret);
	goto out;
}

/**
 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
 * @pool: Pool to obtain descriptor from.
 */
static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
{
	struct srp_fr_desc *d = NULL;
	unsigned long flags;

	spin_lock_irqsave(&pool->lock, flags);
	if (!list_empty(&pool->free_list)) {
		d = list_first_entry(&pool->free_list, typeof(*d), entry);
		list_del(&d->entry);
	}
	spin_unlock_irqrestore(&pool->lock, flags);

	return d;
}

/**
 * srp_fr_pool_put() - put an FR descriptor back in the free list
 * @pool: Pool the descriptor was allocated from.
 * @desc: Pointer to an array of fast registration descriptor pointers.
 * @n:    Number of descriptors to put back.
 *
 * Note: The caller must already have queued an invalidation request for
 * desc->mr->rkey before calling this function.
 */
static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
			    int n)
{
	unsigned long flags;
	int i;

	spin_lock_irqsave(&pool->lock, flags);
	for (i = 0; i < n; i++)
		list_add(&desc[i]->entry, &pool->free_list);
	spin_unlock_irqrestore(&pool->lock, flags);
}

static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
{
	struct srp_device *dev = target->srp_host->srp_dev;

	return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
				  dev->max_pages_per_mr);
}
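/*
 * Life cycle of a fast registration descriptor (see srp_map_finish_fr() and
 * srp_unmap_data() further down): srp_fr_pool_get() -> post an IB_WR_REG_MR
 * work request -> use the registered rkey in SRP data descriptors -> post an
 * IB_WR_LOCAL_INV work request -> srp_fr_pool_put().
 */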
/**
 * srp_destroy_qp() - destroy an RDMA queue pair
 * @qp: RDMA queue pair.
 *
 * Drain the qp before destroying it. This avoids that the receive
 * completion handler can access the queue pair while it is
 * being destroyed.
 */
static void srp_destroy_qp(struct ib_qp *qp)
{
	ib_drain_rq(qp);
	ib_destroy_qp(qp);
}

static int srp_create_ch_ib(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_qp_init_attr *init_attr;
	struct ib_cq *recv_cq, *send_cq;
	struct ib_qp *qp;
	struct ib_fmr_pool *fmr_pool = NULL;
	struct srp_fr_pool *fr_pool = NULL;
	const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
	int ret;

	init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
	if (!init_attr)
		return -ENOMEM;

	/* queue_size + 1 for ib_drain_rq() */
	recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
			      ch->comp_vector, IB_POLL_SOFTIRQ);
	if (IS_ERR(recv_cq)) {
		ret = PTR_ERR(recv_cq);
		goto err;
	}

	send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
			      ch->comp_vector, IB_POLL_DIRECT);
	if (IS_ERR(send_cq)) {
		ret = PTR_ERR(send_cq);
		goto err_recv_cq;
	}

	init_attr->event_handler       = srp_qp_event;
	init_attr->cap.max_send_wr     = m * target->queue_size;
	init_attr->cap.max_recv_wr     = target->queue_size + 1;
	init_attr->cap.max_recv_sge    = 1;
	init_attr->cap.max_send_sge    = 1;
	init_attr->sq_sig_type         = IB_SIGNAL_REQ_WR;
	init_attr->qp_type             = IB_QPT_RC;
	init_attr->send_cq             = send_cq;
	init_attr->recv_cq             = recv_cq;

	qp = ib_create_qp(dev->pd, init_attr);
	if (IS_ERR(qp)) {
		ret = PTR_ERR(qp);
		goto err_send_cq;
	}

	ret = srp_init_qp(target, qp);
	if (ret)
		goto err_qp;

	if (dev->use_fast_reg) {
		fr_pool = srp_alloc_fr_pool(target);
		if (IS_ERR(fr_pool)) {
			ret = PTR_ERR(fr_pool);
			shost_printk(KERN_WARNING, target->scsi_host, PFX
				     "FR pool allocation failed (%d)\n", ret);
			goto err_qp;
		}
	} else if (dev->use_fmr) {
		fmr_pool = srp_alloc_fmr_pool(target);
		if (IS_ERR(fmr_pool)) {
			ret = PTR_ERR(fmr_pool);
			shost_printk(KERN_WARNING, target->scsi_host, PFX
				     "FMR pool allocation failed (%d)\n", ret);
			goto err_qp;
		}
	}

	if (ch->qp)
		srp_destroy_qp(ch->qp);
	if (ch->recv_cq)
		ib_free_cq(ch->recv_cq);
	if (ch->send_cq)
		ib_free_cq(ch->send_cq);

	ch->qp = qp;
	ch->recv_cq = recv_cq;
	ch->send_cq = send_cq;

	if (dev->use_fast_reg) {
		if (ch->fr_pool)
			srp_destroy_fr_pool(ch->fr_pool);
		ch->fr_pool = fr_pool;
	} else if (dev->use_fmr) {
		if (ch->fmr_pool)
			ib_destroy_fmr_pool(ch->fmr_pool);
		ch->fmr_pool = fmr_pool;
	}

	kfree(init_attr);
	return 0;

err_qp:
	srp_destroy_qp(qp);

err_send_cq:
	ib_free_cq(send_cq);

err_recv_cq:
	ib_free_cq(recv_cq);

err:
	kfree(init_attr);
	return ret;
}

/*
 * Note: this function may be called without srp_alloc_iu_bufs() having been
 * invoked. Hence the ch->[rt]x_ring checks.
 */
static void srp_free_ch_ib(struct srp_target_port *target,
			   struct srp_rdma_ch *ch)
{
	struct srp_device *dev = target->srp_host->srp_dev;
	int i;

	if (!ch->target)
		return;

	if (ch->cm_id) {
		ib_destroy_cm_id(ch->cm_id);
		ch->cm_id = NULL;
	}

	/* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */
	if (!ch->qp)
		return;

	if (dev->use_fast_reg) {
		if (ch->fr_pool)
			srp_destroy_fr_pool(ch->fr_pool);
	} else if (dev->use_fmr) {
		if (ch->fmr_pool)
			ib_destroy_fmr_pool(ch->fmr_pool);
	}

	srp_destroy_qp(ch->qp);
	ib_free_cq(ch->send_cq);
	ib_free_cq(ch->recv_cq);

	/*
	 * Prevent the SCSI error handler from using this channel after it
	 * has been freed: the error handler may keep trying to perform
	 * recovery actions after scsi_remove_host() has returned.
	 */
	ch->target = NULL;

	ch->qp = NULL;
	ch->send_cq = ch->recv_cq = NULL;

	if (ch->rx_ring) {
		for (i = 0; i < target->queue_size; ++i)
			srp_free_iu(target->srp_host, ch->rx_ring[i]);
		kfree(ch->rx_ring);
		ch->rx_ring = NULL;
	}
	if (ch->tx_ring) {
		for (i = 0; i < target->queue_size; ++i)
			srp_free_iu(target->srp_host, ch->tx_ring[i]);
		kfree(ch->tx_ring);
		ch->tx_ring = NULL;
	}
}

static void srp_path_rec_completion(int status,
				    struct ib_sa_path_rec *pathrec,
				    void *ch_ptr)
{
	struct srp_rdma_ch *ch = ch_ptr;
	struct srp_target_port *target = ch->target;

	ch->status = status;
	if (status)
		shost_printk(KERN_ERR, target->scsi_host,
			     PFX "Got failed path rec status %d\n", status);
	else
		ch->path = *pathrec;
	complete(&ch->done);
}

static int srp_lookup_path(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	int ret;

	ch->path.numb_path = 1;

	init_completion(&ch->done);

	ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
					       target->srp_host->srp_dev->dev,
					       target->srp_host->port,
					       &ch->path,
					       IB_SA_PATH_REC_SERVICE_ID |
					       IB_SA_PATH_REC_DGID	 |
					       IB_SA_PATH_REC_SGID	 |
					       IB_SA_PATH_REC_NUMB_PATH	 |
					       IB_SA_PATH_REC_PKEY,
					       SRP_PATH_REC_TIMEOUT_MS,
					       GFP_KERNEL,
					       srp_path_rec_completion,
					       ch, &ch->path_query);
	if (ch->path_query_id < 0)
		return ch->path_query_id;

	ret = wait_for_completion_interruptible(&ch->done);
	if (ret < 0)
		return ret;

	if (ch->status < 0)
		shost_printk(KERN_WARNING, target->scsi_host,
			     PFX "Path record query failed\n");

	return ch->status;
}

static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
{
	struct srp_target_port *target = ch->target;
	struct {
		struct ib_cm_req_param param;
		struct srp_login_req   priv;
	} *req = NULL;
	int status;

	req = kzalloc(sizeof *req, GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	req->param.primary_path	      = &ch->path;
	req->param.alternate_path     = NULL;
	req->param.service_id	      = target->service_id;
	req->param.qp_num	      = ch->qp->qp_num;
	req->param.qp_type	      = ch->qp->qp_type;
	req->param.private_data	      = &req->priv;
	req->param.private_data_len   = sizeof req->priv;
	req->param.flow_control	      = 1;

	get_random_bytes(&req->param.starting_psn, 4);
	req->param.starting_psn      &= 0xffffff;

	/*
	 * Pick some arbitrary defaults here; we could make these
	 * module parameters if anyone cared about setting them.
	 */
	req->param.responder_resources	      = 4;
	req->param.remote_cm_response_timeout = 20;
	req->param.local_cm_response_timeout  = 20;
	req->param.retry_count                = target->tl_retry_count;
	req->param.rnr_retry_count 	      = 7;
	req->param.max_cm_retries 	      = 15;

	req->priv.opcode     	= SRP_LOGIN_REQ;
	req->priv.tag        	= 0;
	req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
	req->priv.req_buf_fmt 	= cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
					      SRP_BUF_FORMAT_INDIRECT);
	req->priv.req_flags	= (multich ? SRP_MULTICHAN_MULTI :
				   SRP_MULTICHAN_SINGLE);
	/*
	 * In the published SRP specification (draft rev. 16a), the
	 * port identifier format is 8 bytes of ID extension followed
	 * by 8 bytes of GUID.  Older drafts put the two halves in the
	 * opposite order, so that the GUID comes first.
	 *
	 * Targets conforming to these obsolete drafts can be
	 * recognized by the I/O Class they report.
	 */
	if (target->io_class == SRP_REV10_IB_IO_CLASS) {
		memcpy(req->priv.initiator_port_id,
		       &target->sgid.global.interface_id, 8);
		memcpy(req->priv.initiator_port_id + 8,
		       &target->initiator_ext, 8);
		memcpy(req->priv.target_port_id,     &target->ioc_guid, 8);
		memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
	} else {
		memcpy(req->priv.initiator_port_id,
		       &target->initiator_ext, 8);
		memcpy(req->priv.initiator_port_id + 8,
		       &target->sgid.global.interface_id, 8);
		memcpy(req->priv.target_port_id,     &target->id_ext, 8);
		memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
	}

	/*
	 * Topspin/Cisco SRP targets will reject our login unless we
	 * zero out the first 8 bytes of our initiator port ID and set
	 * the second 8 bytes to the local node GUID.
	 */
	if (srp_target_is_topspin(target)) {
		shost_printk(KERN_DEBUG, target->scsi_host,
			     PFX "Topspin/Cisco initiator port ID workaround "
			     "activated for target GUID %016llx\n",
			     be64_to_cpu(target->ioc_guid));
		memset(req->priv.initiator_port_id, 0, 8);
		memcpy(req->priv.initiator_port_id + 8,
		       &target->srp_host->srp_dev->dev->node_guid, 8);
	}

	status = ib_send_cm_req(ch->cm_id, &req->param);

	kfree(req);

	return status;
}

static bool srp_queue_remove_work(struct srp_target_port *target)
{
	bool changed = false;

	spin_lock_irq(&target->lock);
	if (target->state != SRP_TARGET_REMOVED) {
		target->state = SRP_TARGET_REMOVED;
		changed = true;
	}
	spin_unlock_irq(&target->lock);

	if (changed)
		queue_work(srp_remove_wq, &target->remove_work);

	return changed;
}

static void srp_disconnect_target(struct srp_target_port *target)
{
	struct srp_rdma_ch *ch;
	int i;

	/* XXX should send SRP_I_LOGOUT request */

	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		ch->connected = false;
		if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
			shost_printk(KERN_DEBUG, target->scsi_host,
				     PFX "Sending CM DREQ failed\n");
		}
	}
}

static void srp_free_req_data(struct srp_target_port *target,
			      struct srp_rdma_ch *ch)
{
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_device *ibdev = dev->dev;
	struct srp_request *req;
	int i;

	if (!ch->req_ring)
		return;

	for (i = 0; i < target->req_ring_size; ++i) {
		req = &ch->req_ring[i];
		if (dev->use_fast_reg) {
			kfree(req->fr_list);
		} else {
			kfree(req->fmr_list);
			kfree(req->map_page);
		}
		if (req->indirect_dma_addr) {
			ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
					    target->indirect_size,
					    DMA_TO_DEVICE);
		}
		kfree(req->indirect_desc);
	}

	kfree(ch->req_ring);
	ch->req_ring = NULL;
}

static int srp_alloc_req_data(struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *srp_dev = target->srp_host->srp_dev;
	struct ib_device *ibdev = srp_dev->dev;
	struct srp_request *req;
	void *mr_list;
	dma_addr_t dma_addr;
	int i, ret = -ENOMEM;

	ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
			       GFP_KERNEL);
	if (!ch->req_ring)
		goto out;

	for (i = 0; i < target->req_ring_size; ++i) {
		req = &ch->req_ring[i];
		mr_list = kmalloc(target->mr_per_cmd * sizeof(void *),
				  GFP_KERNEL);
		if (!mr_list)
			goto out;
		if (srp_dev->use_fast_reg) {
			req->fr_list = mr_list;
		} else {
			req->fmr_list = mr_list;
			req->map_page = kmalloc(srp_dev->max_pages_per_mr *
						sizeof(void *), GFP_KERNEL);
			if (!req->map_page)
				goto out;
		}
		req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
		if (!req->indirect_desc)
			goto out;

		dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
					     target->indirect_size,
					     DMA_TO_DEVICE);
		if (ib_dma_mapping_error(ibdev, dma_addr))
			goto out;

		req->indirect_dma_addr = dma_addr;
	}
	ret = 0;

out:
	return ret;
}

/**
 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
 * @shost: SCSI host whose attributes to remove from sysfs.
 *
 * Note: Any attributes defined in the host template and that did not exist
 * before invocation of this function will be ignored.
 */
static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
{
	struct device_attribute **attr;

	for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
		device_remove_file(&shost->shost_dev, *attr);
}

static void srp_remove_target(struct srp_target_port *target)
{
	struct srp_rdma_ch *ch;
	int i;

	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);

	srp_del_scsi_host_attr(target->scsi_host);
	srp_rport_get(target->rport);
	srp_remove_host(target->scsi_host);
	scsi_remove_host(target->scsi_host);
	srp_stop_rport_timers(target->rport);
	srp_disconnect_target(target);
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		srp_free_ch_ib(target, ch);
	}
	cancel_work_sync(&target->tl_err_work);
	srp_rport_put(target->rport);
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		srp_free_req_data(target, ch);
	}
	kfree(target->ch);
	target->ch = NULL;

	spin_lock(&target->srp_host->target_lock);
	list_del(&target->list);
	spin_unlock(&target->srp_host->target_lock);

	scsi_host_put(target->scsi_host);
}

static void srp_remove_work(struct work_struct *work)
{
	struct srp_target_port *target =
		container_of(work, struct srp_target_port, remove_work);

	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);

	srp_remove_target(target);
}

static void srp_rport_delete(struct srp_rport *rport)
{
	struct srp_target_port *target = rport->lld_data;

	srp_queue_remove_work(target);
}
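/*
 * Target removal path: srp_rport_delete() and other callers invoke
 * srp_queue_remove_work(), which changes the target state to
 * SRP_TARGET_REMOVED exactly once and queues srp_remove_work() on
 * srp_remove_wq; srp_remove_target() then tears down the SCSI host, the
 * RDMA channels and the request rings.
 */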
/**
 * srp_connected_ch() - number of connected channels
 * @target: SRP target port.
 */
static int srp_connected_ch(struct srp_target_port *target)
{
	int i, c = 0;

	for (i = 0; i < target->ch_count; i++)
		c += target->ch[i].connected;

	return c;
}

static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
{
	struct srp_target_port *target = ch->target;
	int ret;

	WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);

	ret = srp_lookup_path(ch);
	if (ret)
		goto out;

	while (1) {
		init_completion(&ch->done);
		ret = srp_send_req(ch, multich);
		if (ret)
			goto out;
		ret = wait_for_completion_interruptible(&ch->done);
		if (ret < 0)
			goto out;

		/*
		 * The CM event handling code will set status to
		 * SRP_PORT_REDIRECT if we get a port redirect REJ
		 * back, or SRP_DLID_REDIRECT if we get a lid/qp
		 * redirect REJ back.
		 */
		ret = ch->status;
		switch (ret) {
		case 0:
			ch->connected = true;
			goto out;

		case SRP_PORT_REDIRECT:
			ret = srp_lookup_path(ch);
			if (ret)
				goto out;
			break;

		case SRP_DLID_REDIRECT:
			break;

		case SRP_STALE_CONN:
			shost_printk(KERN_ERR, target->scsi_host, PFX
				     "giving up on stale connection\n");
			ret = -ECONNRESET;
			goto out;

		default:
			goto out;
		}
	}

out:
	return ret <= 0 ? ret : -ENODEV;
}

static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
{
	srp_handle_qp_err(cq, wc, "INV RKEY");
}

static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
		u32 rkey)
{
	struct ib_send_wr *bad_wr;
	struct ib_send_wr wr = {
		.opcode		    = IB_WR_LOCAL_INV,
		.next		    = NULL,
		.num_sge	    = 0,
		.send_flags	    = 0,
		.ex.invalidate_rkey = rkey,
	};

	wr.wr_cqe = &req->reg_cqe;
	req->reg_cqe.done = srp_inv_rkey_err_done;
	return ib_post_send(ch->qp, &wr, &bad_wr);
}

static void srp_unmap_data(struct scsi_cmnd *scmnd,
			   struct srp_rdma_ch *ch,
			   struct srp_request *req)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_device *ibdev = dev->dev;
	int i, res;

	if (!scsi_sglist(scmnd) ||
	    (scmnd->sc_data_direction != DMA_TO_DEVICE &&
	     scmnd->sc_data_direction != DMA_FROM_DEVICE))
		return;

	if (dev->use_fast_reg) {
		struct srp_fr_desc **pfr;

		for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
			res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
			if (res < 0) {
				shost_printk(KERN_ERR, target->scsi_host, PFX
					     "Queueing INV WR for rkey %#x failed (%d)\n",
					     (*pfr)->mr->rkey, res);
				queue_work(system_long_wq,
					   &target->tl_err_work);
			}
		}
		if (req->nmdesc)
			srp_fr_pool_put(ch->fr_pool, req->fr_list,
					req->nmdesc);
	} else if (dev->use_fmr) {
		struct ib_pool_fmr **pfmr;

		for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
			ib_fmr_pool_unmap(*pfmr);
	}

	ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
			scmnd->sc_data_direction);
}
/**
 * srp_claim_req - Take ownership of the scmnd associated with a request.
 * @ch: SRP RDMA channel.
 * @req: SRP request.
 * @sdev: If not NULL, only take ownership for this SCSI device.
 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
 *         ownership of @req->scmnd if it equals @scmnd.
 *
 * Return value:
 * Either NULL or a pointer to the SCSI command the caller became owner of.
 */
static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
				       struct srp_request *req,
				       struct scsi_device *sdev,
				       struct scsi_cmnd *scmnd)
{
	unsigned long flags;

	spin_lock_irqsave(&ch->lock, flags);
	if (req->scmnd &&
	    (!sdev || req->scmnd->device == sdev) &&
	    (!scmnd || req->scmnd == scmnd)) {
		scmnd = req->scmnd;
		req->scmnd = NULL;
	} else {
		scmnd = NULL;
	}
	spin_unlock_irqrestore(&ch->lock, flags);

	return scmnd;
}

/**
 * srp_free_req() - Unmap data and adjust ch->req_lim.
 * @ch:     SRP RDMA channel.
 * @req:    Request to be freed.
 * @scmnd:  SCSI command associated with @req.
 * @req_lim_delta: Amount to be added to @target->req_lim.
 */
static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
			 struct scsi_cmnd *scmnd, s32 req_lim_delta)
{
	unsigned long flags;

	srp_unmap_data(scmnd, ch, req);

	spin_lock_irqsave(&ch->lock, flags);
	ch->req_lim += req_lim_delta;
	spin_unlock_irqrestore(&ch->lock, flags);
}

static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
			   struct scsi_device *sdev, int result)
{
	struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);

	if (scmnd) {
		srp_free_req(ch, req, scmnd, 0);
		scmnd->result = result;
		scmnd->scsi_done(scmnd);
	}
}

static void srp_terminate_io(struct srp_rport *rport)
{
	struct srp_target_port *target = rport->lld_data;
	struct srp_rdma_ch *ch;
	struct Scsi_Host *shost = target->scsi_host;
	struct scsi_device *sdev;
	int i, j;

	/*
	 * Invoking srp_terminate_io() while srp_queuecommand() is running
	 * is not safe. Hence the warning statement below.
	 */
	shost_for_each_device(sdev, shost)
		WARN_ON_ONCE(sdev->request_queue->request_fn_active);

	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];

		for (j = 0; j < target->req_ring_size; ++j) {
			struct srp_request *req = &ch->req_ring[j];

			srp_finish_req(ch, req, NULL,
				       DID_TRANSPORT_FAILFAST << 16);
		}
	}
}

/*
 * It is up to the caller to ensure that srp_rport_reconnect() calls are
 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
 * srp_reset_device() or srp_reset_host() calls will occur while this function
 * is in progress. One way to realize that is not to call this function
 * directly but to call srp_reconnect_rport() instead since that last function
 * serializes calls of this function via rport->mutex and also blocks
 * srp_queuecommand() calls before invoking this function.
 */
static int srp_rport_reconnect(struct srp_rport *rport)
{
	struct srp_target_port *target = rport->lld_data;
	struct srp_rdma_ch *ch;
	int i, j, ret = 0;
	bool multich = false;

	srp_disconnect_target(target);

	if (target->state == SRP_TARGET_SCANNING)
		return -ENODEV;

	/*
	 * Now get a new local CM ID so that we avoid confusing the target in
	 * case things are really fouled up. Doing so also ensures that all CM
	 * callbacks will have finished before a new QP is allocated.
	 */
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		ret += srp_new_cm_id(ch);
	}
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		for (j = 0; j < target->req_ring_size; ++j) {
			struct srp_request *req = &ch->req_ring[j];

			srp_finish_req(ch, req, NULL, DID_RESET << 16);
		}
	}
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		/*
		 * Whether or not creating a new CM ID succeeded, create a new
		 * QP. This guarantees that all completion callback function
		 * invocations have finished before request resetting starts.
		 */
		ret += srp_create_ch_ib(ch);

		INIT_LIST_HEAD(&ch->free_tx);
		for (j = 0; j < target->queue_size; ++j)
			list_add(&ch->tx_ring[j]->list, &ch->free_tx);
	}

	target->qp_in_error = false;

	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		if (ret)
			break;
		ret = srp_connect_ch(ch, multich);
		multich = true;
	}

	if (ret == 0)
		shost_printk(KERN_INFO, target->scsi_host,
			     PFX "reconnect succeeded\n");

	return ret;
}

static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
			 unsigned int dma_len, u32 rkey)
{
	struct srp_direct_buf *desc = state->desc;

	WARN_ON_ONCE(!dma_len);

	desc->va = cpu_to_be64(dma_addr);
	desc->key = cpu_to_be32(rkey);
	desc->len = cpu_to_be32(dma_len);

	state->total_len += dma_len;
	state->desc++;
	state->ndesc++;
}

static int srp_map_finish_fmr(struct srp_map_state *state,
			      struct srp_rdma_ch *ch)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_pd *pd = target->pd;
	struct ib_pool_fmr *fmr;
	u64 io_addr = 0;

	if (state->fmr.next >= state->fmr.end)
		return -ENOMEM;

	WARN_ON_ONCE(!dev->use_fmr);

	if (state->npages == 0)
		return 0;

	if (state->npages == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
		srp_map_desc(state, state->base_dma_addr, state->dma_len,
			     pd->unsafe_global_rkey);
		goto reset_state;
	}

	fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
				   state->npages, io_addr);
	if (IS_ERR(fmr))
		return PTR_ERR(fmr);

	*state->fmr.next++ = fmr;
	state->nmdesc++;

	srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
		     state->dma_len, fmr->fmr->rkey);

reset_state:
	state->npages = 0;
	state->dma_len = 0;

	return 0;
}

static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
{
	srp_handle_qp_err(cq, wc, "FAST REG");
}
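/*
 * srp_map_finish_fmr() above and srp_map_finish_fr() below both convert the
 * pages or scatterlist entries accumulated in struct srp_map_state into a
 * single memory registration and record the resulting rkey through
 * srp_map_desc().
 */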
/*
 * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset
 * where to start in the first element. If sg_offset_p != NULL then
 * *sg_offset_p is updated to the offset in state->sg[retval] of the first
 * byte that has not yet been mapped.
 */
static int srp_map_finish_fr(struct srp_map_state *state,
			     struct srp_request *req,
			     struct srp_rdma_ch *ch, int sg_nents,
			     unsigned int *sg_offset_p)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_pd *pd = target->pd;
	struct ib_send_wr *bad_wr;
	struct ib_reg_wr wr;
	struct srp_fr_desc *desc;
	u32 rkey;
	int n, err;

	if (state->fr.next >= state->fr.end)
		return -ENOMEM;

	WARN_ON_ONCE(!dev->use_fast_reg);

	if (sg_nents == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
		unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;

		srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
			     sg_dma_len(state->sg) - sg_offset,
			     pd->unsafe_global_rkey);
		if (sg_offset_p)
			*sg_offset_p = 0;
		return 1;
	}

	desc = srp_fr_pool_get(ch->fr_pool);
	if (!desc)
		return -ENOMEM;

	rkey = ib_inc_rkey(desc->mr->rkey);
	ib_update_fast_reg_key(desc->mr, rkey);

	n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
			 dev->mr_page_size);
	if (unlikely(n < 0)) {
		srp_fr_pool_put(ch->fr_pool, &desc, 1);
		pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
			 dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
			 sg_offset_p ? *sg_offset_p : -1, n);
		return n;
	}

	WARN_ON_ONCE(desc->mr->length == 0);

	req->reg_cqe.done = srp_reg_mr_err_done;

	wr.wr.next = NULL;
	wr.wr.opcode = IB_WR_REG_MR;
	wr.wr.wr_cqe = &req->reg_cqe;
	wr.wr.num_sge = 0;
	wr.wr.send_flags = 0;
	wr.mr = desc->mr;
	wr.key = desc->mr->rkey;
	wr.access = (IB_ACCESS_LOCAL_WRITE |
		     IB_ACCESS_REMOTE_READ |
		     IB_ACCESS_REMOTE_WRITE);

	*state->fr.next++ = desc;
	state->nmdesc++;

	srp_map_desc(state, desc->mr->iova,
		     desc->mr->length, desc->mr->rkey);

	err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
	if (unlikely(err)) {
		WARN_ON_ONCE(err == -ENOMEM);
		return err;
	}

	return n;
}

static int srp_map_sg_entry(struct srp_map_state *state,
			    struct srp_rdma_ch *ch,
			    struct scatterlist *sg)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct ib_device *ibdev = dev->dev;
	dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
	unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
	unsigned int len = 0;
	int ret;

	WARN_ON_ONCE(!dma_len);

	while (dma_len) {
		unsigned offset = dma_addr & ~dev->mr_page_mask;

		if (state->npages == dev->max_pages_per_mr ||
		    (state->npages > 0 && offset != 0)) {
			ret = srp_map_finish_fmr(state, ch);
			if (ret)
				return ret;
		}

		len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);

		if (!state->npages)
			state->base_dma_addr = dma_addr;
		state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
		state->dma_len += len;
		dma_addr += len;
		dma_len -= len;
	}

	/*
	 * If the end of the MR is not on a page boundary then we need to
	 * close it out and start a new one -- we can only merge at page
	 * boundaries.
	 */
	ret = 0;
	if ((dma_addr & ~dev->mr_page_mask) != 0)
		ret = srp_map_finish_fmr(state, ch);
	return ret;
}

static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
			  struct srp_request *req, struct scatterlist *scat,
			  int count)
{
	struct scatterlist *sg;
	int i, ret;

	state->pages = req->map_page;
	state->fmr.next = req->fmr_list;
	state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;

	for_each_sg(scat, sg, count, i) {
		ret = srp_map_sg_entry(state, ch, sg);
		if (ret)
			return ret;
	}

	ret = srp_map_finish_fmr(state, ch);
	if (ret)
		return ret;

	return 0;
}

static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
			 struct srp_request *req, struct scatterlist *scat,
			 int count)
{
	unsigned int sg_offset = 0;

	state->fr.next = req->fr_list;
	state->fr.end = req->fr_list + ch->target->mr_per_cmd;
	state->sg = scat;

	if (count == 0)
		return 0;

	while (count) {
		int i, n;

		n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
		if (unlikely(n < 0))
			return n;

		count -= n;
		for (i = 0; i < n; i++)
			state->sg = sg_next(state->sg);
	}

	return 0;
}

static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
			  struct srp_request *req, struct scatterlist *scat,
			  int count)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct scatterlist *sg;
	int i;

	for_each_sg(scat, sg, count, i) {
		srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
			     ib_sg_dma_len(dev->dev, sg),
			     target->pd->unsafe_global_rkey);
	}

	return 0;
}

/*
 * Register the indirect data buffer descriptor with the HCA.
 *
 * Note: since the indirect data buffer descriptor has been allocated with
 * kmalloc() it is guaranteed that this buffer is a physically contiguous
 * memory buffer.
 */
static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
		       void **next_mr, void **end_mr, u32 idb_len,
		       __be32 *idb_rkey)
{
	struct srp_target_port *target = ch->target;
	struct srp_device *dev = target->srp_host->srp_dev;
	struct srp_map_state state;
	struct srp_direct_buf idb_desc;
	u64 idb_pages[1];
	struct scatterlist idb_sg[1];
	int ret;

	memset(&state, 0, sizeof(state));
	memset(&idb_desc, 0, sizeof(idb_desc));
	state.gen.next = next_mr;
	state.gen.end = end_mr;
	state.desc = &idb_desc;
	state.base_dma_addr = req->indirect_dma_addr;
	state.dma_len = idb_len;

	if (dev->use_fast_reg) {
		state.sg = idb_sg;
		sg_init_one(idb_sg, req->indirect_desc, idb_len);
		idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
#ifdef CONFIG_NEED_SG_DMA_LENGTH
		idb_sg->dma_length = idb_sg->length;	      /* hack^2 */
#endif
		ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
		if (ret < 0)
			return ret;
		WARN_ON_ONCE(ret < 1);
	} else if (dev->use_fmr) {
		state.pages = idb_pages;
		state.pages[0] = (req->indirect_dma_addr &
				  dev->mr_page_mask);
		state.npages = 1;
		ret = srp_map_finish_fmr(&state, ch);
		if (ret < 0)
			return ret;
	} else {
		return -EINVAL;
	}

	*idb_rkey = idb_desc.key;

	return 0;
}

#if defined(DYNAMIC_DEBUG)
static void srp_check_mapping(struct srp_map_state *state,
			      struct srp_rdma_ch *ch, struct srp_request *req,
			      struct scatterlist *scat, int count)
{
	struct srp_device *dev = ch->target->srp_host->srp_dev;
	struct srp_fr_desc **pfr;
	u64 desc_len = 0, mr_len = 0;
	int i;

	for (i = 0; i < state->ndesc; i++)
		desc_len += be32_to_cpu(req->indirect_desc[i].len);
	if (dev->use_fast_reg)
		for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
			mr_len += (*pfr)->mr->length;
	else if (dev->use_fmr)
		for (i = 0; i < state->nmdesc; i++)
			mr_len += be32_to_cpu(req->indirect_desc[i].len);
	if (desc_len != scsi_bufflen(req->scmnd) ||
	    mr_len > scsi_bufflen(req->scmnd))
		pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
		       scsi_bufflen(req->scmnd), desc_len, mr_len,
		       state->ndesc, state->nmdesc);
}
#endif

/**
 * srp_map_data() - map SCSI data buffer onto an SRP request
 * @scmnd: SCSI command to map
 * @ch: SRP RDMA channel
 * @req: SRP request
 *
 * Returns the length in bytes of the SRP_CMD IU or a negative value if
 * mapping failed.
 */
static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
			struct srp_request *req)
{
	struct srp_target_port *target = ch->target;
	struct ib_pd *pd = target->pd;
	struct scatterlist *scat;
	struct srp_cmd *cmd = req->cmd->buf;
	int len, nents, count, ret;
	struct srp_device *dev;
	struct ib_device *ibdev;
	struct srp_map_state state;
	struct srp_indirect_buf *indirect_hdr;
	u32 idb_len, table_len;
	__be32 idb_rkey;
	u8 fmt;

	if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
		return sizeof (struct srp_cmd);

	if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
	    scmnd->sc_data_direction != DMA_TO_DEVICE) {
		shost_printk(KERN_WARNING, target->scsi_host,
			     PFX "Unhandled data direction %d\n",
			     scmnd->sc_data_direction);
		return -EINVAL;
	}

	nents = scsi_sg_count(scmnd);
	scat  = scsi_sglist(scmnd);

	dev = target->srp_host->srp_dev;
	ibdev = dev->dev;

	count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
	if (unlikely(count == 0))
		return -EIO;

	fmt = SRP_DATA_DESC_DIRECT;
	len = sizeof (struct srp_cmd) +	sizeof (struct srp_direct_buf);

	if (count == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
		/*
		 * The midlayer only generated a single gather/scatter
		 * entry, or DMA mapping coalesced everything to a
		 * single entry.  So a direct descriptor along with
		 * the DMA MR suffices.
		 */
		struct srp_direct_buf *buf = (void *) cmd->add_data;

		buf->va  = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
		buf->key = cpu_to_be32(pd->unsafe_global_rkey);
		buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));

		req->nmdesc = 0;
		goto map_complete;
	}

	/*
	 * We have more than one scatter/gather entry, so build our indirect
	 * descriptor table, trying to merge as many entries as we can.
	 */
	indirect_hdr = (void *) cmd->add_data;

	ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
				   target->indirect_size, DMA_TO_DEVICE);

	memset(&state, 0, sizeof(state));
	state.desc = req->indirect_desc;
	if (dev->use_fast_reg)
		ret = srp_map_sg_fr(&state, ch, req, scat, count);
	else if (dev->use_fmr)
		ret = srp_map_sg_fmr(&state, ch, req, scat, count);
	else
		ret = srp_map_sg_dma(&state, ch, req, scat, count);
	req->nmdesc = state.nmdesc;
	if (ret < 0)
		goto unmap;

#if defined(DYNAMIC_DEBUG)
	{
		DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
			"Memory mapping consistency check");
		if (unlikely(ddm.flags & _DPRINTK_FLAGS_PRINT))
			srp_check_mapping(&state, ch, req, scat, count);
	}
#endif

	/* We've mapped the request, now pull as much of the indirect
	 * descriptor table as we can into the command buffer. If this
	 * target is not using an external indirect table, we are
	 * guaranteed to fit into the command, as the SCSI layer won't
	 * give us more S/G entries than we allow.
	 */
	if (state.ndesc == 1) {
		/*
		 * Memory registration collapsed the sg-list into one entry,
		 * so use a direct descriptor.
		 */
		struct srp_direct_buf *buf = (void *) cmd->add_data;

		*buf = req->indirect_desc[0];
		goto map_complete;
	}

	if (unlikely(target->cmd_sg_cnt < state.ndesc &&
						!target->allow_ext_sg)) {
		shost_printk(KERN_ERR, target->scsi_host,
			     "Could not fit S/G list into SRP_CMD\n");
		ret = -EIO;
		goto unmap;
	}

	count = min(state.ndesc, target->cmd_sg_cnt);
	table_len = state.ndesc * sizeof (struct srp_direct_buf);
	idb_len = sizeof(struct srp_indirect_buf) + table_len;

	fmt = SRP_DATA_DESC_INDIRECT;
	len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
	len += count * sizeof (struct srp_direct_buf);

	memcpy(indirect_hdr->desc_list, req->indirect_desc,
	       count * sizeof (struct srp_direct_buf));

	if (!(pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
		ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
				  idb_len, &idb_rkey);
		if (ret < 0)
			goto unmap;
		req->nmdesc++;
	} else {
		idb_rkey = cpu_to_be32(pd->unsafe_global_rkey);
	}

	indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
	indirect_hdr->table_desc.key = idb_rkey;
	indirect_hdr->table_desc.len = cpu_to_be32(table_len);
	indirect_hdr->len = cpu_to_be32(state.total_len);

	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
		cmd->data_out_desc_cnt = count;
	else
		cmd->data_in_desc_cnt = count;

	ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
				      DMA_TO_DEVICE);

map_complete:
	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
		cmd->buf_fmt = fmt << 4;
	else
		cmd->buf_fmt = fmt;

	return len;

unmap:
	srp_unmap_data(scmnd, ch, req);
	if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
		ret = -E2BIG;
	return ret;
}
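/*
 * Summary of the data-buffer formats srp_map_data() can produce: a single
 * direct descriptor when one mapped entry suffices, an indirect descriptor
 * table embedded in the SRP_CMD IU for up to cmd_sg_cnt entries, and an
 * externally registered indirect table (see srp_map_idb()) when the global
 * rkey may not be used.
 */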
/*
 * Return an IU and possible credit to the free pool
 */
static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
			  enum srp_iu_type iu_type)
{
	unsigned long flags;

	spin_lock_irqsave(&ch->lock, flags);
	list_add(&iu->list, &ch->free_tx);
	if (iu_type != SRP_IU_RSP)
		++ch->req_lim;
	spin_unlock_irqrestore(&ch->lock, flags);
}

/*
 * Must be called with ch->lock held to protect req_lim and free_tx.
 * If IU is not sent, it must be returned using srp_put_tx_iu().
 *
 * Note:
 * An upper limit for the number of allocated information units for each
 * request type is:
 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
 *   more than Scsi_Host.can_queue requests.
 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
 *   one unanswered SRP request to an initiator.
 */
static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
				      enum srp_iu_type iu_type)
{
	struct srp_target_port *target = ch->target;
	s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
	struct srp_iu *iu;

	ib_process_cq_direct(ch->send_cq, -1);

	if (list_empty(&ch->free_tx))
		return NULL;

	/* Initiator responses to target requests do not consume credits */
	if (iu_type != SRP_IU_RSP) {
		if (ch->req_lim <= rsv) {
			++target->zero_req_lim;
			return NULL;
		}

		--ch->req_lim;
	}

	iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
	list_del(&iu->list);
	return iu;
}

static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
	struct srp_rdma_ch *ch = cq->cq_context;

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		srp_handle_qp_err(cq, wc, "SEND");
		return;
	}

	list_add(&iu->list, &ch->free_tx);
}

static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
{
	struct srp_target_port *target = ch->target;
	struct ib_sge list;
	struct ib_send_wr wr, *bad_wr;

	list.addr   = iu->dma;
	list.length = len;
	list.lkey   = target->lkey;

	iu->cqe.done = srp_send_done;

	wr.next       = NULL;
	wr.wr_cqe     = &iu->cqe;
	wr.sg_list    = &list;
	wr.num_sge    = 1;
	wr.opcode     = IB_WR_SEND;
	wr.send_flags = IB_SEND_SIGNALED;

	return ib_post_send(ch->qp, &wr, &bad_wr);
}

static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
{
	struct srp_target_port *target = ch->target;
	struct ib_recv_wr wr, *bad_wr;
	struct ib_sge list;

	list.addr   = iu->dma;
	list.length = iu->size;
	list.lkey   = target->lkey;

	iu->cqe.done = srp_recv_done;

	wr.next     = NULL;
	wr.wr_cqe   = &iu->cqe;
	wr.sg_list  = &list;
	wr.num_sge  = 1;

	return ib_post_recv(ch->qp, &wr, &bad_wr);
}

static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
{
	struct srp_target_port *target = ch->target;
	struct srp_request *req;
	struct scsi_cmnd *scmnd;
	unsigned long flags;

	if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
		spin_lock_irqsave(&ch->lock, flags);
		ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
		spin_unlock_irqrestore(&ch->lock, flags);

		ch->tsk_mgmt_status = -1;
		if (be32_to_cpu(rsp->resp_data_len) >= 4)
			ch->tsk_mgmt_status = rsp->data[3];
		complete(&ch->tsk_mgmt_done);
	} else {
		scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
		if (scmnd) {
			req = (void *)scmnd->host_scribble;
			scmnd = srp_claim_req(ch, req, NULL, scmnd);
		}
		if (!scmnd) {
			shost_printk(KERN_ERR, target->scsi_host,
				     "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
				     rsp->tag, ch - target->ch, ch->qp->qp_num);

			spin_lock_irqsave(&ch->lock, flags);
			ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
			spin_unlock_irqrestore(&ch->lock, flags);

			return;
		}
		scmnd->result = rsp->status;

		if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
			memcpy(scmnd->sense_buffer, rsp->data +
			       be32_to_cpu(rsp->resp_data_len),
			       min_t(int, be32_to_cpu(rsp->sense_data_len),
				     SCSI_SENSE_BUFFERSIZE));
		}

		if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
			scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
			scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));

		srp_free_req(ch, req, scmnd,
			     be32_to_cpu(rsp->req_lim_delta));

		scmnd->host_scribble = NULL;
		scmnd->scsi_done(scmnd);
	}
}

static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
			       void *rsp, int len)
{
	struct srp_target_port *target = ch->target;
	struct ib_device *dev = target->srp_host->srp_dev->dev;
	unsigned long flags;
	struct srp_iu *iu;
	int err;

	spin_lock_irqsave(&ch->lock, flags);
	ch->req_lim += req_delta;
	iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
	spin_unlock_irqrestore(&ch->lock, flags);

	if (!iu) {
		shost_printk(KERN_ERR, target->scsi_host, PFX
			     "no IU available to send response\n");
		return 1;
	}

	ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
	memcpy(iu->buf, rsp, len);
	ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);

	err = srp_post_send(ch, iu, len);
	if (err) {
		shost_printk(KERN_ERR, target->scsi_host, PFX
			     "unable to post response: %d\n", err);
		srp_put_tx_iu(ch, iu, SRP_IU_RSP);
	}

	return err;
}

static void srp_process_cred_req(struct srp_rdma_ch *ch,
				 struct srp_cred_req *req)
{
	struct srp_cred_rsp rsp = {
		.opcode = SRP_CRED_RSP,
		.tag = req->tag,
	};
	s32 delta = be32_to_cpu(req->req_lim_delta);

	if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
		shost_printk(KERN_ERR, ch->target->scsi_host, PFX
			     "problems processing SRP_CRED_REQ\n");
}
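/*
 * SRP_CRED_REQ and SRP_AER_REQ are the two target-to-initiator requests
 * handled here; both replies are sent through srp_response_common(), which
 * also credits req_lim with the delta supplied by the target.
 */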
1986 shost_printk(KERN_ERR, target->scsi_host, PFX 1987 "problems processing SRP_AER_REQ\n"); 1988 } 1989 1990 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc) 1991 { 1992 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe); 1993 struct srp_rdma_ch *ch = cq->cq_context; 1994 struct srp_target_port *target = ch->target; 1995 struct ib_device *dev = target->srp_host->srp_dev->dev; 1996 int res; 1997 u8 opcode; 1998 1999 if (unlikely(wc->status != IB_WC_SUCCESS)) { 2000 srp_handle_qp_err(cq, wc, "RECV"); 2001 return; 2002 } 2003 2004 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len, 2005 DMA_FROM_DEVICE); 2006 2007 opcode = *(u8 *) iu->buf; 2008 2009 if (0) { 2010 shost_printk(KERN_ERR, target->scsi_host, 2011 PFX "recv completion, opcode 0x%02x\n", opcode); 2012 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1, 2013 iu->buf, wc->byte_len, true); 2014 } 2015 2016 switch (opcode) { 2017 case SRP_RSP: 2018 srp_process_rsp(ch, iu->buf); 2019 break; 2020 2021 case SRP_CRED_REQ: 2022 srp_process_cred_req(ch, iu->buf); 2023 break; 2024 2025 case SRP_AER_REQ: 2026 srp_process_aer_req(ch, iu->buf); 2027 break; 2028 2029 case SRP_T_LOGOUT: 2030 /* XXX Handle target logout */ 2031 shost_printk(KERN_WARNING, target->scsi_host, 2032 PFX "Got target logout request\n"); 2033 break; 2034 2035 default: 2036 shost_printk(KERN_WARNING, target->scsi_host, 2037 PFX "Unhandled SRP opcode 0x%02x\n", opcode); 2038 break; 2039 } 2040 2041 ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len, 2042 DMA_FROM_DEVICE); 2043 2044 res = srp_post_recv(ch, iu); 2045 if (res != 0) 2046 shost_printk(KERN_ERR, target->scsi_host, 2047 PFX "Recv failed with error code %d\n", res); 2048 } 2049 2050 /** 2051 * srp_tl_err_work() - handle a transport layer error 2052 * @work: Work structure embedded in an SRP target port. 2053 * 2054 * Note: This function may get invoked before the rport has been created, 2055 * hence the target->rport test. 2056 */ 2057 static void srp_tl_err_work(struct work_struct *work) 2058 { 2059 struct srp_target_port *target; 2060 2061 target = container_of(work, struct srp_target_port, tl_err_work); 2062 if (target->rport) 2063 srp_start_tl_fail_timers(target->rport); 2064 } 2065 2066 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc, 2067 const char *opname) 2068 { 2069 struct srp_rdma_ch *ch = cq->cq_context; 2070 struct srp_target_port *target = ch->target; 2071 2072 if (ch->connected && !target->qp_in_error) { 2073 shost_printk(KERN_ERR, target->scsi_host, 2074 PFX "failed %s status %s (%d) for CQE %p\n", 2075 opname, ib_wc_status_msg(wc->status), wc->status, 2076 wc->wr_cqe); 2077 queue_work(system_long_wq, &target->tl_err_work); 2078 } 2079 target->qp_in_error = true; 2080 } 2081 2082 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) 2083 { 2084 struct srp_target_port *target = host_to_target(shost); 2085 struct srp_rport *rport = target->rport; 2086 struct srp_rdma_ch *ch; 2087 struct srp_request *req; 2088 struct srp_iu *iu; 2089 struct srp_cmd *cmd; 2090 struct ib_device *dev; 2091 unsigned long flags; 2092 u32 tag; 2093 u16 idx; 2094 int len, ret; 2095 const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler; 2096 2097 /* 2098 * The SCSI EH thread is the only context from which srp_queuecommand() 2099 * can get invoked for blocked devices (SDEV_BLOCK / 2100 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by 2101 * locking the rport mutex if invoked from inside the SCSI EH. 
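 * Since the SCSI EH thread runs in process context, sleeping on the
 * rport mutex is safe here.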
2102 */ 2103 if (in_scsi_eh) 2104 mutex_lock(&rport->mutex); 2105 2106 scmnd->result = srp_chkready(target->rport); 2107 if (unlikely(scmnd->result)) 2108 goto err; 2109 2110 WARN_ON_ONCE(scmnd->request->tag < 0); 2111 tag = blk_mq_unique_tag(scmnd->request); 2112 ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)]; 2113 idx = blk_mq_unique_tag_to_tag(tag); 2114 WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n", 2115 dev_name(&shost->shost_gendev), tag, idx, 2116 target->req_ring_size); 2117 2118 spin_lock_irqsave(&ch->lock, flags); 2119 iu = __srp_get_tx_iu(ch, SRP_IU_CMD); 2120 spin_unlock_irqrestore(&ch->lock, flags); 2121 2122 if (!iu) 2123 goto err; 2124 2125 req = &ch->req_ring[idx]; 2126 dev = target->srp_host->srp_dev->dev; 2127 ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len, 2128 DMA_TO_DEVICE); 2129 2130 scmnd->host_scribble = (void *) req; 2131 2132 cmd = iu->buf; 2133 memset(cmd, 0, sizeof *cmd); 2134 2135 cmd->opcode = SRP_CMD; 2136 int_to_scsilun(scmnd->device->lun, &cmd->lun); 2137 cmd->tag = tag; 2138 memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len); 2139 2140 req->scmnd = scmnd; 2141 req->cmd = iu; 2142 2143 len = srp_map_data(scmnd, ch, req); 2144 if (len < 0) { 2145 shost_printk(KERN_ERR, target->scsi_host, 2146 PFX "Failed to map data (%d)\n", len); 2147 /* 2148 * If we ran out of memory descriptors (-ENOMEM) because an 2149 * application is queuing many requests with more than 2150 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer 2151 * to reduce queue depth temporarily. 2152 */ 2153 scmnd->result = len == -ENOMEM ? 2154 DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16; 2155 goto err_iu; 2156 } 2157 2158 ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len, 2159 DMA_TO_DEVICE); 2160 2161 if (srp_post_send(ch, iu, len)) { 2162 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n"); 2163 goto err_unmap; 2164 } 2165 2166 ret = 0; 2167 2168 unlock_rport: 2169 if (in_scsi_eh) 2170 mutex_unlock(&rport->mutex); 2171 2172 return ret; 2173 2174 err_unmap: 2175 srp_unmap_data(scmnd, ch, req); 2176 2177 err_iu: 2178 srp_put_tx_iu(ch, iu, SRP_IU_CMD); 2179 2180 /* 2181 * Avoid that the loops that iterate over the request ring can 2182 * encounter a dangling SCSI command pointer. 2183 */ 2184 req->scmnd = NULL; 2185 2186 err: 2187 if (scmnd->result) { 2188 scmnd->scsi_done(scmnd); 2189 ret = 0; 2190 } else { 2191 ret = SCSI_MLQUEUE_HOST_BUSY; 2192 } 2193 2194 goto unlock_rport; 2195 } 2196 2197 /* 2198 * Note: the resources allocated in this function are freed in 2199 * srp_free_ch_ib(). 
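 * Both rings hold target->queue_size IUs: RX IUs are sized for the
 * largest target-to-initiator IU (ch->max_ti_iu_len) and TX IUs for
 * target->max_iu_len.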
2200 */ 2201 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch) 2202 { 2203 struct srp_target_port *target = ch->target; 2204 int i; 2205 2206 ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring), 2207 GFP_KERNEL); 2208 if (!ch->rx_ring) 2209 goto err_no_ring; 2210 ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring), 2211 GFP_KERNEL); 2212 if (!ch->tx_ring) 2213 goto err_no_ring; 2214 2215 for (i = 0; i < target->queue_size; ++i) { 2216 ch->rx_ring[i] = srp_alloc_iu(target->srp_host, 2217 ch->max_ti_iu_len, 2218 GFP_KERNEL, DMA_FROM_DEVICE); 2219 if (!ch->rx_ring[i]) 2220 goto err; 2221 } 2222 2223 for (i = 0; i < target->queue_size; ++i) { 2224 ch->tx_ring[i] = srp_alloc_iu(target->srp_host, 2225 target->max_iu_len, 2226 GFP_KERNEL, DMA_TO_DEVICE); 2227 if (!ch->tx_ring[i]) 2228 goto err; 2229 2230 list_add(&ch->tx_ring[i]->list, &ch->free_tx); 2231 } 2232 2233 return 0; 2234 2235 err: 2236 for (i = 0; i < target->queue_size; ++i) { 2237 srp_free_iu(target->srp_host, ch->rx_ring[i]); 2238 srp_free_iu(target->srp_host, ch->tx_ring[i]); 2239 } 2240 2241 2242 err_no_ring: 2243 kfree(ch->tx_ring); 2244 ch->tx_ring = NULL; 2245 kfree(ch->rx_ring); 2246 ch->rx_ring = NULL; 2247 2248 return -ENOMEM; 2249 } 2250 2251 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask) 2252 { 2253 uint64_t T_tr_ns, max_compl_time_ms; 2254 uint32_t rq_tmo_jiffies; 2255 2256 /* 2257 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair, 2258 * table 91), both the QP timeout and the retry count have to be set 2259 * for RC QP's during the RTR to RTS transition. 2260 */ 2261 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) != 2262 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)); 2263 2264 /* 2265 * Set target->rq_tmo_jiffies to one second more than the largest time 2266 * it can take before an error completion is generated. See also 2267 * C9-140..142 in the IBTA spec for more information about how to 2268 * convert the QP Local ACK Timeout value to nanoseconds. 2269 */ 2270 T_tr_ns = 4096 * (1ULL << qp_attr->timeout); 2271 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns; 2272 do_div(max_compl_time_ms, NSEC_PER_MSEC); 2273 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000); 2274 2275 return rq_tmo_jiffies; 2276 } 2277 2278 static void srp_cm_rep_handler(struct ib_cm_id *cm_id, 2279 const struct srp_login_rsp *lrsp, 2280 struct srp_rdma_ch *ch) 2281 { 2282 struct srp_target_port *target = ch->target; 2283 struct ib_qp_attr *qp_attr = NULL; 2284 int attr_mask = 0; 2285 int ret; 2286 int i; 2287 2288 if (lrsp->opcode == SRP_LOGIN_RSP) { 2289 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len); 2290 ch->req_lim = be32_to_cpu(lrsp->req_lim_delta); 2291 2292 /* 2293 * Reserve credits for task management so we don't 2294 * bounce requests back to the SCSI mid-layer. 
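 * As an illustrative example, if the target grants an initial req_lim
 * of 64 and SRP_TSK_MGMT_SQ_SIZE is 1, can_queue is capped at 63 so
 * that at least one credit always remains available for
 * task-management IUs.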
2295 */ 2296 target->scsi_host->can_queue 2297 = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE, 2298 target->scsi_host->can_queue); 2299 target->scsi_host->cmd_per_lun 2300 = min_t(int, target->scsi_host->can_queue, 2301 target->scsi_host->cmd_per_lun); 2302 } else { 2303 shost_printk(KERN_WARNING, target->scsi_host, 2304 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode); 2305 ret = -ECONNRESET; 2306 goto error; 2307 } 2308 2309 if (!ch->rx_ring) { 2310 ret = srp_alloc_iu_bufs(ch); 2311 if (ret) 2312 goto error; 2313 } 2314 2315 ret = -ENOMEM; 2316 qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); 2317 if (!qp_attr) 2318 goto error; 2319 2320 qp_attr->qp_state = IB_QPS_RTR; 2321 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); 2322 if (ret) 2323 goto error_free; 2324 2325 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask); 2326 if (ret) 2327 goto error_free; 2328 2329 for (i = 0; i < target->queue_size; i++) { 2330 struct srp_iu *iu = ch->rx_ring[i]; 2331 2332 ret = srp_post_recv(ch, iu); 2333 if (ret) 2334 goto error_free; 2335 } 2336 2337 qp_attr->qp_state = IB_QPS_RTS; 2338 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); 2339 if (ret) 2340 goto error_free; 2341 2342 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask); 2343 2344 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask); 2345 if (ret) 2346 goto error_free; 2347 2348 ret = ib_send_cm_rtu(cm_id, NULL, 0); 2349 2350 error_free: 2351 kfree(qp_attr); 2352 2353 error: 2354 ch->status = ret; 2355 } 2356 2357 static void srp_cm_rej_handler(struct ib_cm_id *cm_id, 2358 struct ib_cm_event *event, 2359 struct srp_rdma_ch *ch) 2360 { 2361 struct srp_target_port *target = ch->target; 2362 struct Scsi_Host *shost = target->scsi_host; 2363 struct ib_class_port_info *cpi; 2364 int opcode; 2365 2366 switch (event->param.rej_rcvd.reason) { 2367 case IB_CM_REJ_PORT_CM_REDIRECT: 2368 cpi = event->param.rej_rcvd.ari; 2369 ch->path.dlid = cpi->redirect_lid; 2370 ch->path.pkey = cpi->redirect_pkey; 2371 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff; 2372 memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16); 2373 2374 ch->status = ch->path.dlid ? 2375 SRP_DLID_REDIRECT : SRP_PORT_REDIRECT; 2376 break; 2377 2378 case IB_CM_REJ_PORT_REDIRECT: 2379 if (srp_target_is_topspin(target)) { 2380 /* 2381 * Topspin/Cisco SRP gateways incorrectly send 2382 * reject reason code 25 when they mean 24 2383 * (port redirect). 
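 * In that case the ARI field holds the GID of the port to redirect
 * to, which is copied into the path record below.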
2384 */ 2385 memcpy(ch->path.dgid.raw, 2386 event->param.rej_rcvd.ari, 16); 2387 2388 shost_printk(KERN_DEBUG, shost, 2389 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n", 2390 be64_to_cpu(ch->path.dgid.global.subnet_prefix), 2391 be64_to_cpu(ch->path.dgid.global.interface_id)); 2392 2393 ch->status = SRP_PORT_REDIRECT; 2394 } else { 2395 shost_printk(KERN_WARNING, shost, 2396 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n"); 2397 ch->status = -ECONNRESET; 2398 } 2399 break; 2400 2401 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID: 2402 shost_printk(KERN_WARNING, shost, 2403 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n"); 2404 ch->status = -ECONNRESET; 2405 break; 2406 2407 case IB_CM_REJ_CONSUMER_DEFINED: 2408 opcode = *(u8 *) event->private_data; 2409 if (opcode == SRP_LOGIN_REJ) { 2410 struct srp_login_rej *rej = event->private_data; 2411 u32 reason = be32_to_cpu(rej->reason); 2412 2413 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE) 2414 shost_printk(KERN_WARNING, shost, 2415 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n"); 2416 else 2417 shost_printk(KERN_WARNING, shost, PFX 2418 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n", 2419 target->sgid.raw, 2420 target->orig_dgid.raw, reason); 2421 } else 2422 shost_printk(KERN_WARNING, shost, 2423 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED," 2424 " opcode 0x%02x\n", opcode); 2425 ch->status = -ECONNRESET; 2426 break; 2427 2428 case IB_CM_REJ_STALE_CONN: 2429 shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n"); 2430 ch->status = SRP_STALE_CONN; 2431 break; 2432 2433 default: 2434 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n", 2435 event->param.rej_rcvd.reason); 2436 ch->status = -ECONNRESET; 2437 } 2438 } 2439 2440 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) 2441 { 2442 struct srp_rdma_ch *ch = cm_id->context; 2443 struct srp_target_port *target = ch->target; 2444 int comp = 0; 2445 2446 switch (event->event) { 2447 case IB_CM_REQ_ERROR: 2448 shost_printk(KERN_DEBUG, target->scsi_host, 2449 PFX "Sending CM REQ failed\n"); 2450 comp = 1; 2451 ch->status = -ECONNRESET; 2452 break; 2453 2454 case IB_CM_REP_RECEIVED: 2455 comp = 1; 2456 srp_cm_rep_handler(cm_id, event->private_data, ch); 2457 break; 2458 2459 case IB_CM_REJ_RECEIVED: 2460 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n"); 2461 comp = 1; 2462 2463 srp_cm_rej_handler(cm_id, event, ch); 2464 break; 2465 2466 case IB_CM_DREQ_RECEIVED: 2467 shost_printk(KERN_WARNING, target->scsi_host, 2468 PFX "DREQ received - connection closed\n"); 2469 ch->connected = false; 2470 if (ib_send_cm_drep(cm_id, NULL, 0)) 2471 shost_printk(KERN_ERR, target->scsi_host, 2472 PFX "Sending CM DREP failed\n"); 2473 queue_work(system_long_wq, &target->tl_err_work); 2474 break; 2475 2476 case IB_CM_TIMEWAIT_EXIT: 2477 shost_printk(KERN_ERR, target->scsi_host, 2478 PFX "connection closed\n"); 2479 comp = 1; 2480 2481 ch->status = 0; 2482 break; 2483 2484 case IB_CM_MRA_RECEIVED: 2485 case IB_CM_DREQ_ERROR: 2486 case IB_CM_DREP_RECEIVED: 2487 break; 2488 2489 default: 2490 shost_printk(KERN_WARNING, target->scsi_host, 2491 PFX "Unhandled CM event %d\n", event->event); 2492 break; 2493 } 2494 2495 if (comp) 2496 complete(&ch->done); 2497 2498 return 0; 2499 } 2500 2501 /** 2502 * srp_change_queue_depth - setting device queue depth 2503 * @sdev: scsi device struct 2504 * @qdepth: requested queue depth 2505 * 2506 * Returns queue depth. 
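 * Devices that do not support tagged queuing are limited to a queue
 * depth of one.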
2507 */ 2508 static int 2509 srp_change_queue_depth(struct scsi_device *sdev, int qdepth) 2510 { 2511 if (!sdev->tagged_supported) 2512 qdepth = 1; 2513 return scsi_change_queue_depth(sdev, qdepth); 2514 } 2515 2516 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun, 2517 u8 func) 2518 { 2519 struct srp_target_port *target = ch->target; 2520 struct srp_rport *rport = target->rport; 2521 struct ib_device *dev = target->srp_host->srp_dev->dev; 2522 struct srp_iu *iu; 2523 struct srp_tsk_mgmt *tsk_mgmt; 2524 2525 if (!ch->connected || target->qp_in_error) 2526 return -1; 2527 2528 init_completion(&ch->tsk_mgmt_done); 2529 2530 /* 2531 * Lock the rport mutex to prevent srp_create_ch_ib() from being 2532 * invoked while a task management function is being sent. 2533 */ 2534 mutex_lock(&rport->mutex); 2535 spin_lock_irq(&ch->lock); 2536 iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT); 2537 spin_unlock_irq(&ch->lock); 2538 2539 if (!iu) { 2540 mutex_unlock(&rport->mutex); 2541 2542 return -1; 2543 } 2544 2545 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt, 2546 DMA_TO_DEVICE); 2547 tsk_mgmt = iu->buf; 2548 memset(tsk_mgmt, 0, sizeof *tsk_mgmt); 2549 2550 tsk_mgmt->opcode = SRP_TSK_MGMT; 2551 int_to_scsilun(lun, &tsk_mgmt->lun); 2552 tsk_mgmt->tag = req_tag | SRP_TAG_TSK_MGMT; 2553 tsk_mgmt->tsk_mgmt_func = func; 2554 tsk_mgmt->task_tag = req_tag; 2555 2556 ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt, 2557 DMA_TO_DEVICE); 2558 if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) { 2559 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT); 2560 mutex_unlock(&rport->mutex); 2561 2562 return -1; 2563 } 2564 mutex_unlock(&rport->mutex); 2565 2566 if (!wait_for_completion_timeout(&ch->tsk_mgmt_done, 2567 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS))) 2568 return -1; 2569 2570 return 0; 2571 } 2572 2573 static int srp_abort(struct scsi_cmnd *scmnd) 2574 { 2575 struct srp_target_port *target = host_to_target(scmnd->device->host); 2576 struct srp_request *req = (struct srp_request *) scmnd->host_scribble; 2577 u32 tag; 2578 u16 ch_idx; 2579 struct srp_rdma_ch *ch; 2580 int ret; 2581 2582 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); 2583 2584 if (!req) 2585 return SUCCESS; 2586 tag = blk_mq_unique_tag(scmnd->request); 2587 ch_idx = blk_mq_unique_tag_to_hwq(tag); 2588 if (WARN_ON_ONCE(ch_idx >= target->ch_count)) 2589 return SUCCESS; 2590 ch = &target->ch[ch_idx]; 2591 if (!srp_claim_req(ch, req, NULL, scmnd)) 2592 return SUCCESS; 2593 shost_printk(KERN_ERR, target->scsi_host, 2594 "Sending SRP abort for tag %#x\n", tag); 2595 if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun, 2596 SRP_TSK_ABORT_TASK) == 0) 2597 ret = SUCCESS; 2598 else if (target->rport->state == SRP_RPORT_LOST) 2599 ret = FAST_IO_FAIL; 2600 else 2601 ret = FAILED; 2602 srp_free_req(ch, req, scmnd, 0); 2603 scmnd->result = DID_ABORT << 16; 2604 scmnd->scsi_done(scmnd); 2605 2606 return ret; 2607 } 2608 2609 static int srp_reset_device(struct scsi_cmnd *scmnd) 2610 { 2611 struct srp_target_port *target = host_to_target(scmnd->device->host); 2612 struct srp_rdma_ch *ch; 2613 int i, j; 2614 2615 shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n"); 2616 2617 ch = &target->ch[0]; 2618 if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun, 2619 SRP_TSK_LUN_RESET)) 2620 return FAILED; 2621 if (ch->tsk_mgmt_status) 2622 return FAILED; 2623 2624 for (j = 0; j < target->ch_count; j++) { 2625 ch = &target->ch[j]; 2626 for (i = 0; i < target->req_ring_size; ++i) { 2627 struct srp_request *req =
&ch->req_ring[i]; 2628 2629 srp_finish_req(ch, req, scmnd->device, DID_RESET << 16); 2630 } 2631 } 2632 2633 return SUCCESS; 2634 } 2635 2636 static int srp_reset_host(struct scsi_cmnd *scmnd) 2637 { 2638 struct srp_target_port *target = host_to_target(scmnd->device->host); 2639 2640 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n"); 2641 2642 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED; 2643 } 2644 2645 static int srp_slave_alloc(struct scsi_device *sdev) 2646 { 2647 struct Scsi_Host *shost = sdev->host; 2648 struct srp_target_port *target = host_to_target(shost); 2649 struct srp_device *srp_dev = target->srp_host->srp_dev; 2650 struct ib_device *ibdev = srp_dev->dev; 2651 2652 if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) 2653 blk_queue_virt_boundary(sdev->request_queue, 2654 ~srp_dev->mr_page_mask); 2655 2656 return 0; 2657 } 2658 2659 static int srp_slave_configure(struct scsi_device *sdev) 2660 { 2661 struct Scsi_Host *shost = sdev->host; 2662 struct srp_target_port *target = host_to_target(shost); 2663 struct request_queue *q = sdev->request_queue; 2664 unsigned long timeout; 2665 2666 if (sdev->type == TYPE_DISK) { 2667 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies); 2668 blk_queue_rq_timeout(q, timeout); 2669 } 2670 2671 return 0; 2672 } 2673 2674 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr, 2675 char *buf) 2676 { 2677 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2678 2679 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext)); 2680 } 2681 2682 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr, 2683 char *buf) 2684 { 2685 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2686 2687 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid)); 2688 } 2689 2690 static ssize_t show_service_id(struct device *dev, 2691 struct device_attribute *attr, char *buf) 2692 { 2693 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2694 2695 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id)); 2696 } 2697 2698 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, 2699 char *buf) 2700 { 2701 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2702 2703 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey)); 2704 } 2705 2706 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr, 2707 char *buf) 2708 { 2709 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2710 2711 return sprintf(buf, "%pI6\n", target->sgid.raw); 2712 } 2713 2714 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr, 2715 char *buf) 2716 { 2717 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2718 struct srp_rdma_ch *ch = &target->ch[0]; 2719 2720 return sprintf(buf, "%pI6\n", ch->path.dgid.raw); 2721 } 2722 2723 static ssize_t show_orig_dgid(struct device *dev, 2724 struct device_attribute *attr, char *buf) 2725 { 2726 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2727 2728 return sprintf(buf, "%pI6\n", target->orig_dgid.raw); 2729 } 2730 2731 static ssize_t show_req_lim(struct device *dev, 2732 struct device_attribute *attr, char *buf) 2733 { 2734 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2735 struct srp_rdma_ch *ch; 2736 int i, req_lim = INT_MAX; 2737 2738 for (i = 0; i < target->ch_count; i++) { 2739 ch = &target->ch[i]; 2740 
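/* Report the smallest credit count across all channels. */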
req_lim = min(req_lim, ch->req_lim); 2741 } 2742 return sprintf(buf, "%d\n", req_lim); 2743 } 2744 2745 static ssize_t show_zero_req_lim(struct device *dev, 2746 struct device_attribute *attr, char *buf) 2747 { 2748 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2749 2750 return sprintf(buf, "%d\n", target->zero_req_lim); 2751 } 2752 2753 static ssize_t show_local_ib_port(struct device *dev, 2754 struct device_attribute *attr, char *buf) 2755 { 2756 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2757 2758 return sprintf(buf, "%d\n", target->srp_host->port); 2759 } 2760 2761 static ssize_t show_local_ib_device(struct device *dev, 2762 struct device_attribute *attr, char *buf) 2763 { 2764 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2765 2766 return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name); 2767 } 2768 2769 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr, 2770 char *buf) 2771 { 2772 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2773 2774 return sprintf(buf, "%d\n", target->ch_count); 2775 } 2776 2777 static ssize_t show_comp_vector(struct device *dev, 2778 struct device_attribute *attr, char *buf) 2779 { 2780 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2781 2782 return sprintf(buf, "%d\n", target->comp_vector); 2783 } 2784 2785 static ssize_t show_tl_retry_count(struct device *dev, 2786 struct device_attribute *attr, char *buf) 2787 { 2788 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2789 2790 return sprintf(buf, "%d\n", target->tl_retry_count); 2791 } 2792 2793 static ssize_t show_cmd_sg_entries(struct device *dev, 2794 struct device_attribute *attr, char *buf) 2795 { 2796 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2797 2798 return sprintf(buf, "%u\n", target->cmd_sg_cnt); 2799 } 2800 2801 static ssize_t show_allow_ext_sg(struct device *dev, 2802 struct device_attribute *attr, char *buf) 2803 { 2804 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2805 2806 return sprintf(buf, "%s\n", target->allow_ext_sg ? 
"true" : "false"); 2807 } 2808 2809 static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL); 2810 static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL); 2811 static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL); 2812 static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); 2813 static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL); 2814 static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL); 2815 static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL); 2816 static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL); 2817 static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL); 2818 static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL); 2819 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL); 2820 static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL); 2821 static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL); 2822 static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL); 2823 static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL); 2824 static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL); 2825 2826 static struct device_attribute *srp_host_attrs[] = { 2827 &dev_attr_id_ext, 2828 &dev_attr_ioc_guid, 2829 &dev_attr_service_id, 2830 &dev_attr_pkey, 2831 &dev_attr_sgid, 2832 &dev_attr_dgid, 2833 &dev_attr_orig_dgid, 2834 &dev_attr_req_lim, 2835 &dev_attr_zero_req_lim, 2836 &dev_attr_local_ib_port, 2837 &dev_attr_local_ib_device, 2838 &dev_attr_ch_count, 2839 &dev_attr_comp_vector, 2840 &dev_attr_tl_retry_count, 2841 &dev_attr_cmd_sg_entries, 2842 &dev_attr_allow_ext_sg, 2843 NULL 2844 }; 2845 2846 static struct scsi_host_template srp_template = { 2847 .module = THIS_MODULE, 2848 .name = "InfiniBand SRP initiator", 2849 .proc_name = DRV_NAME, 2850 .slave_alloc = srp_slave_alloc, 2851 .slave_configure = srp_slave_configure, 2852 .info = srp_target_info, 2853 .queuecommand = srp_queuecommand, 2854 .change_queue_depth = srp_change_queue_depth, 2855 .eh_abort_handler = srp_abort, 2856 .eh_device_reset_handler = srp_reset_device, 2857 .eh_host_reset_handler = srp_reset_host, 2858 .skip_settle_delay = true, 2859 .sg_tablesize = SRP_DEF_SG_TABLESIZE, 2860 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE, 2861 .this_id = -1, 2862 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE, 2863 .use_clustering = ENABLE_CLUSTERING, 2864 .shost_attrs = srp_host_attrs, 2865 .track_queue_depth = 1, 2866 }; 2867 2868 static int srp_sdev_count(struct Scsi_Host *host) 2869 { 2870 struct scsi_device *sdev; 2871 int c = 0; 2872 2873 shost_for_each_device(sdev, host) 2874 c++; 2875 2876 return c; 2877 } 2878 2879 /* 2880 * Return values: 2881 * < 0 upon failure. Caller is responsible for SRP target port cleanup. 2882 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port 2883 * removal has been scheduled. 2884 * 0 and target->state != SRP_TARGET_REMOVED upon success. 
2885 */ 2886 static int srp_add_target(struct srp_host *host, struct srp_target_port *target) 2887 { 2888 struct srp_rport_identifiers ids; 2889 struct srp_rport *rport; 2890 2891 target->state = SRP_TARGET_SCANNING; 2892 sprintf(target->target_name, "SRP.T10:%016llX", 2893 be64_to_cpu(target->id_ext)); 2894 2895 if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device)) 2896 return -ENODEV; 2897 2898 memcpy(ids.port_id, &target->id_ext, 8); 2899 memcpy(ids.port_id + 8, &target->ioc_guid, 8); 2900 ids.roles = SRP_RPORT_ROLE_TARGET; 2901 rport = srp_rport_add(target->scsi_host, &ids); 2902 if (IS_ERR(rport)) { 2903 scsi_remove_host(target->scsi_host); 2904 return PTR_ERR(rport); 2905 } 2906 2907 rport->lld_data = target; 2908 target->rport = rport; 2909 2910 spin_lock(&host->target_lock); 2911 list_add_tail(&target->list, &host->target_list); 2912 spin_unlock(&host->target_lock); 2913 2914 scsi_scan_target(&target->scsi_host->shost_gendev, 2915 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL); 2916 2917 if (srp_connected_ch(target) < target->ch_count || 2918 target->qp_in_error) { 2919 shost_printk(KERN_INFO, target->scsi_host, 2920 PFX "SCSI scan failed - removing SCSI host\n"); 2921 srp_queue_remove_work(target); 2922 goto out; 2923 } 2924 2925 pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n", 2926 dev_name(&target->scsi_host->shost_gendev), 2927 srp_sdev_count(target->scsi_host)); 2928 2929 spin_lock_irq(&target->lock); 2930 if (target->state == SRP_TARGET_SCANNING) 2931 target->state = SRP_TARGET_LIVE; 2932 spin_unlock_irq(&target->lock); 2933 2934 out: 2935 return 0; 2936 } 2937 2938 static void srp_release_dev(struct device *dev) 2939 { 2940 struct srp_host *host = 2941 container_of(dev, struct srp_host, dev); 2942 2943 complete(&host->released); 2944 } 2945 2946 static struct class srp_class = { 2947 .name = "infiniband_srp", 2948 .dev_release = srp_release_dev 2949 }; 2950 2951 /** 2952 * srp_conn_unique() - check whether the connection to a target is unique 2953 * @host: SRP host. 2954 * @target: SRP target port. 2955 */ 2956 static bool srp_conn_unique(struct srp_host *host, 2957 struct srp_target_port *target) 2958 { 2959 struct srp_target_port *t; 2960 bool ret = false; 2961 2962 if (target->state == SRP_TARGET_REMOVED) 2963 goto out; 2964 2965 ret = true; 2966 2967 spin_lock(&host->target_lock); 2968 list_for_each_entry(t, &host->target_list, list) { 2969 if (t != target && 2970 target->id_ext == t->id_ext && 2971 target->ioc_guid == t->ioc_guid && 2972 target->initiator_ext == t->initiator_ext) { 2973 ret = false; 2974 break; 2975 } 2976 } 2977 spin_unlock(&host->target_lock); 2978 2979 out: 2980 return ret; 2981 } 2982 2983 /* 2984 * Target ports are added by writing 2985 * 2986 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>, 2987 * pkey=<P_Key>,service_id=<service ID> 2988 * 2989 * to the add_target sysfs attribute. 
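 *
 * A hypothetical example (all identifiers and the HCA name are illustrative):
 *
 * echo id_ext=200100e08b000000,ioc_guid=0002c90300a00000,dgid=fe800000000000000002c9030001e442,pkey=ffff,service_id=0002c90300a00000 > /sys/class/infiniband_srp/srp-mlx4_0-1/add_target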
2990 */ 2991 enum { 2992 SRP_OPT_ERR = 0, 2993 SRP_OPT_ID_EXT = 1 << 0, 2994 SRP_OPT_IOC_GUID = 1 << 1, 2995 SRP_OPT_DGID = 1 << 2, 2996 SRP_OPT_PKEY = 1 << 3, 2997 SRP_OPT_SERVICE_ID = 1 << 4, 2998 SRP_OPT_MAX_SECT = 1 << 5, 2999 SRP_OPT_MAX_CMD_PER_LUN = 1 << 6, 3000 SRP_OPT_IO_CLASS = 1 << 7, 3001 SRP_OPT_INITIATOR_EXT = 1 << 8, 3002 SRP_OPT_CMD_SG_ENTRIES = 1 << 9, 3003 SRP_OPT_ALLOW_EXT_SG = 1 << 10, 3004 SRP_OPT_SG_TABLESIZE = 1 << 11, 3005 SRP_OPT_COMP_VECTOR = 1 << 12, 3006 SRP_OPT_TL_RETRY_COUNT = 1 << 13, 3007 SRP_OPT_QUEUE_SIZE = 1 << 14, 3008 SRP_OPT_ALL = (SRP_OPT_ID_EXT | 3009 SRP_OPT_IOC_GUID | 3010 SRP_OPT_DGID | 3011 SRP_OPT_PKEY | 3012 SRP_OPT_SERVICE_ID), 3013 }; 3014 3015 static const match_table_t srp_opt_tokens = { 3016 { SRP_OPT_ID_EXT, "id_ext=%s" }, 3017 { SRP_OPT_IOC_GUID, "ioc_guid=%s" }, 3018 { SRP_OPT_DGID, "dgid=%s" }, 3019 { SRP_OPT_PKEY, "pkey=%x" }, 3020 { SRP_OPT_SERVICE_ID, "service_id=%s" }, 3021 { SRP_OPT_MAX_SECT, "max_sect=%d" }, 3022 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" }, 3023 { SRP_OPT_IO_CLASS, "io_class=%x" }, 3024 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" }, 3025 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" }, 3026 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" }, 3027 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" }, 3028 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" }, 3029 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" }, 3030 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" }, 3031 { SRP_OPT_ERR, NULL } 3032 }; 3033 3034 static int srp_parse_options(const char *buf, struct srp_target_port *target) 3035 { 3036 char *options, *sep_opt; 3037 char *p; 3038 char dgid[3]; 3039 substring_t args[MAX_OPT_ARGS]; 3040 int opt_mask = 0; 3041 int token; 3042 int ret = -EINVAL; 3043 int i; 3044 3045 options = kstrdup(buf, GFP_KERNEL); 3046 if (!options) 3047 return -ENOMEM; 3048 3049 sep_opt = options; 3050 while ((p = strsep(&sep_opt, ",\n")) != NULL) { 3051 if (!*p) 3052 continue; 3053 3054 token = match_token(p, srp_opt_tokens, args); 3055 opt_mask |= token; 3056 3057 switch (token) { 3058 case SRP_OPT_ID_EXT: 3059 p = match_strdup(args); 3060 if (!p) { 3061 ret = -ENOMEM; 3062 goto out; 3063 } 3064 target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16)); 3065 kfree(p); 3066 break; 3067 3068 case SRP_OPT_IOC_GUID: 3069 p = match_strdup(args); 3070 if (!p) { 3071 ret = -ENOMEM; 3072 goto out; 3073 } 3074 target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16)); 3075 kfree(p); 3076 break; 3077 3078 case SRP_OPT_DGID: 3079 p = match_strdup(args); 3080 if (!p) { 3081 ret = -ENOMEM; 3082 goto out; 3083 } 3084 if (strlen(p) != 32) { 3085 pr_warn("bad dest GID parameter '%s'\n", p); 3086 kfree(p); 3087 goto out; 3088 } 3089 3090 for (i = 0; i < 16; ++i) { 3091 strlcpy(dgid, p + i * 2, sizeof(dgid)); 3092 if (sscanf(dgid, "%hhx", 3093 &target->orig_dgid.raw[i]) < 1) { 3094 ret = -EINVAL; 3095 kfree(p); 3096 goto out; 3097 } 3098 } 3099 kfree(p); 3100 break; 3101 3102 case SRP_OPT_PKEY: 3103 if (match_hex(args, &token)) { 3104 pr_warn("bad P_Key parameter '%s'\n", p); 3105 goto out; 3106 } 3107 target->pkey = cpu_to_be16(token); 3108 break; 3109 3110 case SRP_OPT_SERVICE_ID: 3111 p = match_strdup(args); 3112 if (!p) { 3113 ret = -ENOMEM; 3114 goto out; 3115 } 3116 target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16)); 3117 kfree(p); 3118 break; 3119 3120 case SRP_OPT_MAX_SECT: 3121 if (match_int(args, &token)) { 3122 pr_warn("bad max sect parameter '%s'\n", p); 3123 goto out; 3124 } 3125 target->scsi_host->max_sectors = token; 3126 break; 3127 3128 case 
SRP_OPT_QUEUE_SIZE: 3129 if (match_int(args, &token) || token < 1) { 3130 pr_warn("bad queue_size parameter '%s'\n", p); 3131 goto out; 3132 } 3133 target->scsi_host->can_queue = token; 3134 target->queue_size = token + SRP_RSP_SQ_SIZE + 3135 SRP_TSK_MGMT_SQ_SIZE; 3136 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN)) 3137 target->scsi_host->cmd_per_lun = token; 3138 break; 3139 3140 case SRP_OPT_MAX_CMD_PER_LUN: 3141 if (match_int(args, &token) || token < 1) { 3142 pr_warn("bad max cmd_per_lun parameter '%s'\n", 3143 p); 3144 goto out; 3145 } 3146 target->scsi_host->cmd_per_lun = token; 3147 break; 3148 3149 case SRP_OPT_IO_CLASS: 3150 if (match_hex(args, &token)) { 3151 pr_warn("bad IO class parameter '%s'\n", p); 3152 goto out; 3153 } 3154 if (token != SRP_REV10_IB_IO_CLASS && 3155 token != SRP_REV16A_IB_IO_CLASS) { 3156 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n", 3157 token, SRP_REV10_IB_IO_CLASS, 3158 SRP_REV16A_IB_IO_CLASS); 3159 goto out; 3160 } 3161 target->io_class = token; 3162 break; 3163 3164 case SRP_OPT_INITIATOR_EXT: 3165 p = match_strdup(args); 3166 if (!p) { 3167 ret = -ENOMEM; 3168 goto out; 3169 } 3170 target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16)); 3171 kfree(p); 3172 break; 3173 3174 case SRP_OPT_CMD_SG_ENTRIES: 3175 if (match_int(args, &token) || token < 1 || token > 255) { 3176 pr_warn("bad max cmd_sg_entries parameter '%s'\n", 3177 p); 3178 goto out; 3179 } 3180 target->cmd_sg_cnt = token; 3181 break; 3182 3183 case SRP_OPT_ALLOW_EXT_SG: 3184 if (match_int(args, &token)) { 3185 pr_warn("bad allow_ext_sg parameter '%s'\n", p); 3186 goto out; 3187 } 3188 target->allow_ext_sg = !!token; 3189 break; 3190 3191 case SRP_OPT_SG_TABLESIZE: 3192 if (match_int(args, &token) || token < 1 || 3193 token > SG_MAX_SEGMENTS) { 3194 pr_warn("bad max sg_tablesize parameter '%s'\n", 3195 p); 3196 goto out; 3197 } 3198 target->sg_tablesize = token; 3199 break; 3200 3201 case SRP_OPT_COMP_VECTOR: 3202 if (match_int(args, &token) || token < 0) { 3203 pr_warn("bad comp_vector parameter '%s'\n", p); 3204 goto out; 3205 } 3206 target->comp_vector = token; 3207 break; 3208 3209 case SRP_OPT_TL_RETRY_COUNT: 3210 if (match_int(args, &token) || token < 2 || token > 7) { 3211 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n", 3212 p); 3213 goto out; 3214 } 3215 target->tl_retry_count = token; 3216 break; 3217 3218 default: 3219 pr_warn("unknown parameter or missing value '%s' in target creation request\n", 3220 p); 3221 goto out; 3222 } 3223 } 3224 3225 if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL) 3226 ret = 0; 3227 else 3228 for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i) 3229 if ((srp_opt_tokens[i].token & SRP_OPT_ALL) && 3230 !(srp_opt_tokens[i].token & opt_mask)) 3231 pr_warn("target creation request is missing parameter '%s'\n", 3232 srp_opt_tokens[i].pattern); 3233 3234 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue 3235 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN)) 3236 pr_warn("cmd_per_lun = %d > queue_size = %d\n", 3237 target->scsi_host->cmd_per_lun, 3238 target->scsi_host->can_queue); 3239 3240 out: 3241 kfree(options); 3242 return ret; 3243 } 3244 3245 static ssize_t srp_create_target(struct device *dev, 3246 struct device_attribute *attr, 3247 const char *buf, size_t count) 3248 { 3249 struct srp_host *host = 3250 container_of(dev, struct srp_host, dev); 3251 struct Scsi_Host *target_host; 3252 struct srp_target_port *target; 3253 struct srp_rdma_ch *ch; 3254 struct srp_device *srp_dev = 
host->srp_dev; 3255 struct ib_device *ibdev = srp_dev->dev; 3256 int ret, node_idx, node, cpu, i; 3257 unsigned int max_sectors_per_mr, mr_per_cmd = 0; 3258 bool multich = false; 3259 3260 target_host = scsi_host_alloc(&srp_template, 3261 sizeof (struct srp_target_port)); 3262 if (!target_host) 3263 return -ENOMEM; 3264 3265 target_host->transportt = ib_srp_transport_template; 3266 target_host->max_channel = 0; 3267 target_host->max_id = 1; 3268 target_host->max_lun = -1LL; 3269 target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb; 3270 3271 target = host_to_target(target_host); 3272 3273 target->io_class = SRP_REV16A_IB_IO_CLASS; 3274 target->scsi_host = target_host; 3275 target->srp_host = host; 3276 target->pd = host->srp_dev->pd; 3277 target->lkey = host->srp_dev->pd->local_dma_lkey; 3278 target->cmd_sg_cnt = cmd_sg_entries; 3279 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries; 3280 target->allow_ext_sg = allow_ext_sg; 3281 target->tl_retry_count = 7; 3282 target->queue_size = SRP_DEFAULT_QUEUE_SIZE; 3283 3284 /* 3285 * Avoid that the SCSI host can be removed by srp_remove_target() 3286 * before this function returns. 3287 */ 3288 scsi_host_get(target->scsi_host); 3289 3290 mutex_lock(&host->add_target_mutex); 3291 3292 ret = srp_parse_options(buf, target); 3293 if (ret) 3294 goto out; 3295 3296 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE; 3297 3298 if (!srp_conn_unique(target->srp_host, target)) { 3299 shost_printk(KERN_INFO, target->scsi_host, 3300 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n", 3301 be64_to_cpu(target->id_ext), 3302 be64_to_cpu(target->ioc_guid), 3303 be64_to_cpu(target->initiator_ext)); 3304 ret = -EEXIST; 3305 goto out; 3306 } 3307 3308 if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg && 3309 target->cmd_sg_cnt < target->sg_tablesize) { 3310 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n"); 3311 target->sg_tablesize = target->cmd_sg_cnt; 3312 } 3313 3314 if (srp_dev->use_fast_reg || srp_dev->use_fmr) { 3315 /* 3316 * FR and FMR can only map one HCA page per entry. If the 3317 * start address is not aligned on a HCA page boundary two 3318 * entries will be used for the head and the tail although 3319 * these two entries combined contain at most one HCA page of 3320 * data. Hence the "+ 1" in the calculation below. 3321 * 3322 * The indirect data buffer descriptor is contiguous so the 3323 * memory for that buffer will only be registered if 3324 * register_always is true. Hence add one to mr_per_cmd if 3325 * register_always has been set. 
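 *
 * As an illustrative example: with mr_page_size = 4096 and
 * max_pages_per_mr = 256, one MR covers 256 << 3 = 2048 sectors
 * (1 MiB), so for max_sectors = 1024 with register_always enabled
 * mr_per_cmd = 1 + (1024 + 1 + 2047) / 2048 = 2.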
3326 */ 3327 max_sectors_per_mr = srp_dev->max_pages_per_mr << 3328 (ilog2(srp_dev->mr_page_size) - 9); 3329 mr_per_cmd = register_always + 3330 (target->scsi_host->max_sectors + 1 + 3331 max_sectors_per_mr - 1) / max_sectors_per_mr; 3332 pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n", 3333 target->scsi_host->max_sectors, 3334 srp_dev->max_pages_per_mr, srp_dev->mr_page_size, 3335 max_sectors_per_mr, mr_per_cmd); 3336 } 3337 3338 target_host->sg_tablesize = target->sg_tablesize; 3339 target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd; 3340 target->mr_per_cmd = mr_per_cmd; 3341 target->indirect_size = target->sg_tablesize * 3342 sizeof (struct srp_direct_buf); 3343 target->max_iu_len = sizeof (struct srp_cmd) + 3344 sizeof (struct srp_indirect_buf) + 3345 target->cmd_sg_cnt * sizeof (struct srp_direct_buf); 3346 3347 INIT_WORK(&target->tl_err_work, srp_tl_err_work); 3348 INIT_WORK(&target->remove_work, srp_remove_work); 3349 spin_lock_init(&target->lock); 3350 ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL); 3351 if (ret) 3352 goto out; 3353 3354 ret = -ENOMEM; 3355 target->ch_count = max_t(unsigned, num_online_nodes(), 3356 min(ch_count ? : 3357 min(4 * num_online_nodes(), 3358 ibdev->num_comp_vectors), 3359 num_online_cpus())); 3360 target->ch = kcalloc(target->ch_count, sizeof(*target->ch), 3361 GFP_KERNEL); 3362 if (!target->ch) 3363 goto out; 3364 3365 node_idx = 0; 3366 for_each_online_node(node) { 3367 const int ch_start = (node_idx * target->ch_count / 3368 num_online_nodes()); 3369 const int ch_end = ((node_idx + 1) * target->ch_count / 3370 num_online_nodes()); 3371 const int cv_start = (node_idx * ibdev->num_comp_vectors / 3372 num_online_nodes() + target->comp_vector) 3373 % ibdev->num_comp_vectors; 3374 const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors / 3375 num_online_nodes() + target->comp_vector) 3376 % ibdev->num_comp_vectors; 3377 int cpu_idx = 0; 3378 3379 for_each_online_cpu(cpu) { 3380 if (cpu_to_node(cpu) != node) 3381 continue; 3382 if (ch_start + cpu_idx >= ch_end) 3383 continue; 3384 ch = &target->ch[ch_start + cpu_idx]; 3385 ch->target = target; 3386 ch->comp_vector = cv_start == cv_end ? 
cv_start : 3387 cv_start + cpu_idx % (cv_end - cv_start); 3388 spin_lock_init(&ch->lock); 3389 INIT_LIST_HEAD(&ch->free_tx); 3390 ret = srp_new_cm_id(ch); 3391 if (ret) 3392 goto err_disconnect; 3393 3394 ret = srp_create_ch_ib(ch); 3395 if (ret) 3396 goto err_disconnect; 3397 3398 ret = srp_alloc_req_data(ch); 3399 if (ret) 3400 goto err_disconnect; 3401 3402 ret = srp_connect_ch(ch, multich); 3403 if (ret) { 3404 shost_printk(KERN_ERR, target->scsi_host, 3405 PFX "Connection %d/%d failed\n", 3406 ch_start + cpu_idx, 3407 target->ch_count); 3408 if (node_idx == 0 && cpu_idx == 0) { 3409 goto err_disconnect; 3410 } else { 3411 srp_free_ch_ib(target, ch); 3412 srp_free_req_data(target, ch); 3413 target->ch_count = ch - target->ch; 3414 goto connected; 3415 } 3416 } 3417 3418 multich = true; 3419 cpu_idx++; 3420 } 3421 node_idx++; 3422 } 3423 3424 connected: 3425 target->scsi_host->nr_hw_queues = target->ch_count; 3426 3427 ret = srp_add_target(host, target); 3428 if (ret) 3429 goto err_disconnect; 3430 3431 if (target->state != SRP_TARGET_REMOVED) { 3432 shost_printk(KERN_DEBUG, target->scsi_host, PFX 3433 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n", 3434 be64_to_cpu(target->id_ext), 3435 be64_to_cpu(target->ioc_guid), 3436 be16_to_cpu(target->pkey), 3437 be64_to_cpu(target->service_id), 3438 target->sgid.raw, target->orig_dgid.raw); 3439 } 3440 3441 ret = count; 3442 3443 out: 3444 mutex_unlock(&host->add_target_mutex); 3445 3446 scsi_host_put(target->scsi_host); 3447 if (ret < 0) 3448 scsi_host_put(target->scsi_host); 3449 3450 return ret; 3451 3452 err_disconnect: 3453 srp_disconnect_target(target); 3454 3455 for (i = 0; i < target->ch_count; i++) { 3456 ch = &target->ch[i]; 3457 srp_free_ch_ib(target, ch); 3458 srp_free_req_data(target, ch); 3459 } 3460 3461 kfree(target->ch); 3462 goto out; 3463 } 3464 3465 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target); 3466 3467 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, 3468 char *buf) 3469 { 3470 struct srp_host *host = container_of(dev, struct srp_host, dev); 3471 3472 return sprintf(buf, "%s\n", host->srp_dev->dev->name); 3473 } 3474 3475 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); 3476 3477 static ssize_t show_port(struct device *dev, struct device_attribute *attr, 3478 char *buf) 3479 { 3480 struct srp_host *host = container_of(dev, struct srp_host, dev); 3481 3482 return sprintf(buf, "%d\n", host->port); 3483 } 3484 3485 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL); 3486 3487 static struct srp_host *srp_add_port(struct srp_device *device, u8 port) 3488 { 3489 struct srp_host *host; 3490 3491 host = kzalloc(sizeof *host, GFP_KERNEL); 3492 if (!host) 3493 return NULL; 3494 3495 INIT_LIST_HEAD(&host->target_list); 3496 spin_lock_init(&host->target_lock); 3497 init_completion(&host->released); 3498 mutex_init(&host->add_target_mutex); 3499 host->srp_dev = device; 3500 host->port = port; 3501 3502 host->dev.class = &srp_class; 3503 host->dev.parent = device->dev->dma_device; 3504 dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port); 3505 3506 if (device_register(&host->dev)) 3507 goto free_host; 3508 if (device_create_file(&host->dev, &dev_attr_add_target)) 3509 goto err_class; 3510 if (device_create_file(&host->dev, &dev_attr_ibdev)) 3511 goto err_class; 3512 if (device_create_file(&host->dev, &dev_attr_port)) 3513 goto err_class; 3514 3515 return host; 3516 3517 err_class: 3518 device_unregister(&host->dev); 3519 3520 
free_host: 3521 kfree(host); 3522 3523 return NULL; 3524 } 3525 3526 static void srp_add_one(struct ib_device *device) 3527 { 3528 struct srp_device *srp_dev; 3529 struct srp_host *host; 3530 int mr_page_shift, p; 3531 u64 max_pages_per_mr; 3532 unsigned int flags = 0; 3533 3534 srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL); 3535 if (!srp_dev) 3536 return; 3537 3538 /* 3539 * Use the smallest page size supported by the HCA, down to a 3540 * minimum of 4096 bytes. We're unlikely to build large sglists 3541 * out of smaller entries. 3542 */ 3543 mr_page_shift = max(12, ffs(device->attrs.page_size_cap) - 1); 3544 srp_dev->mr_page_size = 1 << mr_page_shift; 3545 srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1); 3546 max_pages_per_mr = device->attrs.max_mr_size; 3547 do_div(max_pages_per_mr, srp_dev->mr_page_size); 3548 pr_debug("%s: %llu / %u = %llu <> %u\n", __func__, 3549 device->attrs.max_mr_size, srp_dev->mr_page_size, 3550 max_pages_per_mr, SRP_MAX_PAGES_PER_MR); 3551 srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR, 3552 max_pages_per_mr); 3553 3554 srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr && 3555 device->map_phys_fmr && device->unmap_fmr); 3556 srp_dev->has_fr = (device->attrs.device_cap_flags & 3557 IB_DEVICE_MEM_MGT_EXTENSIONS); 3558 if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) { 3559 dev_warn(&device->dev, "neither FMR nor FR is supported\n"); 3560 } else if (!never_register && 3561 device->attrs.max_mr_size >= 2 * srp_dev->mr_page_size) { 3562 srp_dev->use_fast_reg = (srp_dev->has_fr && 3563 (!srp_dev->has_fmr || prefer_fr)); 3564 srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr; 3565 } 3566 3567 if (never_register || !register_always || 3568 (!srp_dev->has_fmr && !srp_dev->has_fr)) 3569 flags |= IB_PD_UNSAFE_GLOBAL_RKEY; 3570 3571 if (srp_dev->use_fast_reg) { 3572 srp_dev->max_pages_per_mr = 3573 min_t(u32, srp_dev->max_pages_per_mr, 3574 device->attrs.max_fast_reg_page_list_len); 3575 } 3576 srp_dev->mr_max_size = srp_dev->mr_page_size * 3577 srp_dev->max_pages_per_mr; 3578 pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n", 3579 device->name, mr_page_shift, device->attrs.max_mr_size, 3580 device->attrs.max_fast_reg_page_list_len, 3581 srp_dev->max_pages_per_mr, srp_dev->mr_max_size); 3582 3583 INIT_LIST_HEAD(&srp_dev->dev_list); 3584 3585 srp_dev->dev = device; 3586 srp_dev->pd = ib_alloc_pd(device, flags); 3587 if (IS_ERR(srp_dev->pd)) 3588 goto free_dev; 3589 3590 3591 for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) { 3592 host = srp_add_port(srp_dev, p); 3593 if (host) 3594 list_add_tail(&host->list, &srp_dev->dev_list); 3595 } 3596 3597 ib_set_client_data(device, &srp_client, srp_dev); 3598 return; 3599 3600 free_dev: 3601 kfree(srp_dev); 3602 } 3603 3604 static void srp_remove_one(struct ib_device *device, void *client_data) 3605 { 3606 struct srp_device *srp_dev; 3607 struct srp_host *host, *tmp_host; 3608 struct srp_target_port *target; 3609 3610 srp_dev = client_data; 3611 if (!srp_dev) 3612 return; 3613 3614 list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) { 3615 device_unregister(&host->dev); 3616 /* 3617 * Wait for the sysfs entry to go away, so that no new 3618 * target ports can be created. 3619 */ 3620 wait_for_completion(&host->released); 3621 3622 /* 3623 * Remove all target ports. 
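 * The removals are performed asynchronously; the work queues are
 * flushed below before the host structures are freed.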
3624 */ 3625 spin_lock(&host->target_lock); 3626 list_for_each_entry(target, &host->target_list, list) 3627 srp_queue_remove_work(target); 3628 spin_unlock(&host->target_lock); 3629 3630 /* 3631 * Wait for tl_err and target port removal tasks. 3632 */ 3633 flush_workqueue(system_long_wq); 3634 flush_workqueue(srp_remove_wq); 3635 3636 kfree(host); 3637 } 3638 3639 ib_dealloc_pd(srp_dev->pd); 3640 3641 kfree(srp_dev); 3642 } 3643 3644 static struct srp_function_template ib_srp_transport_functions = { 3645 .has_rport_state = true, 3646 .reset_timer_if_blocked = true, 3647 .reconnect_delay = &srp_reconnect_delay, 3648 .fast_io_fail_tmo = &srp_fast_io_fail_tmo, 3649 .dev_loss_tmo = &srp_dev_loss_tmo, 3650 .reconnect = srp_rport_reconnect, 3651 .rport_delete = srp_rport_delete, 3652 .terminate_rport_io = srp_terminate_io, 3653 }; 3654 3655 static int __init srp_init_module(void) 3656 { 3657 int ret; 3658 3659 if (srp_sg_tablesize) { 3660 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n"); 3661 if (!cmd_sg_entries) 3662 cmd_sg_entries = srp_sg_tablesize; 3663 } 3664 3665 if (!cmd_sg_entries) 3666 cmd_sg_entries = SRP_DEF_SG_TABLESIZE; 3667 3668 if (cmd_sg_entries > 255) { 3669 pr_warn("Clamping cmd_sg_entries to 255\n"); 3670 cmd_sg_entries = 255; 3671 } 3672 3673 if (!indirect_sg_entries) 3674 indirect_sg_entries = cmd_sg_entries; 3675 else if (indirect_sg_entries < cmd_sg_entries) { 3676 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n", 3677 cmd_sg_entries); 3678 indirect_sg_entries = cmd_sg_entries; 3679 } 3680 3681 srp_remove_wq = create_workqueue("srp_remove"); 3682 if (!srp_remove_wq) { 3683 ret = -ENOMEM; 3684 goto out; 3685 } 3686 3687 ret = -ENOMEM; 3688 ib_srp_transport_template = 3689 srp_attach_transport(&ib_srp_transport_functions); 3690 if (!ib_srp_transport_template) 3691 goto destroy_wq; 3692 3693 ret = class_register(&srp_class); 3694 if (ret) { 3695 pr_err("couldn't register class infiniband_srp\n"); 3696 goto release_tr; 3697 } 3698 3699 ib_sa_register_client(&srp_sa_client); 3700 3701 ret = ib_register_client(&srp_client); 3702 if (ret) { 3703 pr_err("couldn't register IB client\n"); 3704 goto unreg_sa; 3705 } 3706 3707 out: 3708 return ret; 3709 3710 unreg_sa: 3711 ib_sa_unregister_client(&srp_sa_client); 3712 class_unregister(&srp_class); 3713 3714 release_tr: 3715 srp_release_transport(ib_srp_transport_template); 3716 3717 destroy_wq: 3718 destroy_workqueue(srp_remove_wq); 3719 goto out; 3720 } 3721 3722 static void __exit srp_cleanup_module(void) 3723 { 3724 ib_unregister_client(&srp_client); 3725 ib_sa_unregister_client(&srp_sa_client); 3726 class_unregister(&srp_class); 3727 srp_release_transport(ib_srp_transport_template); 3728 destroy_workqueue(srp_remove_wq); 3729 } 3730 3731 module_init(srp_init_module); 3732 module_exit(srp_cleanup_module); 3733
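/*
 * Illustrative load-time example (the parameter values are arbitrary):
 * "modprobe ib_srp cmd_sg_entries=64 indirect_sg_entries=128".
 * Each (HCA, port) pair then appears as srp-<device>-<port> under
 * /sys/class/infiniband_srp/, and target ports are added through its
 * add_target attribute as described above.
 */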