1 /* 2 * Copyright (c) 2005 Cisco Systems. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 34 35 #include <linux/module.h> 36 #include <linux/init.h> 37 #include <linux/slab.h> 38 #include <linux/err.h> 39 #include <linux/string.h> 40 #include <linux/parser.h> 41 #include <linux/random.h> 42 #include <linux/jiffies.h> 43 #include <linux/lockdep.h> 44 #include <rdma/ib_cache.h> 45 46 #include <linux/atomic.h> 47 48 #include <scsi/scsi.h> 49 #include <scsi/scsi_device.h> 50 #include <scsi/scsi_dbg.h> 51 #include <scsi/scsi_tcq.h> 52 #include <scsi/srp.h> 53 #include <scsi/scsi_transport_srp.h> 54 55 #include "ib_srp.h" 56 57 #define DRV_NAME "ib_srp" 58 #define PFX DRV_NAME ": " 59 #define DRV_VERSION "2.0" 60 #define DRV_RELDATE "July 26, 2015" 61 62 MODULE_AUTHOR("Roland Dreier"); 63 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator"); 64 MODULE_LICENSE("Dual BSD/GPL"); 65 MODULE_INFO(release_date, DRV_RELDATE); 66 67 #if !defined(CONFIG_DYNAMIC_DEBUG) 68 #define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) 69 #define DYNAMIC_DEBUG_BRANCH(descriptor) false 70 #endif 71 72 static unsigned int srp_sg_tablesize; 73 static unsigned int cmd_sg_entries; 74 static unsigned int indirect_sg_entries; 75 static bool allow_ext_sg; 76 static bool prefer_fr = true; 77 static bool register_always = true; 78 static bool never_register; 79 static int topspin_workarounds = 1; 80 81 module_param(srp_sg_tablesize, uint, 0444); 82 MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries"); 83 84 module_param(cmd_sg_entries, uint, 0444); 85 MODULE_PARM_DESC(cmd_sg_entries, 86 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)"); 87 88 module_param(indirect_sg_entries, uint, 0444); 89 MODULE_PARM_DESC(indirect_sg_entries, 90 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")"); 91 92 module_param(allow_ext_sg, bool, 0444); 93 MODULE_PARM_DESC(allow_ext_sg, 94 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)"); 95 96 module_param(topspin_workarounds, int, 0444); 97 
MODULE_PARM_DESC(topspin_workarounds, 98 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0"); 99 100 module_param(prefer_fr, bool, 0444); 101 MODULE_PARM_DESC(prefer_fr, 102 "Whether to use fast registration if both FMR and fast registration are supported"); 103 104 module_param(register_always, bool, 0444); 105 MODULE_PARM_DESC(register_always, 106 "Use memory registration even for contiguous memory regions"); 107 108 module_param(never_register, bool, 0444); 109 MODULE_PARM_DESC(never_register, "Never register memory"); 110 111 static const struct kernel_param_ops srp_tmo_ops; 112 113 static int srp_reconnect_delay = 10; 114 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay, 115 S_IRUGO | S_IWUSR); 116 MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts"); 117 118 static int srp_fast_io_fail_tmo = 15; 119 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo, 120 S_IRUGO | S_IWUSR); 121 MODULE_PARM_DESC(fast_io_fail_tmo, 122 "Number of seconds between the observation of a transport" 123 " layer error and failing all I/O. \"off\" means that this" 124 " functionality is disabled."); 125 126 static int srp_dev_loss_tmo = 600; 127 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo, 128 S_IRUGO | S_IWUSR); 129 MODULE_PARM_DESC(dev_loss_tmo, 130 "Maximum number of seconds that the SRP transport should" 131 " insulate transport layer errors. After this time has been" 132 " exceeded the SCSI host is removed. Should be" 133 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT) 134 " if fast_io_fail_tmo has not been set. \"off\" means that" 135 " this functionality is disabled."); 136 137 static unsigned ch_count; 138 module_param(ch_count, uint, 0444); 139 MODULE_PARM_DESC(ch_count, 140 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. 
The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA."); 141 142 static void srp_add_one(struct ib_device *device); 143 static void srp_remove_one(struct ib_device *device, void *client_data); 144 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc); 145 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc, 146 const char *opname); 147 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); 148 149 static struct scsi_transport_template *ib_srp_transport_template; 150 static struct workqueue_struct *srp_remove_wq; 151 152 static struct ib_client srp_client = { 153 .name = "srp", 154 .add = srp_add_one, 155 .remove = srp_remove_one 156 }; 157 158 static struct ib_sa_client srp_sa_client; 159 160 static int srp_tmo_get(char *buffer, const struct kernel_param *kp) 161 { 162 int tmo = *(int *)kp->arg; 163 164 if (tmo >= 0) 165 return sprintf(buffer, "%d", tmo); 166 else 167 return sprintf(buffer, "off"); 168 } 169 170 static int srp_tmo_set(const char *val, const struct kernel_param *kp) 171 { 172 int tmo, res; 173 174 res = srp_parse_tmo(&tmo, val); 175 if (res) 176 goto out; 177 178 if (kp->arg == &srp_reconnect_delay) 179 res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo, 180 srp_dev_loss_tmo); 181 else if (kp->arg == &srp_fast_io_fail_tmo) 182 res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo); 183 else 184 res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo, 185 tmo); 186 if (res) 187 goto out; 188 *(int *)kp->arg = tmo; 189 190 out: 191 return res; 192 } 193 194 static const struct kernel_param_ops srp_tmo_ops = { 195 .get = srp_tmo_get, 196 .set = srp_tmo_set, 197 }; 198 199 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host) 200 { 201 return (struct srp_target_port *) host->hostdata; 202 } 203 204 static const char *srp_target_info(struct Scsi_Host *host) 205 { 206 return host_to_target(host)->target_name; 207 } 208 209 static int srp_target_is_topspin(struct srp_target_port *target) 210 { 211 static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad }; 212 static const u8 cisco_oui[3] = { 0x00, 0x1b, 0x0d }; 213 214 return topspin_workarounds && 215 (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) || 216 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui)); 217 } 218 219 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size, 220 gfp_t gfp_mask, 221 enum dma_data_direction direction) 222 { 223 struct srp_iu *iu; 224 225 iu = kmalloc(sizeof *iu, gfp_mask); 226 if (!iu) 227 goto out; 228 229 iu->buf = kzalloc(size, gfp_mask); 230 if (!iu->buf) 231 goto out_free_iu; 232 233 iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size, 234 direction); 235 if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma)) 236 goto out_free_buf; 237 238 iu->size = size; 239 iu->direction = direction; 240 241 return iu; 242 243 out_free_buf: 244 kfree(iu->buf); 245 out_free_iu: 246 kfree(iu); 247 out: 248 return NULL; 249 } 250 251 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu) 252 { 253 if (!iu) 254 return; 255 256 ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size, 257 iu->direction); 258 kfree(iu->buf); 259 kfree(iu); 260 } 261 262 static void srp_qp_event(struct ib_event *event, void *context) 263 { 264 pr_debug("QP event %s (%d)\n", 265 ib_event_msg(event->event), event->event); 266 } 267 268 static int srp_init_qp(struct srp_target_port *target, 269 struct ib_qp *qp) 270 { 271 
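	/*
	 * Transition the freshly created QP from the RESET to the INIT state:
	 * look up the P_Key index that matches target->pkey, enable remote
	 * read/write access and bind the QP to the local HCA port.
	 */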
struct ib_qp_attr *attr; 272 int ret; 273 274 attr = kmalloc(sizeof *attr, GFP_KERNEL); 275 if (!attr) 276 return -ENOMEM; 277 278 ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev, 279 target->srp_host->port, 280 be16_to_cpu(target->pkey), 281 &attr->pkey_index); 282 if (ret) 283 goto out; 284 285 attr->qp_state = IB_QPS_INIT; 286 attr->qp_access_flags = (IB_ACCESS_REMOTE_READ | 287 IB_ACCESS_REMOTE_WRITE); 288 attr->port_num = target->srp_host->port; 289 290 ret = ib_modify_qp(qp, attr, 291 IB_QP_STATE | 292 IB_QP_PKEY_INDEX | 293 IB_QP_ACCESS_FLAGS | 294 IB_QP_PORT); 295 296 out: 297 kfree(attr); 298 return ret; 299 } 300 301 static int srp_new_cm_id(struct srp_rdma_ch *ch) 302 { 303 struct srp_target_port *target = ch->target; 304 struct ib_cm_id *new_cm_id; 305 306 new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev, 307 srp_cm_handler, ch); 308 if (IS_ERR(new_cm_id)) 309 return PTR_ERR(new_cm_id); 310 311 if (ch->cm_id) 312 ib_destroy_cm_id(ch->cm_id); 313 ch->cm_id = new_cm_id; 314 if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev, 315 target->srp_host->port)) 316 ch->path.rec_type = SA_PATH_REC_TYPE_OPA; 317 else 318 ch->path.rec_type = SA_PATH_REC_TYPE_IB; 319 ch->path.sgid = target->sgid; 320 ch->path.dgid = target->orig_dgid; 321 ch->path.pkey = target->pkey; 322 ch->path.service_id = target->service_id; 323 324 return 0; 325 } 326 327 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target) 328 { 329 struct srp_device *dev = target->srp_host->srp_dev; 330 struct ib_fmr_pool_param fmr_param; 331 332 memset(&fmr_param, 0, sizeof(fmr_param)); 333 fmr_param.pool_size = target->mr_pool_size; 334 fmr_param.dirty_watermark = fmr_param.pool_size / 4; 335 fmr_param.cache = 1; 336 fmr_param.max_pages_per_fmr = dev->max_pages_per_mr; 337 fmr_param.page_shift = ilog2(dev->mr_page_size); 338 fmr_param.access = (IB_ACCESS_LOCAL_WRITE | 339 IB_ACCESS_REMOTE_WRITE | 340 IB_ACCESS_REMOTE_READ); 341 342 return ib_create_fmr_pool(dev->pd, &fmr_param); 343 } 344 345 /** 346 * srp_destroy_fr_pool() - free the resources owned by a pool 347 * @pool: Fast registration pool to be destroyed. 348 */ 349 static void srp_destroy_fr_pool(struct srp_fr_pool *pool) 350 { 351 int i; 352 struct srp_fr_desc *d; 353 354 if (!pool) 355 return; 356 357 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { 358 if (d->mr) 359 ib_dereg_mr(d->mr); 360 } 361 kfree(pool); 362 } 363 364 /** 365 * srp_create_fr_pool() - allocate and initialize a pool for fast registration 366 * @device: IB device to allocate fast registration descriptors for. 367 * @pd: Protection domain associated with the FR descriptors. 368 * @pool_size: Number of descriptors to allocate. 369 * @max_page_list_len: Maximum fast registration work request page list length. 
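 *
 * Return: a pointer to the newly allocated pool on success or an ERR_PTR()
 * encoded error code on failure.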
370 */ 371 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, 372 struct ib_pd *pd, int pool_size, 373 int max_page_list_len) 374 { 375 struct srp_fr_pool *pool; 376 struct srp_fr_desc *d; 377 struct ib_mr *mr; 378 int i, ret = -EINVAL; 379 380 if (pool_size <= 0) 381 goto err; 382 ret = -ENOMEM; 383 pool = kzalloc(sizeof(struct srp_fr_pool) + 384 pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL); 385 if (!pool) 386 goto err; 387 pool->size = pool_size; 388 pool->max_page_list_len = max_page_list_len; 389 spin_lock_init(&pool->lock); 390 INIT_LIST_HEAD(&pool->free_list); 391 392 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { 393 mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 394 max_page_list_len); 395 if (IS_ERR(mr)) { 396 ret = PTR_ERR(mr); 397 if (ret == -ENOMEM) 398 pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n", 399 dev_name(&device->dev)); 400 goto destroy_pool; 401 } 402 d->mr = mr; 403 list_add_tail(&d->entry, &pool->free_list); 404 } 405 406 out: 407 return pool; 408 409 destroy_pool: 410 srp_destroy_fr_pool(pool); 411 412 err: 413 pool = ERR_PTR(ret); 414 goto out; 415 } 416 417 /** 418 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration 419 * @pool: Pool to obtain descriptor from. 420 */ 421 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool) 422 { 423 struct srp_fr_desc *d = NULL; 424 unsigned long flags; 425 426 spin_lock_irqsave(&pool->lock, flags); 427 if (!list_empty(&pool->free_list)) { 428 d = list_first_entry(&pool->free_list, typeof(*d), entry); 429 list_del(&d->entry); 430 } 431 spin_unlock_irqrestore(&pool->lock, flags); 432 433 return d; 434 } 435 436 /** 437 * srp_fr_pool_put() - put an FR descriptor back in the free list 438 * @pool: Pool the descriptor was allocated from. 439 * @desc: Pointer to an array of fast registration descriptor pointers. 440 * @n: Number of descriptors to put back. 441 * 442 * Note: The caller must already have queued an invalidation request for 443 * desc->mr->rkey before calling this function. 444 */ 445 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc, 446 int n) 447 { 448 unsigned long flags; 449 int i; 450 451 spin_lock_irqsave(&pool->lock, flags); 452 for (i = 0; i < n; i++) 453 list_add(&desc[i]->entry, &pool->free_list); 454 spin_unlock_irqrestore(&pool->lock, flags); 455 } 456 457 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target) 458 { 459 struct srp_device *dev = target->srp_host->srp_dev; 460 461 return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size, 462 dev->max_pages_per_mr); 463 } 464 465 /** 466 * srp_destroy_qp() - destroy an RDMA queue pair 467 * @qp: RDMA queue pair. 468 * 469 * Drain the qp before destroying it. This avoids that the receive 470 * completion handler can access the queue pair while it is 471 * being destroyed. 
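 * The send CQ is polled under ch->lock first so that send completions that
 * are still pending are processed before the QP is drained.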
472 */ 473 static void srp_destroy_qp(struct srp_rdma_ch *ch, struct ib_qp *qp) 474 { 475 spin_lock_irq(&ch->lock); 476 ib_process_cq_direct(ch->send_cq, -1); 477 spin_unlock_irq(&ch->lock); 478 479 ib_drain_qp(qp); 480 ib_destroy_qp(qp); 481 } 482 483 static int srp_create_ch_ib(struct srp_rdma_ch *ch) 484 { 485 struct srp_target_port *target = ch->target; 486 struct srp_device *dev = target->srp_host->srp_dev; 487 struct ib_qp_init_attr *init_attr; 488 struct ib_cq *recv_cq, *send_cq; 489 struct ib_qp *qp; 490 struct ib_fmr_pool *fmr_pool = NULL; 491 struct srp_fr_pool *fr_pool = NULL; 492 const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2; 493 int ret; 494 495 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL); 496 if (!init_attr) 497 return -ENOMEM; 498 499 /* queue_size + 1 for ib_drain_rq() */ 500 recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1, 501 ch->comp_vector, IB_POLL_SOFTIRQ); 502 if (IS_ERR(recv_cq)) { 503 ret = PTR_ERR(recv_cq); 504 goto err; 505 } 506 507 send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size, 508 ch->comp_vector, IB_POLL_DIRECT); 509 if (IS_ERR(send_cq)) { 510 ret = PTR_ERR(send_cq); 511 goto err_recv_cq; 512 } 513 514 init_attr->event_handler = srp_qp_event; 515 init_attr->cap.max_send_wr = m * target->queue_size; 516 init_attr->cap.max_recv_wr = target->queue_size + 1; 517 init_attr->cap.max_recv_sge = 1; 518 init_attr->cap.max_send_sge = 1; 519 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; 520 init_attr->qp_type = IB_QPT_RC; 521 init_attr->send_cq = send_cq; 522 init_attr->recv_cq = recv_cq; 523 524 qp = ib_create_qp(dev->pd, init_attr); 525 if (IS_ERR(qp)) { 526 ret = PTR_ERR(qp); 527 goto err_send_cq; 528 } 529 530 ret = srp_init_qp(target, qp); 531 if (ret) 532 goto err_qp; 533 534 if (dev->use_fast_reg) { 535 fr_pool = srp_alloc_fr_pool(target); 536 if (IS_ERR(fr_pool)) { 537 ret = PTR_ERR(fr_pool); 538 shost_printk(KERN_WARNING, target->scsi_host, PFX 539 "FR pool allocation failed (%d)\n", ret); 540 goto err_qp; 541 } 542 } else if (dev->use_fmr) { 543 fmr_pool = srp_alloc_fmr_pool(target); 544 if (IS_ERR(fmr_pool)) { 545 ret = PTR_ERR(fmr_pool); 546 shost_printk(KERN_WARNING, target->scsi_host, PFX 547 "FMR pool allocation failed (%d)\n", ret); 548 goto err_qp; 549 } 550 } 551 552 if (ch->qp) 553 srp_destroy_qp(ch, ch->qp); 554 if (ch->recv_cq) 555 ib_free_cq(ch->recv_cq); 556 if (ch->send_cq) 557 ib_free_cq(ch->send_cq); 558 559 ch->qp = qp; 560 ch->recv_cq = recv_cq; 561 ch->send_cq = send_cq; 562 563 if (dev->use_fast_reg) { 564 if (ch->fr_pool) 565 srp_destroy_fr_pool(ch->fr_pool); 566 ch->fr_pool = fr_pool; 567 } else if (dev->use_fmr) { 568 if (ch->fmr_pool) 569 ib_destroy_fmr_pool(ch->fmr_pool); 570 ch->fmr_pool = fmr_pool; 571 } 572 573 kfree(init_attr); 574 return 0; 575 576 err_qp: 577 ib_destroy_qp(qp); 578 579 err_send_cq: 580 ib_free_cq(send_cq); 581 582 err_recv_cq: 583 ib_free_cq(recv_cq); 584 585 err: 586 kfree(init_attr); 587 return ret; 588 } 589 590 /* 591 * Note: this function may be called without srp_alloc_iu_bufs() having been 592 * invoked. Hence the ch->[rt]x_ring checks. 593 */ 594 static void srp_free_ch_ib(struct srp_target_port *target, 595 struct srp_rdma_ch *ch) 596 { 597 struct srp_device *dev = target->srp_host->srp_dev; 598 int i; 599 600 if (!ch->target) 601 return; 602 603 if (ch->cm_id) { 604 ib_destroy_cm_id(ch->cm_id); 605 ch->cm_id = NULL; 606 } 607 608 /* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. 
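	 * In that case ch->qp is still NULL and no QP, CQ or memory
	 * registration pool resources have to be released.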
*/ 609 if (!ch->qp) 610 return; 611 612 if (dev->use_fast_reg) { 613 if (ch->fr_pool) 614 srp_destroy_fr_pool(ch->fr_pool); 615 } else if (dev->use_fmr) { 616 if (ch->fmr_pool) 617 ib_destroy_fmr_pool(ch->fmr_pool); 618 } 619 620 srp_destroy_qp(ch, ch->qp); 621 ib_free_cq(ch->send_cq); 622 ib_free_cq(ch->recv_cq); 623 624 /* 625 * Avoid that the SCSI error handler tries to use this channel after 626 * it has been freed. The SCSI error handler can namely continue 627 * trying to perform recovery actions after scsi_remove_host() 628 * returned. 629 */ 630 ch->target = NULL; 631 632 ch->qp = NULL; 633 ch->send_cq = ch->recv_cq = NULL; 634 635 if (ch->rx_ring) { 636 for (i = 0; i < target->queue_size; ++i) 637 srp_free_iu(target->srp_host, ch->rx_ring[i]); 638 kfree(ch->rx_ring); 639 ch->rx_ring = NULL; 640 } 641 if (ch->tx_ring) { 642 for (i = 0; i < target->queue_size; ++i) 643 srp_free_iu(target->srp_host, ch->tx_ring[i]); 644 kfree(ch->tx_ring); 645 ch->tx_ring = NULL; 646 } 647 } 648 649 static void srp_path_rec_completion(int status, 650 struct sa_path_rec *pathrec, 651 void *ch_ptr) 652 { 653 struct srp_rdma_ch *ch = ch_ptr; 654 struct srp_target_port *target = ch->target; 655 656 ch->status = status; 657 if (status) 658 shost_printk(KERN_ERR, target->scsi_host, 659 PFX "Got failed path rec status %d\n", status); 660 else 661 ch->path = *pathrec; 662 complete(&ch->done); 663 } 664 665 static int srp_lookup_path(struct srp_rdma_ch *ch) 666 { 667 struct srp_target_port *target = ch->target; 668 int ret; 669 670 ch->path.numb_path = 1; 671 672 init_completion(&ch->done); 673 674 ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client, 675 target->srp_host->srp_dev->dev, 676 target->srp_host->port, 677 &ch->path, 678 IB_SA_PATH_REC_SERVICE_ID | 679 IB_SA_PATH_REC_DGID | 680 IB_SA_PATH_REC_SGID | 681 IB_SA_PATH_REC_NUMB_PATH | 682 IB_SA_PATH_REC_PKEY, 683 SRP_PATH_REC_TIMEOUT_MS, 684 GFP_KERNEL, 685 srp_path_rec_completion, 686 ch, &ch->path_query); 687 if (ch->path_query_id < 0) 688 return ch->path_query_id; 689 690 ret = wait_for_completion_interruptible(&ch->done); 691 if (ret < 0) 692 return ret; 693 694 if (ch->status < 0) 695 shost_printk(KERN_WARNING, target->scsi_host, 696 PFX "Path record query failed\n"); 697 698 return ch->status; 699 } 700 701 static int srp_send_req(struct srp_rdma_ch *ch, bool multich) 702 { 703 struct srp_target_port *target = ch->target; 704 struct { 705 struct ib_cm_req_param param; 706 struct srp_login_req priv; 707 } *req = NULL; 708 int status; 709 710 req = kzalloc(sizeof *req, GFP_KERNEL); 711 if (!req) 712 return -ENOMEM; 713 714 req->param.primary_path = &ch->path; 715 req->param.alternate_path = NULL; 716 req->param.service_id = target->service_id; 717 req->param.qp_num = ch->qp->qp_num; 718 req->param.qp_type = ch->qp->qp_type; 719 req->param.private_data = &req->priv; 720 req->param.private_data_len = sizeof req->priv; 721 req->param.flow_control = 1; 722 723 get_random_bytes(&req->param.starting_psn, 4); 724 req->param.starting_psn &= 0xffffff; 725 726 /* 727 * Pick some arbitrary defaults here; we could make these 728 * module parameters if anyone cared about setting them. 
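	 * The retry count is the exception: it comes from
	 * target->tl_retry_count, which is configurable per target.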
729 */ 730 req->param.responder_resources = 4; 731 req->param.remote_cm_response_timeout = 20; 732 req->param.local_cm_response_timeout = 20; 733 req->param.retry_count = target->tl_retry_count; 734 req->param.rnr_retry_count = 7; 735 req->param.max_cm_retries = 15; 736 737 req->priv.opcode = SRP_LOGIN_REQ; 738 req->priv.tag = 0; 739 req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len); 740 req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | 741 SRP_BUF_FORMAT_INDIRECT); 742 req->priv.req_flags = (multich ? SRP_MULTICHAN_MULTI : 743 SRP_MULTICHAN_SINGLE); 744 /* 745 * In the published SRP specification (draft rev. 16a), the 746 * port identifier format is 8 bytes of ID extension followed 747 * by 8 bytes of GUID. Older drafts put the two halves in the 748 * opposite order, so that the GUID comes first. 749 * 750 * Targets conforming to these obsolete drafts can be 751 * recognized by the I/O Class they report. 752 */ 753 if (target->io_class == SRP_REV10_IB_IO_CLASS) { 754 memcpy(req->priv.initiator_port_id, 755 &target->sgid.global.interface_id, 8); 756 memcpy(req->priv.initiator_port_id + 8, 757 &target->initiator_ext, 8); 758 memcpy(req->priv.target_port_id, &target->ioc_guid, 8); 759 memcpy(req->priv.target_port_id + 8, &target->id_ext, 8); 760 } else { 761 memcpy(req->priv.initiator_port_id, 762 &target->initiator_ext, 8); 763 memcpy(req->priv.initiator_port_id + 8, 764 &target->sgid.global.interface_id, 8); 765 memcpy(req->priv.target_port_id, &target->id_ext, 8); 766 memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8); 767 } 768 769 /* 770 * Topspin/Cisco SRP targets will reject our login unless we 771 * zero out the first 8 bytes of our initiator port ID and set 772 * the second 8 bytes to the local node GUID. 773 */ 774 if (srp_target_is_topspin(target)) { 775 shost_printk(KERN_DEBUG, target->scsi_host, 776 PFX "Topspin/Cisco initiator port ID workaround " 777 "activated for target GUID %016llx\n", 778 be64_to_cpu(target->ioc_guid)); 779 memset(req->priv.initiator_port_id, 0, 8); 780 memcpy(req->priv.initiator_port_id + 8, 781 &target->srp_host->srp_dev->dev->node_guid, 8); 782 } 783 784 status = ib_send_cm_req(ch->cm_id, &req->param); 785 786 kfree(req); 787 788 return status; 789 } 790 791 static bool srp_queue_remove_work(struct srp_target_port *target) 792 { 793 bool changed = false; 794 795 spin_lock_irq(&target->lock); 796 if (target->state != SRP_TARGET_REMOVED) { 797 target->state = SRP_TARGET_REMOVED; 798 changed = true; 799 } 800 spin_unlock_irq(&target->lock); 801 802 if (changed) 803 queue_work(srp_remove_wq, &target->remove_work); 804 805 return changed; 806 } 807 808 static void srp_disconnect_target(struct srp_target_port *target) 809 { 810 struct srp_rdma_ch *ch; 811 int i; 812 813 /* XXX should send SRP_I_LOGOUT request */ 814 815 for (i = 0; i < target->ch_count; i++) { 816 ch = &target->ch[i]; 817 ch->connected = false; 818 if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) { 819 shost_printk(KERN_DEBUG, target->scsi_host, 820 PFX "Sending CM DREQ failed\n"); 821 } 822 } 823 } 824 825 static void srp_free_req_data(struct srp_target_port *target, 826 struct srp_rdma_ch *ch) 827 { 828 struct srp_device *dev = target->srp_host->srp_dev; 829 struct ib_device *ibdev = dev->dev; 830 struct srp_request *req; 831 int i; 832 833 if (!ch->req_ring) 834 return; 835 836 for (i = 0; i < target->req_ring_size; ++i) { 837 req = &ch->req_ring[i]; 838 if (dev->use_fast_reg) { 839 kfree(req->fr_list); 840 } else { 841 kfree(req->fmr_list); 842 
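			/* map_page is only allocated if fast registration is not used. */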
kfree(req->map_page); 843 } 844 if (req->indirect_dma_addr) { 845 ib_dma_unmap_single(ibdev, req->indirect_dma_addr, 846 target->indirect_size, 847 DMA_TO_DEVICE); 848 } 849 kfree(req->indirect_desc); 850 } 851 852 kfree(ch->req_ring); 853 ch->req_ring = NULL; 854 } 855 856 static int srp_alloc_req_data(struct srp_rdma_ch *ch) 857 { 858 struct srp_target_port *target = ch->target; 859 struct srp_device *srp_dev = target->srp_host->srp_dev; 860 struct ib_device *ibdev = srp_dev->dev; 861 struct srp_request *req; 862 void *mr_list; 863 dma_addr_t dma_addr; 864 int i, ret = -ENOMEM; 865 866 ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring), 867 GFP_KERNEL); 868 if (!ch->req_ring) 869 goto out; 870 871 for (i = 0; i < target->req_ring_size; ++i) { 872 req = &ch->req_ring[i]; 873 mr_list = kmalloc(target->mr_per_cmd * sizeof(void *), 874 GFP_KERNEL); 875 if (!mr_list) 876 goto out; 877 if (srp_dev->use_fast_reg) { 878 req->fr_list = mr_list; 879 } else { 880 req->fmr_list = mr_list; 881 req->map_page = kmalloc(srp_dev->max_pages_per_mr * 882 sizeof(void *), GFP_KERNEL); 883 if (!req->map_page) 884 goto out; 885 } 886 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL); 887 if (!req->indirect_desc) 888 goto out; 889 890 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc, 891 target->indirect_size, 892 DMA_TO_DEVICE); 893 if (ib_dma_mapping_error(ibdev, dma_addr)) 894 goto out; 895 896 req->indirect_dma_addr = dma_addr; 897 } 898 ret = 0; 899 900 out: 901 return ret; 902 } 903 904 /** 905 * srp_del_scsi_host_attr() - Remove attributes defined in the host template. 906 * @shost: SCSI host whose attributes to remove from sysfs. 907 * 908 * Note: Any attributes defined in the host template and that did not exist 909 * before invocation of this function will be ignored. 910 */ 911 static void srp_del_scsi_host_attr(struct Scsi_Host *shost) 912 { 913 struct device_attribute **attr; 914 915 for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr) 916 device_remove_file(&shost->shost_dev, *attr); 917 } 918 919 static void srp_remove_target(struct srp_target_port *target) 920 { 921 struct srp_rdma_ch *ch; 922 int i; 923 924 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); 925 926 srp_del_scsi_host_attr(target->scsi_host); 927 srp_rport_get(target->rport); 928 srp_remove_host(target->scsi_host); 929 scsi_remove_host(target->scsi_host); 930 srp_stop_rport_timers(target->rport); 931 srp_disconnect_target(target); 932 for (i = 0; i < target->ch_count; i++) { 933 ch = &target->ch[i]; 934 srp_free_ch_ib(target, ch); 935 } 936 cancel_work_sync(&target->tl_err_work); 937 srp_rport_put(target->rport); 938 for (i = 0; i < target->ch_count; i++) { 939 ch = &target->ch[i]; 940 srp_free_req_data(target, ch); 941 } 942 kfree(target->ch); 943 target->ch = NULL; 944 945 spin_lock(&target->srp_host->target_lock); 946 list_del(&target->list); 947 spin_unlock(&target->srp_host->target_lock); 948 949 scsi_host_put(target->scsi_host); 950 } 951 952 static void srp_remove_work(struct work_struct *work) 953 { 954 struct srp_target_port *target = 955 container_of(work, struct srp_target_port, remove_work); 956 957 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); 958 959 srp_remove_target(target); 960 } 961 962 static void srp_rport_delete(struct srp_rport *rport) 963 { 964 struct srp_target_port *target = rport->lld_data; 965 966 srp_queue_remove_work(target); 967 } 968 969 /** 970 * srp_connected_ch() - number of connected channels 971 * @target: SRP target port. 
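 *
 * Return: the number of channels whose 'connected' flag is set.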
972 */ 973 static int srp_connected_ch(struct srp_target_port *target) 974 { 975 int i, c = 0; 976 977 for (i = 0; i < target->ch_count; i++) 978 c += target->ch[i].connected; 979 980 return c; 981 } 982 983 static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich) 984 { 985 struct srp_target_port *target = ch->target; 986 int ret; 987 988 WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0); 989 990 ret = srp_lookup_path(ch); 991 if (ret) 992 goto out; 993 994 while (1) { 995 init_completion(&ch->done); 996 ret = srp_send_req(ch, multich); 997 if (ret) 998 goto out; 999 ret = wait_for_completion_interruptible(&ch->done); 1000 if (ret < 0) 1001 goto out; 1002 1003 /* 1004 * The CM event handling code will set status to 1005 * SRP_PORT_REDIRECT if we get a port redirect REJ 1006 * back, or SRP_DLID_REDIRECT if we get a lid/qp 1007 * redirect REJ back. 1008 */ 1009 ret = ch->status; 1010 switch (ret) { 1011 case 0: 1012 ch->connected = true; 1013 goto out; 1014 1015 case SRP_PORT_REDIRECT: 1016 ret = srp_lookup_path(ch); 1017 if (ret) 1018 goto out; 1019 break; 1020 1021 case SRP_DLID_REDIRECT: 1022 break; 1023 1024 case SRP_STALE_CONN: 1025 shost_printk(KERN_ERR, target->scsi_host, PFX 1026 "giving up on stale connection\n"); 1027 ret = -ECONNRESET; 1028 goto out; 1029 1030 default: 1031 goto out; 1032 } 1033 } 1034 1035 out: 1036 return ret <= 0 ? ret : -ENODEV; 1037 } 1038 1039 static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc) 1040 { 1041 srp_handle_qp_err(cq, wc, "INV RKEY"); 1042 } 1043 1044 static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch, 1045 u32 rkey) 1046 { 1047 struct ib_send_wr *bad_wr; 1048 struct ib_send_wr wr = { 1049 .opcode = IB_WR_LOCAL_INV, 1050 .next = NULL, 1051 .num_sge = 0, 1052 .send_flags = 0, 1053 .ex.invalidate_rkey = rkey, 1054 }; 1055 1056 wr.wr_cqe = &req->reg_cqe; 1057 req->reg_cqe.done = srp_inv_rkey_err_done; 1058 return ib_post_send(ch->qp, &wr, &bad_wr); 1059 } 1060 1061 static void srp_unmap_data(struct scsi_cmnd *scmnd, 1062 struct srp_rdma_ch *ch, 1063 struct srp_request *req) 1064 { 1065 struct srp_target_port *target = ch->target; 1066 struct srp_device *dev = target->srp_host->srp_dev; 1067 struct ib_device *ibdev = dev->dev; 1068 int i, res; 1069 1070 if (!scsi_sglist(scmnd) || 1071 (scmnd->sc_data_direction != DMA_TO_DEVICE && 1072 scmnd->sc_data_direction != DMA_FROM_DEVICE)) 1073 return; 1074 1075 if (dev->use_fast_reg) { 1076 struct srp_fr_desc **pfr; 1077 1078 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) { 1079 res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey); 1080 if (res < 0) { 1081 shost_printk(KERN_ERR, target->scsi_host, PFX 1082 "Queueing INV WR for rkey %#x failed (%d)\n", 1083 (*pfr)->mr->rkey, res); 1084 queue_work(system_long_wq, 1085 &target->tl_err_work); 1086 } 1087 } 1088 if (req->nmdesc) 1089 srp_fr_pool_put(ch->fr_pool, req->fr_list, 1090 req->nmdesc); 1091 } else if (dev->use_fmr) { 1092 struct ib_pool_fmr **pfmr; 1093 1094 for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++) 1095 ib_fmr_pool_unmap(*pfmr); 1096 } 1097 1098 ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd), 1099 scmnd->sc_data_direction); 1100 } 1101 1102 /** 1103 * srp_claim_req - Take ownership of the scmnd associated with a request. 1104 * @ch: SRP RDMA channel. 1105 * @req: SRP request. 1106 * @sdev: If not NULL, only take ownership for this SCSI device. 1107 * @scmnd: If NULL, take ownership of @req->scmnd. 
If not NULL, only take 1108 * ownership of @req->scmnd if it equals @scmnd. 1109 * 1110 * Return value: 1111 * Either NULL or a pointer to the SCSI command the caller became owner of. 1112 */ 1113 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch, 1114 struct srp_request *req, 1115 struct scsi_device *sdev, 1116 struct scsi_cmnd *scmnd) 1117 { 1118 unsigned long flags; 1119 1120 spin_lock_irqsave(&ch->lock, flags); 1121 if (req->scmnd && 1122 (!sdev || req->scmnd->device == sdev) && 1123 (!scmnd || req->scmnd == scmnd)) { 1124 scmnd = req->scmnd; 1125 req->scmnd = NULL; 1126 } else { 1127 scmnd = NULL; 1128 } 1129 spin_unlock_irqrestore(&ch->lock, flags); 1130 1131 return scmnd; 1132 } 1133 1134 /** 1135 * srp_free_req() - Unmap data and adjust ch->req_lim. 1136 * @ch: SRP RDMA channel. 1137 * @req: Request to be freed. 1138 * @scmnd: SCSI command associated with @req. 1139 * @req_lim_delta: Amount to be added to @target->req_lim. 1140 */ 1141 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req, 1142 struct scsi_cmnd *scmnd, s32 req_lim_delta) 1143 { 1144 unsigned long flags; 1145 1146 srp_unmap_data(scmnd, ch, req); 1147 1148 spin_lock_irqsave(&ch->lock, flags); 1149 ch->req_lim += req_lim_delta; 1150 spin_unlock_irqrestore(&ch->lock, flags); 1151 } 1152 1153 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req, 1154 struct scsi_device *sdev, int result) 1155 { 1156 struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL); 1157 1158 if (scmnd) { 1159 srp_free_req(ch, req, scmnd, 0); 1160 scmnd->result = result; 1161 scmnd->scsi_done(scmnd); 1162 } 1163 } 1164 1165 static void srp_terminate_io(struct srp_rport *rport) 1166 { 1167 struct srp_target_port *target = rport->lld_data; 1168 struct srp_rdma_ch *ch; 1169 struct Scsi_Host *shost = target->scsi_host; 1170 struct scsi_device *sdev; 1171 int i, j; 1172 1173 /* 1174 * Invoking srp_terminate_io() while srp_queuecommand() is running 1175 * is not safe. Hence the warning statement below. 1176 */ 1177 shost_for_each_device(sdev, shost) 1178 WARN_ON_ONCE(sdev->request_queue->request_fn_active); 1179 1180 for (i = 0; i < target->ch_count; i++) { 1181 ch = &target->ch[i]; 1182 1183 for (j = 0; j < target->req_ring_size; ++j) { 1184 struct srp_request *req = &ch->req_ring[j]; 1185 1186 srp_finish_req(ch, req, NULL, 1187 DID_TRANSPORT_FAILFAST << 16); 1188 } 1189 } 1190 } 1191 1192 /* 1193 * It is up to the caller to ensure that srp_rport_reconnect() calls are 1194 * serialized and that no concurrent srp_queuecommand(), srp_abort(), 1195 * srp_reset_device() or srp_reset_host() calls will occur while this function 1196 * is in progress. One way to realize that is not to call this function 1197 * directly but to call srp_reconnect_rport() instead since that last function 1198 * serializes calls of this function via rport->mutex and also blocks 1199 * srp_queuecommand() calls before invoking this function. 1200 */ 1201 static int srp_rport_reconnect(struct srp_rport *rport) 1202 { 1203 struct srp_target_port *target = rport->lld_data; 1204 struct srp_rdma_ch *ch; 1205 int i, j, ret = 0; 1206 bool multich = false; 1207 1208 srp_disconnect_target(target); 1209 1210 if (target->state == SRP_TARGET_SCANNING) 1211 return -ENODEV; 1212 1213 /* 1214 * Now get a new local CM ID so that we avoid confusing the target in 1215 * case things are really fouled up. Doing so also ensures that all CM 1216 * callbacks will have finished before a new QP is allocated. 
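	 * Failures are accumulated in 'ret'; a new QP is created for every
	 * channel below even if allocating a new CM ID failed.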
1217 */ 1218 for (i = 0; i < target->ch_count; i++) { 1219 ch = &target->ch[i]; 1220 ret += srp_new_cm_id(ch); 1221 } 1222 for (i = 0; i < target->ch_count; i++) { 1223 ch = &target->ch[i]; 1224 for (j = 0; j < target->req_ring_size; ++j) { 1225 struct srp_request *req = &ch->req_ring[j]; 1226 1227 srp_finish_req(ch, req, NULL, DID_RESET << 16); 1228 } 1229 } 1230 for (i = 0; i < target->ch_count; i++) { 1231 ch = &target->ch[i]; 1232 /* 1233 * Whether or not creating a new CM ID succeeded, create a new 1234 * QP. This guarantees that all completion callback function 1235 * invocations have finished before request resetting starts. 1236 */ 1237 ret += srp_create_ch_ib(ch); 1238 1239 INIT_LIST_HEAD(&ch->free_tx); 1240 for (j = 0; j < target->queue_size; ++j) 1241 list_add(&ch->tx_ring[j]->list, &ch->free_tx); 1242 } 1243 1244 target->qp_in_error = false; 1245 1246 for (i = 0; i < target->ch_count; i++) { 1247 ch = &target->ch[i]; 1248 if (ret) 1249 break; 1250 ret = srp_connect_ch(ch, multich); 1251 multich = true; 1252 } 1253 1254 if (ret == 0) 1255 shost_printk(KERN_INFO, target->scsi_host, 1256 PFX "reconnect succeeded\n"); 1257 1258 return ret; 1259 } 1260 1261 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr, 1262 unsigned int dma_len, u32 rkey) 1263 { 1264 struct srp_direct_buf *desc = state->desc; 1265 1266 WARN_ON_ONCE(!dma_len); 1267 1268 desc->va = cpu_to_be64(dma_addr); 1269 desc->key = cpu_to_be32(rkey); 1270 desc->len = cpu_to_be32(dma_len); 1271 1272 state->total_len += dma_len; 1273 state->desc++; 1274 state->ndesc++; 1275 } 1276 1277 static int srp_map_finish_fmr(struct srp_map_state *state, 1278 struct srp_rdma_ch *ch) 1279 { 1280 struct srp_target_port *target = ch->target; 1281 struct srp_device *dev = target->srp_host->srp_dev; 1282 struct ib_pd *pd = target->pd; 1283 struct ib_pool_fmr *fmr; 1284 u64 io_addr = 0; 1285 1286 if (state->fmr.next >= state->fmr.end) { 1287 shost_printk(KERN_ERR, ch->target->scsi_host, 1288 PFX "Out of MRs (mr_per_cmd = %d)\n", 1289 ch->target->mr_per_cmd); 1290 return -ENOMEM; 1291 } 1292 1293 WARN_ON_ONCE(!dev->use_fmr); 1294 1295 if (state->npages == 0) 1296 return 0; 1297 1298 if (state->npages == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) { 1299 srp_map_desc(state, state->base_dma_addr, state->dma_len, 1300 pd->unsafe_global_rkey); 1301 goto reset_state; 1302 } 1303 1304 fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages, 1305 state->npages, io_addr); 1306 if (IS_ERR(fmr)) 1307 return PTR_ERR(fmr); 1308 1309 *state->fmr.next++ = fmr; 1310 state->nmdesc++; 1311 1312 srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask, 1313 state->dma_len, fmr->fmr->rkey); 1314 1315 reset_state: 1316 state->npages = 0; 1317 state->dma_len = 0; 1318 1319 return 0; 1320 } 1321 1322 static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc) 1323 { 1324 srp_handle_qp_err(cq, wc, "FAST REG"); 1325 } 1326 1327 /* 1328 * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset 1329 * where to start in the first element. If sg_offset_p != NULL then 1330 * *sg_offset_p is updated to the offset in state->sg[retval] of the first 1331 * byte that has not yet been mapped. 
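 * Returns the number of scatterlist entries that have been mapped, or a
 * negative error code upon failure.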
1332 */ 1333 static int srp_map_finish_fr(struct srp_map_state *state, 1334 struct srp_request *req, 1335 struct srp_rdma_ch *ch, int sg_nents, 1336 unsigned int *sg_offset_p) 1337 { 1338 struct srp_target_port *target = ch->target; 1339 struct srp_device *dev = target->srp_host->srp_dev; 1340 struct ib_pd *pd = target->pd; 1341 struct ib_send_wr *bad_wr; 1342 struct ib_reg_wr wr; 1343 struct srp_fr_desc *desc; 1344 u32 rkey; 1345 int n, err; 1346 1347 if (state->fr.next >= state->fr.end) { 1348 shost_printk(KERN_ERR, ch->target->scsi_host, 1349 PFX "Out of MRs (mr_per_cmd = %d)\n", 1350 ch->target->mr_per_cmd); 1351 return -ENOMEM; 1352 } 1353 1354 WARN_ON_ONCE(!dev->use_fast_reg); 1355 1356 if (sg_nents == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) { 1357 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; 1358 1359 srp_map_desc(state, sg_dma_address(state->sg) + sg_offset, 1360 sg_dma_len(state->sg) - sg_offset, 1361 pd->unsafe_global_rkey); 1362 if (sg_offset_p) 1363 *sg_offset_p = 0; 1364 return 1; 1365 } 1366 1367 desc = srp_fr_pool_get(ch->fr_pool); 1368 if (!desc) 1369 return -ENOMEM; 1370 1371 rkey = ib_inc_rkey(desc->mr->rkey); 1372 ib_update_fast_reg_key(desc->mr, rkey); 1373 1374 n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p, 1375 dev->mr_page_size); 1376 if (unlikely(n < 0)) { 1377 srp_fr_pool_put(ch->fr_pool, &desc, 1); 1378 pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n", 1379 dev_name(&req->scmnd->device->sdev_gendev), sg_nents, 1380 sg_offset_p ? *sg_offset_p : -1, n); 1381 return n; 1382 } 1383 1384 WARN_ON_ONCE(desc->mr->length == 0); 1385 1386 req->reg_cqe.done = srp_reg_mr_err_done; 1387 1388 wr.wr.next = NULL; 1389 wr.wr.opcode = IB_WR_REG_MR; 1390 wr.wr.wr_cqe = &req->reg_cqe; 1391 wr.wr.num_sge = 0; 1392 wr.wr.send_flags = 0; 1393 wr.mr = desc->mr; 1394 wr.key = desc->mr->rkey; 1395 wr.access = (IB_ACCESS_LOCAL_WRITE | 1396 IB_ACCESS_REMOTE_READ | 1397 IB_ACCESS_REMOTE_WRITE); 1398 1399 *state->fr.next++ = desc; 1400 state->nmdesc++; 1401 1402 srp_map_desc(state, desc->mr->iova, 1403 desc->mr->length, desc->mr->rkey); 1404 1405 err = ib_post_send(ch->qp, &wr.wr, &bad_wr); 1406 if (unlikely(err)) { 1407 WARN_ON_ONCE(err == -ENOMEM); 1408 return err; 1409 } 1410 1411 return n; 1412 } 1413 1414 static int srp_map_sg_entry(struct srp_map_state *state, 1415 struct srp_rdma_ch *ch, 1416 struct scatterlist *sg) 1417 { 1418 struct srp_target_port *target = ch->target; 1419 struct srp_device *dev = target->srp_host->srp_dev; 1420 struct ib_device *ibdev = dev->dev; 1421 dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg); 1422 unsigned int dma_len = ib_sg_dma_len(ibdev, sg); 1423 unsigned int len = 0; 1424 int ret; 1425 1426 WARN_ON_ONCE(!dma_len); 1427 1428 while (dma_len) { 1429 unsigned offset = dma_addr & ~dev->mr_page_mask; 1430 1431 if (state->npages == dev->max_pages_per_mr || 1432 (state->npages > 0 && offset != 0)) { 1433 ret = srp_map_finish_fmr(state, ch); 1434 if (ret) 1435 return ret; 1436 } 1437 1438 len = min_t(unsigned int, dma_len, dev->mr_page_size - offset); 1439 1440 if (!state->npages) 1441 state->base_dma_addr = dma_addr; 1442 state->pages[state->npages++] = dma_addr & dev->mr_page_mask; 1443 state->dma_len += len; 1444 dma_addr += len; 1445 dma_len -= len; 1446 } 1447 1448 /* 1449 * If the end of the MR is not on a page boundary then we need to 1450 * close it out and start a new one -- we can only merge at page 1451 * boundaries. 
1452 */ 1453 ret = 0; 1454 if ((dma_addr & ~dev->mr_page_mask) != 0) 1455 ret = srp_map_finish_fmr(state, ch); 1456 return ret; 1457 } 1458 1459 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch, 1460 struct srp_request *req, struct scatterlist *scat, 1461 int count) 1462 { 1463 struct scatterlist *sg; 1464 int i, ret; 1465 1466 state->pages = req->map_page; 1467 state->fmr.next = req->fmr_list; 1468 state->fmr.end = req->fmr_list + ch->target->mr_per_cmd; 1469 1470 for_each_sg(scat, sg, count, i) { 1471 ret = srp_map_sg_entry(state, ch, sg); 1472 if (ret) 1473 return ret; 1474 } 1475 1476 ret = srp_map_finish_fmr(state, ch); 1477 if (ret) 1478 return ret; 1479 1480 return 0; 1481 } 1482 1483 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch, 1484 struct srp_request *req, struct scatterlist *scat, 1485 int count) 1486 { 1487 unsigned int sg_offset = 0; 1488 1489 state->fr.next = req->fr_list; 1490 state->fr.end = req->fr_list + ch->target->mr_per_cmd; 1491 state->sg = scat; 1492 1493 if (count == 0) 1494 return 0; 1495 1496 while (count) { 1497 int i, n; 1498 1499 n = srp_map_finish_fr(state, req, ch, count, &sg_offset); 1500 if (unlikely(n < 0)) 1501 return n; 1502 1503 count -= n; 1504 for (i = 0; i < n; i++) 1505 state->sg = sg_next(state->sg); 1506 } 1507 1508 return 0; 1509 } 1510 1511 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch, 1512 struct srp_request *req, struct scatterlist *scat, 1513 int count) 1514 { 1515 struct srp_target_port *target = ch->target; 1516 struct srp_device *dev = target->srp_host->srp_dev; 1517 struct scatterlist *sg; 1518 int i; 1519 1520 for_each_sg(scat, sg, count, i) { 1521 srp_map_desc(state, ib_sg_dma_address(dev->dev, sg), 1522 ib_sg_dma_len(dev->dev, sg), 1523 target->pd->unsafe_global_rkey); 1524 } 1525 1526 return 0; 1527 } 1528 1529 /* 1530 * Register the indirect data buffer descriptor with the HCA. 1531 * 1532 * Note: since the indirect data buffer descriptor has been allocated with 1533 * kmalloc() it is guaranteed that this buffer is a physically contiguous 1534 * memory buffer. 1535 */ 1536 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req, 1537 void **next_mr, void **end_mr, u32 idb_len, 1538 __be32 *idb_rkey) 1539 { 1540 struct srp_target_port *target = ch->target; 1541 struct srp_device *dev = target->srp_host->srp_dev; 1542 struct srp_map_state state; 1543 struct srp_direct_buf idb_desc; 1544 u64 idb_pages[1]; 1545 struct scatterlist idb_sg[1]; 1546 int ret; 1547 1548 memset(&state, 0, sizeof(state)); 1549 memset(&idb_desc, 0, sizeof(idb_desc)); 1550 state.gen.next = next_mr; 1551 state.gen.end = end_mr; 1552 state.desc = &idb_desc; 1553 state.base_dma_addr = req->indirect_dma_addr; 1554 state.dma_len = idb_len; 1555 1556 if (dev->use_fast_reg) { 1557 state.sg = idb_sg; 1558 sg_init_one(idb_sg, req->indirect_desc, idb_len); 1559 idb_sg->dma_address = req->indirect_dma_addr; /* hack! 
*/ 1560 #ifdef CONFIG_NEED_SG_DMA_LENGTH 1561 idb_sg->dma_length = idb_sg->length; /* hack^2 */ 1562 #endif 1563 ret = srp_map_finish_fr(&state, req, ch, 1, NULL); 1564 if (ret < 0) 1565 return ret; 1566 WARN_ON_ONCE(ret < 1); 1567 } else if (dev->use_fmr) { 1568 state.pages = idb_pages; 1569 state.pages[0] = (req->indirect_dma_addr & 1570 dev->mr_page_mask); 1571 state.npages = 1; 1572 ret = srp_map_finish_fmr(&state, ch); 1573 if (ret < 0) 1574 return ret; 1575 } else { 1576 return -EINVAL; 1577 } 1578 1579 *idb_rkey = idb_desc.key; 1580 1581 return 0; 1582 } 1583 1584 static void srp_check_mapping(struct srp_map_state *state, 1585 struct srp_rdma_ch *ch, struct srp_request *req, 1586 struct scatterlist *scat, int count) 1587 { 1588 struct srp_device *dev = ch->target->srp_host->srp_dev; 1589 struct srp_fr_desc **pfr; 1590 u64 desc_len = 0, mr_len = 0; 1591 int i; 1592 1593 for (i = 0; i < state->ndesc; i++) 1594 desc_len += be32_to_cpu(req->indirect_desc[i].len); 1595 if (dev->use_fast_reg) 1596 for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++) 1597 mr_len += (*pfr)->mr->length; 1598 else if (dev->use_fmr) 1599 for (i = 0; i < state->nmdesc; i++) 1600 mr_len += be32_to_cpu(req->indirect_desc[i].len); 1601 if (desc_len != scsi_bufflen(req->scmnd) || 1602 mr_len > scsi_bufflen(req->scmnd)) 1603 pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n", 1604 scsi_bufflen(req->scmnd), desc_len, mr_len, 1605 state->ndesc, state->nmdesc); 1606 } 1607 1608 /** 1609 * srp_map_data() - map SCSI data buffer onto an SRP request 1610 * @scmnd: SCSI command to map 1611 * @ch: SRP RDMA channel 1612 * @req: SRP request 1613 * 1614 * Returns the length in bytes of the SRP_CMD IU or a negative value if 1615 * mapping failed. 1616 */ 1617 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, 1618 struct srp_request *req) 1619 { 1620 struct srp_target_port *target = ch->target; 1621 struct ib_pd *pd = target->pd; 1622 struct scatterlist *scat; 1623 struct srp_cmd *cmd = req->cmd->buf; 1624 int len, nents, count, ret; 1625 struct srp_device *dev; 1626 struct ib_device *ibdev; 1627 struct srp_map_state state; 1628 struct srp_indirect_buf *indirect_hdr; 1629 u32 idb_len, table_len; 1630 __be32 idb_rkey; 1631 u8 fmt; 1632 1633 if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE) 1634 return sizeof (struct srp_cmd); 1635 1636 if (scmnd->sc_data_direction != DMA_FROM_DEVICE && 1637 scmnd->sc_data_direction != DMA_TO_DEVICE) { 1638 shost_printk(KERN_WARNING, target->scsi_host, 1639 PFX "Unhandled data direction %d\n", 1640 scmnd->sc_data_direction); 1641 return -EINVAL; 1642 } 1643 1644 nents = scsi_sg_count(scmnd); 1645 scat = scsi_sglist(scmnd); 1646 1647 dev = target->srp_host->srp_dev; 1648 ibdev = dev->dev; 1649 1650 count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction); 1651 if (unlikely(count == 0)) 1652 return -EIO; 1653 1654 fmt = SRP_DATA_DESC_DIRECT; 1655 len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf); 1656 1657 if (count == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) { 1658 /* 1659 * The midlayer only generated a single gather/scatter 1660 * entry, or DMA mapping coalesced everything to a 1661 * single entry. So a direct descriptor along with 1662 * the DMA MR suffices. 
1663 */ 1664 struct srp_direct_buf *buf = (void *) cmd->add_data; 1665 1666 buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat)); 1667 buf->key = cpu_to_be32(pd->unsafe_global_rkey); 1668 buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat)); 1669 1670 req->nmdesc = 0; 1671 goto map_complete; 1672 } 1673 1674 /* 1675 * We have more than one scatter/gather entry, so build our indirect 1676 * descriptor table, trying to merge as many entries as we can. 1677 */ 1678 indirect_hdr = (void *) cmd->add_data; 1679 1680 ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr, 1681 target->indirect_size, DMA_TO_DEVICE); 1682 1683 memset(&state, 0, sizeof(state)); 1684 state.desc = req->indirect_desc; 1685 if (dev->use_fast_reg) 1686 ret = srp_map_sg_fr(&state, ch, req, scat, count); 1687 else if (dev->use_fmr) 1688 ret = srp_map_sg_fmr(&state, ch, req, scat, count); 1689 else 1690 ret = srp_map_sg_dma(&state, ch, req, scat, count); 1691 req->nmdesc = state.nmdesc; 1692 if (ret < 0) 1693 goto unmap; 1694 1695 { 1696 DEFINE_DYNAMIC_DEBUG_METADATA(ddm, 1697 "Memory mapping consistency check"); 1698 if (DYNAMIC_DEBUG_BRANCH(ddm)) 1699 srp_check_mapping(&state, ch, req, scat, count); 1700 } 1701 1702 /* We've mapped the request, now pull as much of the indirect 1703 * descriptor table as we can into the command buffer. If this 1704 * target is not using an external indirect table, we are 1705 * guaranteed to fit into the command, as the SCSI layer won't 1706 * give us more S/G entries than we allow. 1707 */ 1708 if (state.ndesc == 1) { 1709 /* 1710 * Memory registration collapsed the sg-list into one entry, 1711 * so use a direct descriptor. 1712 */ 1713 struct srp_direct_buf *buf = (void *) cmd->add_data; 1714 1715 *buf = req->indirect_desc[0]; 1716 goto map_complete; 1717 } 1718 1719 if (unlikely(target->cmd_sg_cnt < state.ndesc && 1720 !target->allow_ext_sg)) { 1721 shost_printk(KERN_ERR, target->scsi_host, 1722 "Could not fit S/G list into SRP_CMD\n"); 1723 ret = -EIO; 1724 goto unmap; 1725 } 1726 1727 count = min(state.ndesc, target->cmd_sg_cnt); 1728 table_len = state.ndesc * sizeof (struct srp_direct_buf); 1729 idb_len = sizeof(struct srp_indirect_buf) + table_len; 1730 1731 fmt = SRP_DATA_DESC_INDIRECT; 1732 len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf); 1733 len += count * sizeof (struct srp_direct_buf); 1734 1735 memcpy(indirect_hdr->desc_list, req->indirect_desc, 1736 count * sizeof (struct srp_direct_buf)); 1737 1738 if (!(pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) { 1739 ret = srp_map_idb(ch, req, state.gen.next, state.gen.end, 1740 idb_len, &idb_rkey); 1741 if (ret < 0) 1742 goto unmap; 1743 req->nmdesc++; 1744 } else { 1745 idb_rkey = cpu_to_be32(pd->unsafe_global_rkey); 1746 } 1747 1748 indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr); 1749 indirect_hdr->table_desc.key = idb_rkey; 1750 indirect_hdr->table_desc.len = cpu_to_be32(table_len); 1751 indirect_hdr->len = cpu_to_be32(state.total_len); 1752 1753 if (scmnd->sc_data_direction == DMA_TO_DEVICE) 1754 cmd->data_out_desc_cnt = count; 1755 else 1756 cmd->data_in_desc_cnt = count; 1757 1758 ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len, 1759 DMA_TO_DEVICE); 1760 1761 map_complete: 1762 if (scmnd->sc_data_direction == DMA_TO_DEVICE) 1763 cmd->buf_fmt = fmt << 4; 1764 else 1765 cmd->buf_fmt = fmt; 1766 1767 return len; 1768 1769 unmap: 1770 srp_unmap_data(scmnd, ch, req); 1771 if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size) 1772 ret = -E2BIG; 1773 return ret; 1774 } 1775 1776 
/* 1777 * Return an IU and possible credit to the free pool 1778 */ 1779 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu, 1780 enum srp_iu_type iu_type) 1781 { 1782 unsigned long flags; 1783 1784 spin_lock_irqsave(&ch->lock, flags); 1785 list_add(&iu->list, &ch->free_tx); 1786 if (iu_type != SRP_IU_RSP) 1787 ++ch->req_lim; 1788 spin_unlock_irqrestore(&ch->lock, flags); 1789 } 1790 1791 /* 1792 * Must be called with ch->lock held to protect req_lim and free_tx. 1793 * If IU is not sent, it must be returned using srp_put_tx_iu(). 1794 * 1795 * Note: 1796 * An upper limit for the number of allocated information units for each 1797 * request type is: 1798 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues 1799 * more than Scsi_Host.can_queue requests. 1800 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE. 1801 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than 1802 * one unanswered SRP request to an initiator. 1803 */ 1804 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch, 1805 enum srp_iu_type iu_type) 1806 { 1807 struct srp_target_port *target = ch->target; 1808 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE; 1809 struct srp_iu *iu; 1810 1811 lockdep_assert_held(&ch->lock); 1812 1813 ib_process_cq_direct(ch->send_cq, -1); 1814 1815 if (list_empty(&ch->free_tx)) 1816 return NULL; 1817 1818 /* Initiator responses to target requests do not consume credits */ 1819 if (iu_type != SRP_IU_RSP) { 1820 if (ch->req_lim <= rsv) { 1821 ++target->zero_req_lim; 1822 return NULL; 1823 } 1824 1825 --ch->req_lim; 1826 } 1827 1828 iu = list_first_entry(&ch->free_tx, struct srp_iu, list); 1829 list_del(&iu->list); 1830 return iu; 1831 } 1832 1833 /* 1834 * Note: if this function is called from inside ib_drain_sq() then it will 1835 * be called without ch->lock being held. If ib_drain_sq() dequeues a WQE 1836 * with status IB_WC_SUCCESS then that's a bug. 
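 * Work requests that are flushed while the send queue is drained complete
 * with an error status such as IB_WC_WR_FLUSH_ERR; those completions are
 * handled by the early return below.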
1837 */ 1838 static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc) 1839 { 1840 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe); 1841 struct srp_rdma_ch *ch = cq->cq_context; 1842 1843 if (unlikely(wc->status != IB_WC_SUCCESS)) { 1844 srp_handle_qp_err(cq, wc, "SEND"); 1845 return; 1846 } 1847 1848 lockdep_assert_held(&ch->lock); 1849 1850 list_add(&iu->list, &ch->free_tx); 1851 } 1852 1853 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len) 1854 { 1855 struct srp_target_port *target = ch->target; 1856 struct ib_sge list; 1857 struct ib_send_wr wr, *bad_wr; 1858 1859 list.addr = iu->dma; 1860 list.length = len; 1861 list.lkey = target->lkey; 1862 1863 iu->cqe.done = srp_send_done; 1864 1865 wr.next = NULL; 1866 wr.wr_cqe = &iu->cqe; 1867 wr.sg_list = &list; 1868 wr.num_sge = 1; 1869 wr.opcode = IB_WR_SEND; 1870 wr.send_flags = IB_SEND_SIGNALED; 1871 1872 return ib_post_send(ch->qp, &wr, &bad_wr); 1873 } 1874 1875 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu) 1876 { 1877 struct srp_target_port *target = ch->target; 1878 struct ib_recv_wr wr, *bad_wr; 1879 struct ib_sge list; 1880 1881 list.addr = iu->dma; 1882 list.length = iu->size; 1883 list.lkey = target->lkey; 1884 1885 iu->cqe.done = srp_recv_done; 1886 1887 wr.next = NULL; 1888 wr.wr_cqe = &iu->cqe; 1889 wr.sg_list = &list; 1890 wr.num_sge = 1; 1891 1892 return ib_post_recv(ch->qp, &wr, &bad_wr); 1893 } 1894 1895 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp) 1896 { 1897 struct srp_target_port *target = ch->target; 1898 struct srp_request *req; 1899 struct scsi_cmnd *scmnd; 1900 unsigned long flags; 1901 1902 if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) { 1903 spin_lock_irqsave(&ch->lock, flags); 1904 ch->req_lim += be32_to_cpu(rsp->req_lim_delta); 1905 if (rsp->tag == ch->tsk_mgmt_tag) { 1906 ch->tsk_mgmt_status = -1; 1907 if (be32_to_cpu(rsp->resp_data_len) >= 4) 1908 ch->tsk_mgmt_status = rsp->data[3]; 1909 complete(&ch->tsk_mgmt_done); 1910 } else { 1911 shost_printk(KERN_ERR, target->scsi_host, 1912 "Received tsk mgmt response too late for tag %#llx\n", 1913 rsp->tag); 1914 } 1915 spin_unlock_irqrestore(&ch->lock, flags); 1916 } else { 1917 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag); 1918 if (scmnd && scmnd->host_scribble) { 1919 req = (void *)scmnd->host_scribble; 1920 scmnd = srp_claim_req(ch, req, NULL, scmnd); 1921 } else { 1922 scmnd = NULL; 1923 } 1924 if (!scmnd) { 1925 shost_printk(KERN_ERR, target->scsi_host, 1926 "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n", 1927 rsp->tag, ch - target->ch, ch->qp->qp_num); 1928 1929 spin_lock_irqsave(&ch->lock, flags); 1930 ch->req_lim += be32_to_cpu(rsp->req_lim_delta); 1931 spin_unlock_irqrestore(&ch->lock, flags); 1932 1933 return; 1934 } 1935 scmnd->result = rsp->status; 1936 1937 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) { 1938 memcpy(scmnd->sense_buffer, rsp->data + 1939 be32_to_cpu(rsp->resp_data_len), 1940 min_t(int, be32_to_cpu(rsp->sense_data_len), 1941 SCSI_SENSE_BUFFERSIZE)); 1942 } 1943 1944 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER)) 1945 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt)); 1946 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER)) 1947 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt)); 1948 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER)) 1949 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt)); 1950 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER)) 1951 scsi_set_resid(scmnd, 
-be32_to_cpu(rsp->data_out_res_cnt)); 1952 1953 srp_free_req(ch, req, scmnd, 1954 be32_to_cpu(rsp->req_lim_delta)); 1955 1956 scmnd->host_scribble = NULL; 1957 scmnd->scsi_done(scmnd); 1958 } 1959 } 1960 1961 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta, 1962 void *rsp, int len) 1963 { 1964 struct srp_target_port *target = ch->target; 1965 struct ib_device *dev = target->srp_host->srp_dev->dev; 1966 unsigned long flags; 1967 struct srp_iu *iu; 1968 int err; 1969 1970 spin_lock_irqsave(&ch->lock, flags); 1971 ch->req_lim += req_delta; 1972 iu = __srp_get_tx_iu(ch, SRP_IU_RSP); 1973 spin_unlock_irqrestore(&ch->lock, flags); 1974 1975 if (!iu) { 1976 shost_printk(KERN_ERR, target->scsi_host, PFX 1977 "no IU available to send response\n"); 1978 return 1; 1979 } 1980 1981 ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE); 1982 memcpy(iu->buf, rsp, len); 1983 ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE); 1984 1985 err = srp_post_send(ch, iu, len); 1986 if (err) { 1987 shost_printk(KERN_ERR, target->scsi_host, PFX 1988 "unable to post response: %d\n", err); 1989 srp_put_tx_iu(ch, iu, SRP_IU_RSP); 1990 } 1991 1992 return err; 1993 } 1994 1995 static void srp_process_cred_req(struct srp_rdma_ch *ch, 1996 struct srp_cred_req *req) 1997 { 1998 struct srp_cred_rsp rsp = { 1999 .opcode = SRP_CRED_RSP, 2000 .tag = req->tag, 2001 }; 2002 s32 delta = be32_to_cpu(req->req_lim_delta); 2003 2004 if (srp_response_common(ch, delta, &rsp, sizeof(rsp))) 2005 shost_printk(KERN_ERR, ch->target->scsi_host, PFX 2006 "problems processing SRP_CRED_REQ\n"); 2007 } 2008 2009 static void srp_process_aer_req(struct srp_rdma_ch *ch, 2010 struct srp_aer_req *req) 2011 { 2012 struct srp_target_port *target = ch->target; 2013 struct srp_aer_rsp rsp = { 2014 .opcode = SRP_AER_RSP, 2015 .tag = req->tag, 2016 }; 2017 s32 delta = be32_to_cpu(req->req_lim_delta); 2018 2019 shost_printk(KERN_ERR, target->scsi_host, PFX 2020 "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun)); 2021 2022 if (srp_response_common(ch, delta, &rsp, sizeof(rsp))) 2023 shost_printk(KERN_ERR, target->scsi_host, PFX 2024 "problems processing SRP_AER_REQ\n"); 2025 } 2026 2027 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc) 2028 { 2029 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe); 2030 struct srp_rdma_ch *ch = cq->cq_context; 2031 struct srp_target_port *target = ch->target; 2032 struct ib_device *dev = target->srp_host->srp_dev->dev; 2033 int res; 2034 u8 opcode; 2035 2036 if (unlikely(wc->status != IB_WC_SUCCESS)) { 2037 srp_handle_qp_err(cq, wc, "RECV"); 2038 return; 2039 } 2040 2041 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len, 2042 DMA_FROM_DEVICE); 2043 2044 opcode = *(u8 *) iu->buf; 2045 2046 if (0) { 2047 shost_printk(KERN_ERR, target->scsi_host, 2048 PFX "recv completion, opcode 0x%02x\n", opcode); 2049 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1, 2050 iu->buf, wc->byte_len, true); 2051 } 2052 2053 switch (opcode) { 2054 case SRP_RSP: 2055 srp_process_rsp(ch, iu->buf); 2056 break; 2057 2058 case SRP_CRED_REQ: 2059 srp_process_cred_req(ch, iu->buf); 2060 break; 2061 2062 case SRP_AER_REQ: 2063 srp_process_aer_req(ch, iu->buf); 2064 break; 2065 2066 case SRP_T_LOGOUT: 2067 /* XXX Handle target logout */ 2068 shost_printk(KERN_WARNING, target->scsi_host, 2069 PFX "Got target logout request\n"); 2070 break; 2071 2072 default: 2073 shost_printk(KERN_WARNING, target->scsi_host, 2074 PFX "Unhandled SRP opcode 0x%02x\n", opcode); 2075 break; 
2076 } 2077 2078 ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len, 2079 DMA_FROM_DEVICE); 2080 2081 res = srp_post_recv(ch, iu); 2082 if (res != 0) 2083 shost_printk(KERN_ERR, target->scsi_host, 2084 PFX "Recv failed with error code %d\n", res); 2085 } 2086 2087 /** 2088 * srp_tl_err_work() - handle a transport layer error 2089 * @work: Work structure embedded in an SRP target port. 2090 * 2091 * Note: This function may get invoked before the rport has been created, 2092 * hence the target->rport test. 2093 */ 2094 static void srp_tl_err_work(struct work_struct *work) 2095 { 2096 struct srp_target_port *target; 2097 2098 target = container_of(work, struct srp_target_port, tl_err_work); 2099 if (target->rport) 2100 srp_start_tl_fail_timers(target->rport); 2101 } 2102 2103 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc, 2104 const char *opname) 2105 { 2106 struct srp_rdma_ch *ch = cq->cq_context; 2107 struct srp_target_port *target = ch->target; 2108 2109 if (ch->connected && !target->qp_in_error) { 2110 shost_printk(KERN_ERR, target->scsi_host, 2111 PFX "failed %s status %s (%d) for CQE %p\n", 2112 opname, ib_wc_status_msg(wc->status), wc->status, 2113 wc->wr_cqe); 2114 queue_work(system_long_wq, &target->tl_err_work); 2115 } 2116 target->qp_in_error = true; 2117 } 2118 2119 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) 2120 { 2121 struct srp_target_port *target = host_to_target(shost); 2122 struct srp_rport *rport = target->rport; 2123 struct srp_rdma_ch *ch; 2124 struct srp_request *req; 2125 struct srp_iu *iu; 2126 struct srp_cmd *cmd; 2127 struct ib_device *dev; 2128 unsigned long flags; 2129 u32 tag; 2130 u16 idx; 2131 int len, ret; 2132 const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler; 2133 2134 /* 2135 * The SCSI EH thread is the only context from which srp_queuecommand() 2136 * can get invoked for blocked devices (SDEV_BLOCK / 2137 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by 2138 * locking the rport mutex if invoked from inside the SCSI EH. 
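 * (srp_reconnect_rport() holds the same mutex while it tears down and
 * re-creates the RDMA channel resources via srp_rport_reconnect(), so
 * serializing on rport->mutex here prevents a command from being built
 * against IU rings or a QP that are in the middle of being replaced.)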
2139 */ 2140 if (in_scsi_eh) 2141 mutex_lock(&rport->mutex); 2142 2143 scmnd->result = srp_chkready(target->rport); 2144 if (unlikely(scmnd->result)) 2145 goto err; 2146 2147 WARN_ON_ONCE(scmnd->request->tag < 0); 2148 tag = blk_mq_unique_tag(scmnd->request); 2149 ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)]; 2150 idx = blk_mq_unique_tag_to_tag(tag); 2151 WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n", 2152 dev_name(&shost->shost_gendev), tag, idx, 2153 target->req_ring_size); 2154 2155 spin_lock_irqsave(&ch->lock, flags); 2156 iu = __srp_get_tx_iu(ch, SRP_IU_CMD); 2157 spin_unlock_irqrestore(&ch->lock, flags); 2158 2159 if (!iu) 2160 goto err; 2161 2162 req = &ch->req_ring[idx]; 2163 dev = target->srp_host->srp_dev->dev; 2164 ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len, 2165 DMA_TO_DEVICE); 2166 2167 scmnd->host_scribble = (void *) req; 2168 2169 cmd = iu->buf; 2170 memset(cmd, 0, sizeof *cmd); 2171 2172 cmd->opcode = SRP_CMD; 2173 int_to_scsilun(scmnd->device->lun, &cmd->lun); 2174 cmd->tag = tag; 2175 memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len); 2176 2177 req->scmnd = scmnd; 2178 req->cmd = iu; 2179 2180 len = srp_map_data(scmnd, ch, req); 2181 if (len < 0) { 2182 shost_printk(KERN_ERR, target->scsi_host, 2183 PFX "Failed to map data (%d)\n", len); 2184 /* 2185 * If we ran out of memory descriptors (-ENOMEM) because an 2186 * application is queuing many requests with more than 2187 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer 2188 * to reduce queue depth temporarily. 2189 */ 2190 scmnd->result = len == -ENOMEM ? 2191 DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16; 2192 goto err_iu; 2193 } 2194 2195 ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len, 2196 DMA_TO_DEVICE); 2197 2198 if (srp_post_send(ch, iu, len)) { 2199 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n"); 2200 goto err_unmap; 2201 } 2202 2203 ret = 0; 2204 2205 unlock_rport: 2206 if (in_scsi_eh) 2207 mutex_unlock(&rport->mutex); 2208 2209 return ret; 2210 2211 err_unmap: 2212 srp_unmap_data(scmnd, ch, req); 2213 2214 err_iu: 2215 srp_put_tx_iu(ch, iu, SRP_IU_CMD); 2216 2217 /* 2218 * Avoid that the loops that iterate over the request ring can 2219 * encounter a dangling SCSI command pointer. 2220 */ 2221 req->scmnd = NULL; 2222 2223 err: 2224 if (scmnd->result) { 2225 scmnd->scsi_done(scmnd); 2226 ret = 0; 2227 } else { 2228 ret = SCSI_MLQUEUE_HOST_BUSY; 2229 } 2230 2231 goto unlock_rport; 2232 } 2233 2234 /* 2235 * Note: the resources allocated in this function are freed in 2236 * srp_free_ch_ib(). 
2237 */ 2238 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch) 2239 { 2240 struct srp_target_port *target = ch->target; 2241 int i; 2242 2243 ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring), 2244 GFP_KERNEL); 2245 if (!ch->rx_ring) 2246 goto err_no_ring; 2247 ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring), 2248 GFP_KERNEL); 2249 if (!ch->tx_ring) 2250 goto err_no_ring; 2251 2252 for (i = 0; i < target->queue_size; ++i) { 2253 ch->rx_ring[i] = srp_alloc_iu(target->srp_host, 2254 ch->max_ti_iu_len, 2255 GFP_KERNEL, DMA_FROM_DEVICE); 2256 if (!ch->rx_ring[i]) 2257 goto err; 2258 } 2259 2260 for (i = 0; i < target->queue_size; ++i) { 2261 ch->tx_ring[i] = srp_alloc_iu(target->srp_host, 2262 target->max_iu_len, 2263 GFP_KERNEL, DMA_TO_DEVICE); 2264 if (!ch->tx_ring[i]) 2265 goto err; 2266 2267 list_add(&ch->tx_ring[i]->list, &ch->free_tx); 2268 } 2269 2270 return 0; 2271 2272 err: 2273 for (i = 0; i < target->queue_size; ++i) { 2274 srp_free_iu(target->srp_host, ch->rx_ring[i]); 2275 srp_free_iu(target->srp_host, ch->tx_ring[i]); 2276 } 2277 2278 2279 err_no_ring: 2280 kfree(ch->tx_ring); 2281 ch->tx_ring = NULL; 2282 kfree(ch->rx_ring); 2283 ch->rx_ring = NULL; 2284 2285 return -ENOMEM; 2286 } 2287 2288 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask) 2289 { 2290 uint64_t T_tr_ns, max_compl_time_ms; 2291 uint32_t rq_tmo_jiffies; 2292 2293 /* 2294 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair, 2295 * table 91), both the QP timeout and the retry count have to be set 2296 * for RC QP's during the RTR to RTS transition. 2297 */ 2298 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) != 2299 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)); 2300 2301 /* 2302 * Set target->rq_tmo_jiffies to one second more than the largest time 2303 * it can take before an error completion is generated. See also 2304 * C9-140..142 in the IBTA spec for more information about how to 2305 * convert the QP Local ACK Timeout value to nanoseconds. 2306 */ 2307 T_tr_ns = 4096 * (1ULL << qp_attr->timeout); 2308 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns; 2309 do_div(max_compl_time_ms, NSEC_PER_MSEC); 2310 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000); 2311 2312 return rq_tmo_jiffies; 2313 } 2314 2315 static void srp_cm_rep_handler(struct ib_cm_id *cm_id, 2316 const struct srp_login_rsp *lrsp, 2317 struct srp_rdma_ch *ch) 2318 { 2319 struct srp_target_port *target = ch->target; 2320 struct ib_qp_attr *qp_attr = NULL; 2321 int attr_mask = 0; 2322 int ret; 2323 int i; 2324 2325 if (lrsp->opcode == SRP_LOGIN_RSP) { 2326 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len); 2327 ch->req_lim = be32_to_cpu(lrsp->req_lim_delta); 2328 2329 /* 2330 * Reserve credits for task management so we don't 2331 * bounce requests back to the SCSI mid-layer. 
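 * Example with an illustrative credit count: if the login response
 * grants req_lim = 64, can_queue is capped at 64 - SRP_TSK_MGMT_SQ_SIZE,
 * which guarantees that __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT) can still
 * obtain a credit even when every SCSI command slot is in flight.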
2332 */ 2333 target->scsi_host->can_queue 2334 = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE, 2335 target->scsi_host->can_queue); 2336 target->scsi_host->cmd_per_lun 2337 = min_t(int, target->scsi_host->can_queue, 2338 target->scsi_host->cmd_per_lun); 2339 } else { 2340 shost_printk(KERN_WARNING, target->scsi_host, 2341 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode); 2342 ret = -ECONNRESET; 2343 goto error; 2344 } 2345 2346 if (!ch->rx_ring) { 2347 ret = srp_alloc_iu_bufs(ch); 2348 if (ret) 2349 goto error; 2350 } 2351 2352 ret = -ENOMEM; 2353 qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); 2354 if (!qp_attr) 2355 goto error; 2356 2357 qp_attr->qp_state = IB_QPS_RTR; 2358 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); 2359 if (ret) 2360 goto error_free; 2361 2362 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask); 2363 if (ret) 2364 goto error_free; 2365 2366 for (i = 0; i < target->queue_size; i++) { 2367 struct srp_iu *iu = ch->rx_ring[i]; 2368 2369 ret = srp_post_recv(ch, iu); 2370 if (ret) 2371 goto error_free; 2372 } 2373 2374 qp_attr->qp_state = IB_QPS_RTS; 2375 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); 2376 if (ret) 2377 goto error_free; 2378 2379 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask); 2380 2381 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask); 2382 if (ret) 2383 goto error_free; 2384 2385 ret = ib_send_cm_rtu(cm_id, NULL, 0); 2386 2387 error_free: 2388 kfree(qp_attr); 2389 2390 error: 2391 ch->status = ret; 2392 } 2393 2394 static void srp_cm_rej_handler(struct ib_cm_id *cm_id, 2395 struct ib_cm_event *event, 2396 struct srp_rdma_ch *ch) 2397 { 2398 struct srp_target_port *target = ch->target; 2399 struct Scsi_Host *shost = target->scsi_host; 2400 struct ib_class_port_info *cpi; 2401 int opcode; 2402 2403 switch (event->param.rej_rcvd.reason) { 2404 case IB_CM_REJ_PORT_CM_REDIRECT: 2405 cpi = event->param.rej_rcvd.ari; 2406 sa_path_set_dlid(&ch->path, htonl(ntohs(cpi->redirect_lid))); 2407 ch->path.pkey = cpi->redirect_pkey; 2408 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff; 2409 memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16); 2410 2411 ch->status = sa_path_get_dlid(&ch->path) ? 2412 SRP_DLID_REDIRECT : SRP_PORT_REDIRECT; 2413 break; 2414 2415 case IB_CM_REJ_PORT_REDIRECT: 2416 if (srp_target_is_topspin(target)) { 2417 /* 2418 * Topspin/Cisco SRP gateways incorrectly send 2419 * reject reason code 25 when they mean 24 2420 * (port redirect). 
2421 */ 2422 memcpy(ch->path.dgid.raw, 2423 event->param.rej_rcvd.ari, 16); 2424 2425 shost_printk(KERN_DEBUG, shost, 2426 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n", 2427 be64_to_cpu(ch->path.dgid.global.subnet_prefix), 2428 be64_to_cpu(ch->path.dgid.global.interface_id)); 2429 2430 ch->status = SRP_PORT_REDIRECT; 2431 } else { 2432 shost_printk(KERN_WARNING, shost, 2433 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n"); 2434 ch->status = -ECONNRESET; 2435 } 2436 break; 2437 2438 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID: 2439 shost_printk(KERN_WARNING, shost, 2440 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n"); 2441 ch->status = -ECONNRESET; 2442 break; 2443 2444 case IB_CM_REJ_CONSUMER_DEFINED: 2445 opcode = *(u8 *) event->private_data; 2446 if (opcode == SRP_LOGIN_REJ) { 2447 struct srp_login_rej *rej = event->private_data; 2448 u32 reason = be32_to_cpu(rej->reason); 2449 2450 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE) 2451 shost_printk(KERN_WARNING, shost, 2452 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n"); 2453 else 2454 shost_printk(KERN_WARNING, shost, PFX 2455 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n", 2456 target->sgid.raw, 2457 target->orig_dgid.raw, reason); 2458 } else 2459 shost_printk(KERN_WARNING, shost, 2460 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED," 2461 " opcode 0x%02x\n", opcode); 2462 ch->status = -ECONNRESET; 2463 break; 2464 2465 case IB_CM_REJ_STALE_CONN: 2466 shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n"); 2467 ch->status = SRP_STALE_CONN; 2468 break; 2469 2470 default: 2471 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n", 2472 event->param.rej_rcvd.reason); 2473 ch->status = -ECONNRESET; 2474 } 2475 } 2476 2477 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) 2478 { 2479 struct srp_rdma_ch *ch = cm_id->context; 2480 struct srp_target_port *target = ch->target; 2481 int comp = 0; 2482 2483 switch (event->event) { 2484 case IB_CM_REQ_ERROR: 2485 shost_printk(KERN_DEBUG, target->scsi_host, 2486 PFX "Sending CM REQ failed\n"); 2487 comp = 1; 2488 ch->status = -ECONNRESET; 2489 break; 2490 2491 case IB_CM_REP_RECEIVED: 2492 comp = 1; 2493 srp_cm_rep_handler(cm_id, event->private_data, ch); 2494 break; 2495 2496 case IB_CM_REJ_RECEIVED: 2497 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n"); 2498 comp = 1; 2499 2500 srp_cm_rej_handler(cm_id, event, ch); 2501 break; 2502 2503 case IB_CM_DREQ_RECEIVED: 2504 shost_printk(KERN_WARNING, target->scsi_host, 2505 PFX "DREQ received - connection closed\n"); 2506 ch->connected = false; 2507 if (ib_send_cm_drep(cm_id, NULL, 0)) 2508 shost_printk(KERN_ERR, target->scsi_host, 2509 PFX "Sending CM DREP failed\n"); 2510 queue_work(system_long_wq, &target->tl_err_work); 2511 break; 2512 2513 case IB_CM_TIMEWAIT_EXIT: 2514 shost_printk(KERN_ERR, target->scsi_host, 2515 PFX "connection closed\n"); 2516 comp = 1; 2517 2518 ch->status = 0; 2519 break; 2520 2521 case IB_CM_MRA_RECEIVED: 2522 case IB_CM_DREQ_ERROR: 2523 case IB_CM_DREP_RECEIVED: 2524 break; 2525 2526 default: 2527 shost_printk(KERN_WARNING, target->scsi_host, 2528 PFX "Unhandled CM event %d\n", event->event); 2529 break; 2530 } 2531 2532 if (comp) 2533 complete(&ch->done); 2534 2535 return 0; 2536 } 2537 2538 /** 2539 * srp_change_queue_depth - setting device queue depth 2540 * @sdev: scsi device struct 2541 * @qdepth: requested queue depth 2542 * 2543 * Returns queue depth. 
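 * Devices without tagged-command support are limited to a depth of one
 * before the value is passed on to scsi_change_queue_depth().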
2544 */ 2545 static int 2546 srp_change_queue_depth(struct scsi_device *sdev, int qdepth) 2547 { 2548 if (!sdev->tagged_supported) 2549 qdepth = 1; 2550 return scsi_change_queue_depth(sdev, qdepth); 2551 } 2552 2553 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun, 2554 u8 func, u8 *status) 2555 { 2556 struct srp_target_port *target = ch->target; 2557 struct srp_rport *rport = target->rport; 2558 struct ib_device *dev = target->srp_host->srp_dev->dev; 2559 struct srp_iu *iu; 2560 struct srp_tsk_mgmt *tsk_mgmt; 2561 int res; 2562 2563 if (!ch->connected || target->qp_in_error) 2564 return -1; 2565 2566 /* 2567 * Lock the rport mutex to avoid that srp_create_ch_ib() is 2568 * invoked while a task management function is being sent. 2569 */ 2570 mutex_lock(&rport->mutex); 2571 spin_lock_irq(&ch->lock); 2572 iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT); 2573 spin_unlock_irq(&ch->lock); 2574 2575 if (!iu) { 2576 mutex_unlock(&rport->mutex); 2577 2578 return -1; 2579 } 2580 2581 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt, 2582 DMA_TO_DEVICE); 2583 tsk_mgmt = iu->buf; 2584 memset(tsk_mgmt, 0, sizeof *tsk_mgmt); 2585 2586 tsk_mgmt->opcode = SRP_TSK_MGMT; 2587 int_to_scsilun(lun, &tsk_mgmt->lun); 2588 tsk_mgmt->tsk_mgmt_func = func; 2589 tsk_mgmt->task_tag = req_tag; 2590 2591 spin_lock_irq(&ch->lock); 2592 ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT; 2593 tsk_mgmt->tag = ch->tsk_mgmt_tag; 2594 spin_unlock_irq(&ch->lock); 2595 2596 init_completion(&ch->tsk_mgmt_done); 2597 2598 ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt, 2599 DMA_TO_DEVICE); 2600 if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) { 2601 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT); 2602 mutex_unlock(&rport->mutex); 2603 2604 return -1; 2605 } 2606 res = wait_for_completion_timeout(&ch->tsk_mgmt_done, 2607 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)); 2608 if (res > 0 && status) 2609 *status = ch->tsk_mgmt_status; 2610 mutex_unlock(&rport->mutex); 2611 2612 WARN_ON_ONCE(res < 0); 2613 2614 return res > 0 ? 
0 : -1; 2615 } 2616 2617 static int srp_abort(struct scsi_cmnd *scmnd) 2618 { 2619 struct srp_target_port *target = host_to_target(scmnd->device->host); 2620 struct srp_request *req = (struct srp_request *) scmnd->host_scribble; 2621 u32 tag; 2622 u16 ch_idx; 2623 struct srp_rdma_ch *ch; 2624 int ret; 2625 2626 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); 2627 2628 if (!req) 2629 return SUCCESS; 2630 tag = blk_mq_unique_tag(scmnd->request); 2631 ch_idx = blk_mq_unique_tag_to_hwq(tag); 2632 if (WARN_ON_ONCE(ch_idx >= target->ch_count)) 2633 return SUCCESS; 2634 ch = &target->ch[ch_idx]; 2635 if (!srp_claim_req(ch, req, NULL, scmnd)) 2636 return SUCCESS; 2637 shost_printk(KERN_ERR, target->scsi_host, 2638 "Sending SRP abort for tag %#x\n", tag); 2639 if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun, 2640 SRP_TSK_ABORT_TASK, NULL) == 0) 2641 ret = SUCCESS; 2642 else if (target->rport->state == SRP_RPORT_LOST) 2643 ret = FAST_IO_FAIL; 2644 else 2645 ret = FAILED; 2646 srp_free_req(ch, req, scmnd, 0); 2647 scmnd->result = DID_ABORT << 16; 2648 scmnd->scsi_done(scmnd); 2649 2650 return ret; 2651 } 2652 2653 static int srp_reset_device(struct scsi_cmnd *scmnd) 2654 { 2655 struct srp_target_port *target = host_to_target(scmnd->device->host); 2656 struct srp_rdma_ch *ch; 2657 int i, j; 2658 u8 status; 2659 2660 shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n"); 2661 2662 ch = &target->ch[0]; 2663 if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun, 2664 SRP_TSK_LUN_RESET, &status)) 2665 return FAILED; 2666 if (status) 2667 return FAILED; 2668 2669 for (i = 0; i < target->ch_count; i++) { 2670 ch = &target->ch[i]; 2671 for (j = 0; j < target->req_ring_size; ++j) { 2672 struct srp_request *req = &ch->req_ring[j]; 2673 2674 srp_finish_req(ch, req, scmnd->device, DID_RESET << 16); 2675 } 2676 } 2677 2678 return SUCCESS; 2679 } 2680 2681 static int srp_reset_host(struct scsi_cmnd *scmnd) 2682 { 2683 struct srp_target_port *target = host_to_target(scmnd->device->host); 2684 2685 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n"); 2686 2687 return srp_reconnect_rport(target->rport) == 0 ?
SUCCESS : FAILED; 2688 } 2689 2690 static int srp_slave_alloc(struct scsi_device *sdev) 2691 { 2692 struct Scsi_Host *shost = sdev->host; 2693 struct srp_target_port *target = host_to_target(shost); 2694 struct srp_device *srp_dev = target->srp_host->srp_dev; 2695 2696 2697 blk_queue_virt_boundary(sdev->request_queue, 2698 ~srp_dev->mr_page_mask); 2699 2700 return 0; 2701 } 2702 2703 static int srp_slave_configure(struct scsi_device *sdev) 2704 { 2705 struct Scsi_Host *shost = sdev->host; 2706 struct srp_target_port *target = host_to_target(shost); 2707 struct request_queue *q = sdev->request_queue; 2708 unsigned long timeout; 2709 2710 if (sdev->type == TYPE_DISK) { 2711 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies); 2712 blk_queue_rq_timeout(q, timeout); 2713 } 2714 2715 return 0; 2716 } 2717 2718 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr, 2719 char *buf) 2720 { 2721 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2722 2723 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext)); 2724 } 2725 2726 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr, 2727 char *buf) 2728 { 2729 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2730 2731 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid)); 2732 } 2733 2734 static ssize_t show_service_id(struct device *dev, 2735 struct device_attribute *attr, char *buf) 2736 { 2737 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2738 2739 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id)); 2740 } 2741 2742 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, 2743 char *buf) 2744 { 2745 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2746 2747 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey)); 2748 } 2749 2750 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr, 2751 char *buf) 2752 { 2753 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2754 2755 return sprintf(buf, "%pI6\n", target->sgid.raw); 2756 } 2757 2758 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr, 2759 char *buf) 2760 { 2761 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2762 struct srp_rdma_ch *ch = &target->ch[0]; 2763 2764 return sprintf(buf, "%pI6\n", ch->path.dgid.raw); 2765 } 2766 2767 static ssize_t show_orig_dgid(struct device *dev, 2768 struct device_attribute *attr, char *buf) 2769 { 2770 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2771 2772 return sprintf(buf, "%pI6\n", target->orig_dgid.raw); 2773 } 2774 2775 static ssize_t show_req_lim(struct device *dev, 2776 struct device_attribute *attr, char *buf) 2777 { 2778 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2779 struct srp_rdma_ch *ch; 2780 int i, req_lim = INT_MAX; 2781 2782 for (i = 0; i < target->ch_count; i++) { 2783 ch = &target->ch[i]; 2784 req_lim = min(req_lim, ch->req_lim); 2785 } 2786 return sprintf(buf, "%d\n", req_lim); 2787 } 2788 2789 static ssize_t show_zero_req_lim(struct device *dev, 2790 struct device_attribute *attr, char *buf) 2791 { 2792 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2793 2794 return sprintf(buf, "%d\n", target->zero_req_lim); 2795 } 2796 2797 static ssize_t show_local_ib_port(struct device *dev, 2798 struct device_attribute *attr, char *buf) 2799 { 2800 struct srp_target_port *target =
host_to_target(class_to_shost(dev)); 2801 2802 return sprintf(buf, "%d\n", target->srp_host->port); 2803 } 2804 2805 static ssize_t show_local_ib_device(struct device *dev, 2806 struct device_attribute *attr, char *buf) 2807 { 2808 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2809 2810 return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name); 2811 } 2812 2813 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr, 2814 char *buf) 2815 { 2816 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2817 2818 return sprintf(buf, "%d\n", target->ch_count); 2819 } 2820 2821 static ssize_t show_comp_vector(struct device *dev, 2822 struct device_attribute *attr, char *buf) 2823 { 2824 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2825 2826 return sprintf(buf, "%d\n", target->comp_vector); 2827 } 2828 2829 static ssize_t show_tl_retry_count(struct device *dev, 2830 struct device_attribute *attr, char *buf) 2831 { 2832 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2833 2834 return sprintf(buf, "%d\n", target->tl_retry_count); 2835 } 2836 2837 static ssize_t show_cmd_sg_entries(struct device *dev, 2838 struct device_attribute *attr, char *buf) 2839 { 2840 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2841 2842 return sprintf(buf, "%u\n", target->cmd_sg_cnt); 2843 } 2844 2845 static ssize_t show_allow_ext_sg(struct device *dev, 2846 struct device_attribute *attr, char *buf) 2847 { 2848 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 2849 2850 return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false"); 2851 } 2852 2853 static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL); 2854 static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL); 2855 static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL); 2856 static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); 2857 static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL); 2858 static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL); 2859 static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL); 2860 static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL); 2861 static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL); 2862 static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL); 2863 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL); 2864 static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL); 2865 static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL); 2866 static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL); 2867 static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL); 2868 static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL); 2869 2870 static struct device_attribute *srp_host_attrs[] = { 2871 &dev_attr_id_ext, 2872 &dev_attr_ioc_guid, 2873 &dev_attr_service_id, 2874 &dev_attr_pkey, 2875 &dev_attr_sgid, 2876 &dev_attr_dgid, 2877 &dev_attr_orig_dgid, 2878 &dev_attr_req_lim, 2879 &dev_attr_zero_req_lim, 2880 &dev_attr_local_ib_port, 2881 &dev_attr_local_ib_device, 2882 &dev_attr_ch_count, 2883 &dev_attr_comp_vector, 2884 &dev_attr_tl_retry_count, 2885 &dev_attr_cmd_sg_entries, 2886 &dev_attr_allow_ext_sg, 2887 NULL 2888 }; 2889 2890 static struct scsi_host_template srp_template = { 2891 .module = THIS_MODULE, 2892 .name = "InfiniBand SRP initiator", 2893 .proc_name = DRV_NAME, 2894 .slave_alloc = srp_slave_alloc, 2895 .slave_configure = srp_slave_configure, 2896 .info = 
srp_target_info, 2897 .queuecommand = srp_queuecommand, 2898 .change_queue_depth = srp_change_queue_depth, 2899 .eh_timed_out = srp_timed_out, 2900 .eh_abort_handler = srp_abort, 2901 .eh_device_reset_handler = srp_reset_device, 2902 .eh_host_reset_handler = srp_reset_host, 2903 .skip_settle_delay = true, 2904 .sg_tablesize = SRP_DEF_SG_TABLESIZE, 2905 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE, 2906 .this_id = -1, 2907 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE, 2908 .use_clustering = ENABLE_CLUSTERING, 2909 .shost_attrs = srp_host_attrs, 2910 .track_queue_depth = 1, 2911 }; 2912 2913 static int srp_sdev_count(struct Scsi_Host *host) 2914 { 2915 struct scsi_device *sdev; 2916 int c = 0; 2917 2918 shost_for_each_device(sdev, host) 2919 c++; 2920 2921 return c; 2922 } 2923 2924 /* 2925 * Return values: 2926 * < 0 upon failure. Caller is responsible for SRP target port cleanup. 2927 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port 2928 * removal has been scheduled. 2929 * 0 and target->state != SRP_TARGET_REMOVED upon success. 2930 */ 2931 static int srp_add_target(struct srp_host *host, struct srp_target_port *target) 2932 { 2933 struct srp_rport_identifiers ids; 2934 struct srp_rport *rport; 2935 2936 target->state = SRP_TARGET_SCANNING; 2937 sprintf(target->target_name, "SRP.T10:%016llX", 2938 be64_to_cpu(target->id_ext)); 2939 2940 if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dev.parent)) 2941 return -ENODEV; 2942 2943 memcpy(ids.port_id, &target->id_ext, 8); 2944 memcpy(ids.port_id + 8, &target->ioc_guid, 8); 2945 ids.roles = SRP_RPORT_ROLE_TARGET; 2946 rport = srp_rport_add(target->scsi_host, &ids); 2947 if (IS_ERR(rport)) { 2948 scsi_remove_host(target->scsi_host); 2949 return PTR_ERR(rport); 2950 } 2951 2952 rport->lld_data = target; 2953 target->rport = rport; 2954 2955 spin_lock(&host->target_lock); 2956 list_add_tail(&target->list, &host->target_list); 2957 spin_unlock(&host->target_lock); 2958 2959 scsi_scan_target(&target->scsi_host->shost_gendev, 2960 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL); 2961 2962 if (srp_connected_ch(target) < target->ch_count || 2963 target->qp_in_error) { 2964 shost_printk(KERN_INFO, target->scsi_host, 2965 PFX "SCSI scan failed - removing SCSI host\n"); 2966 srp_queue_remove_work(target); 2967 goto out; 2968 } 2969 2970 pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n", 2971 dev_name(&target->scsi_host->shost_gendev), 2972 srp_sdev_count(target->scsi_host)); 2973 2974 spin_lock_irq(&target->lock); 2975 if (target->state == SRP_TARGET_SCANNING) 2976 target->state = SRP_TARGET_LIVE; 2977 spin_unlock_irq(&target->lock); 2978 2979 out: 2980 return 0; 2981 } 2982 2983 static void srp_release_dev(struct device *dev) 2984 { 2985 struct srp_host *host = 2986 container_of(dev, struct srp_host, dev); 2987 2988 complete(&host->released); 2989 } 2990 2991 static struct class srp_class = { 2992 .name = "infiniband_srp", 2993 .dev_release = srp_release_dev 2994 }; 2995 2996 /** 2997 * srp_conn_unique() - check whether the connection to a target is unique 2998 * @host: SRP host. 2999 * @target: SRP target port. 
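 * Two target ports are treated as the same connection when their id_ext,
 * ioc_guid and initiator_ext values all match.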
3000 */ 3001 static bool srp_conn_unique(struct srp_host *host, 3002 struct srp_target_port *target) 3003 { 3004 struct srp_target_port *t; 3005 bool ret = false; 3006 3007 if (target->state == SRP_TARGET_REMOVED) 3008 goto out; 3009 3010 ret = true; 3011 3012 spin_lock(&host->target_lock); 3013 list_for_each_entry(t, &host->target_list, list) { 3014 if (t != target && 3015 target->id_ext == t->id_ext && 3016 target->ioc_guid == t->ioc_guid && 3017 target->initiator_ext == t->initiator_ext) { 3018 ret = false; 3019 break; 3020 } 3021 } 3022 spin_unlock(&host->target_lock); 3023 3024 out: 3025 return ret; 3026 } 3027 3028 /* 3029 * Target ports are added by writing 3030 * 3031 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>, 3032 * pkey=<P_Key>,service_id=<service ID> 3033 * 3034 * to the add_target sysfs attribute. 3035 */ 3036 enum { 3037 SRP_OPT_ERR = 0, 3038 SRP_OPT_ID_EXT = 1 << 0, 3039 SRP_OPT_IOC_GUID = 1 << 1, 3040 SRP_OPT_DGID = 1 << 2, 3041 SRP_OPT_PKEY = 1 << 3, 3042 SRP_OPT_SERVICE_ID = 1 << 4, 3043 SRP_OPT_MAX_SECT = 1 << 5, 3044 SRP_OPT_MAX_CMD_PER_LUN = 1 << 6, 3045 SRP_OPT_IO_CLASS = 1 << 7, 3046 SRP_OPT_INITIATOR_EXT = 1 << 8, 3047 SRP_OPT_CMD_SG_ENTRIES = 1 << 9, 3048 SRP_OPT_ALLOW_EXT_SG = 1 << 10, 3049 SRP_OPT_SG_TABLESIZE = 1 << 11, 3050 SRP_OPT_COMP_VECTOR = 1 << 12, 3051 SRP_OPT_TL_RETRY_COUNT = 1 << 13, 3052 SRP_OPT_QUEUE_SIZE = 1 << 14, 3053 SRP_OPT_ALL = (SRP_OPT_ID_EXT | 3054 SRP_OPT_IOC_GUID | 3055 SRP_OPT_DGID | 3056 SRP_OPT_PKEY | 3057 SRP_OPT_SERVICE_ID), 3058 }; 3059 3060 static const match_table_t srp_opt_tokens = { 3061 { SRP_OPT_ID_EXT, "id_ext=%s" }, 3062 { SRP_OPT_IOC_GUID, "ioc_guid=%s" }, 3063 { SRP_OPT_DGID, "dgid=%s" }, 3064 { SRP_OPT_PKEY, "pkey=%x" }, 3065 { SRP_OPT_SERVICE_ID, "service_id=%s" }, 3066 { SRP_OPT_MAX_SECT, "max_sect=%d" }, 3067 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" }, 3068 { SRP_OPT_IO_CLASS, "io_class=%x" }, 3069 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" }, 3070 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" }, 3071 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" }, 3072 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" }, 3073 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" }, 3074 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" }, 3075 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" }, 3076 { SRP_OPT_ERR, NULL } 3077 }; 3078 3079 static int srp_parse_options(const char *buf, struct srp_target_port *target) 3080 { 3081 char *options, *sep_opt; 3082 char *p; 3083 char dgid[3]; 3084 substring_t args[MAX_OPT_ARGS]; 3085 int opt_mask = 0; 3086 int token; 3087 int ret = -EINVAL; 3088 int i; 3089 3090 options = kstrdup(buf, GFP_KERNEL); 3091 if (!options) 3092 return -ENOMEM; 3093 3094 sep_opt = options; 3095 while ((p = strsep(&sep_opt, ",\n")) != NULL) { 3096 if (!*p) 3097 continue; 3098 3099 token = match_token(p, srp_opt_tokens, args); 3100 opt_mask |= token; 3101 3102 switch (token) { 3103 case SRP_OPT_ID_EXT: 3104 p = match_strdup(args); 3105 if (!p) { 3106 ret = -ENOMEM; 3107 goto out; 3108 } 3109 target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16)); 3110 kfree(p); 3111 break; 3112 3113 case SRP_OPT_IOC_GUID: 3114 p = match_strdup(args); 3115 if (!p) { 3116 ret = -ENOMEM; 3117 goto out; 3118 } 3119 target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16)); 3120 kfree(p); 3121 break; 3122 3123 case SRP_OPT_DGID: 3124 p = match_strdup(args); 3125 if (!p) { 3126 ret = -ENOMEM; 3127 goto out; 3128 } 3129 if (strlen(p) != 32) { 3130 pr_warn("bad dest GID parameter '%s'\n", p); 3131 kfree(p); 3132 goto out; 3133 } 3134 3135 for (i = 0; 
i < 16; ++i) { 3136 strlcpy(dgid, p + i * 2, sizeof(dgid)); 3137 if (sscanf(dgid, "%hhx", 3138 &target->orig_dgid.raw[i]) < 1) { 3139 ret = -EINVAL; 3140 kfree(p); 3141 goto out; 3142 } 3143 } 3144 kfree(p); 3145 break; 3146 3147 case SRP_OPT_PKEY: 3148 if (match_hex(args, &token)) { 3149 pr_warn("bad P_Key parameter '%s'\n", p); 3150 goto out; 3151 } 3152 target->pkey = cpu_to_be16(token); 3153 break; 3154 3155 case SRP_OPT_SERVICE_ID: 3156 p = match_strdup(args); 3157 if (!p) { 3158 ret = -ENOMEM; 3159 goto out; 3160 } 3161 target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16)); 3162 kfree(p); 3163 break; 3164 3165 case SRP_OPT_MAX_SECT: 3166 if (match_int(args, &token)) { 3167 pr_warn("bad max sect parameter '%s'\n", p); 3168 goto out; 3169 } 3170 target->scsi_host->max_sectors = token; 3171 break; 3172 3173 case SRP_OPT_QUEUE_SIZE: 3174 if (match_int(args, &token) || token < 1) { 3175 pr_warn("bad queue_size parameter '%s'\n", p); 3176 goto out; 3177 } 3178 target->scsi_host->can_queue = token; 3179 target->queue_size = token + SRP_RSP_SQ_SIZE + 3180 SRP_TSK_MGMT_SQ_SIZE; 3181 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN)) 3182 target->scsi_host->cmd_per_lun = token; 3183 break; 3184 3185 case SRP_OPT_MAX_CMD_PER_LUN: 3186 if (match_int(args, &token) || token < 1) { 3187 pr_warn("bad max cmd_per_lun parameter '%s'\n", 3188 p); 3189 goto out; 3190 } 3191 target->scsi_host->cmd_per_lun = token; 3192 break; 3193 3194 case SRP_OPT_IO_CLASS: 3195 if (match_hex(args, &token)) { 3196 pr_warn("bad IO class parameter '%s'\n", p); 3197 goto out; 3198 } 3199 if (token != SRP_REV10_IB_IO_CLASS && 3200 token != SRP_REV16A_IB_IO_CLASS) { 3201 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n", 3202 token, SRP_REV10_IB_IO_CLASS, 3203 SRP_REV16A_IB_IO_CLASS); 3204 goto out; 3205 } 3206 target->io_class = token; 3207 break; 3208 3209 case SRP_OPT_INITIATOR_EXT: 3210 p = match_strdup(args); 3211 if (!p) { 3212 ret = -ENOMEM; 3213 goto out; 3214 } 3215 target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16)); 3216 kfree(p); 3217 break; 3218 3219 case SRP_OPT_CMD_SG_ENTRIES: 3220 if (match_int(args, &token) || token < 1 || token > 255) { 3221 pr_warn("bad max cmd_sg_entries parameter '%s'\n", 3222 p); 3223 goto out; 3224 } 3225 target->cmd_sg_cnt = token; 3226 break; 3227 3228 case SRP_OPT_ALLOW_EXT_SG: 3229 if (match_int(args, &token)) { 3230 pr_warn("bad allow_ext_sg parameter '%s'\n", p); 3231 goto out; 3232 } 3233 target->allow_ext_sg = !!token; 3234 break; 3235 3236 case SRP_OPT_SG_TABLESIZE: 3237 if (match_int(args, &token) || token < 1 || 3238 token > SG_MAX_SEGMENTS) { 3239 pr_warn("bad max sg_tablesize parameter '%s'\n", 3240 p); 3241 goto out; 3242 } 3243 target->sg_tablesize = token; 3244 break; 3245 3246 case SRP_OPT_COMP_VECTOR: 3247 if (match_int(args, &token) || token < 0) { 3248 pr_warn("bad comp_vector parameter '%s'\n", p); 3249 goto out; 3250 } 3251 target->comp_vector = token; 3252 break; 3253 3254 case SRP_OPT_TL_RETRY_COUNT: 3255 if (match_int(args, &token) || token < 2 || token > 7) { 3256 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n", 3257 p); 3258 goto out; 3259 } 3260 target->tl_retry_count = token; 3261 break; 3262 3263 default: 3264 pr_warn("unknown parameter or missing value '%s' in target creation request\n", 3265 p); 3266 goto out; 3267 } 3268 } 3269 3270 if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL) 3271 ret = 0; 3272 else 3273 for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i) 3274 if 
((srp_opt_tokens[i].token & SRP_OPT_ALL) && 3275 !(srp_opt_tokens[i].token & opt_mask)) 3276 pr_warn("target creation request is missing parameter '%s'\n", 3277 srp_opt_tokens[i].pattern); 3278 3279 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue 3280 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN)) 3281 pr_warn("cmd_per_lun = %d > queue_size = %d\n", 3282 target->scsi_host->cmd_per_lun, 3283 target->scsi_host->can_queue); 3284 3285 out: 3286 kfree(options); 3287 return ret; 3288 } 3289 3290 static ssize_t srp_create_target(struct device *dev, 3291 struct device_attribute *attr, 3292 const char *buf, size_t count) 3293 { 3294 struct srp_host *host = 3295 container_of(dev, struct srp_host, dev); 3296 struct Scsi_Host *target_host; 3297 struct srp_target_port *target; 3298 struct srp_rdma_ch *ch; 3299 struct srp_device *srp_dev = host->srp_dev; 3300 struct ib_device *ibdev = srp_dev->dev; 3301 int ret, node_idx, node, cpu, i; 3302 unsigned int max_sectors_per_mr, mr_per_cmd = 0; 3303 bool multich = false; 3304 3305 target_host = scsi_host_alloc(&srp_template, 3306 sizeof (struct srp_target_port)); 3307 if (!target_host) 3308 return -ENOMEM; 3309 3310 target_host->transportt = ib_srp_transport_template; 3311 target_host->max_channel = 0; 3312 target_host->max_id = 1; 3313 target_host->max_lun = -1LL; 3314 target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb; 3315 3316 target = host_to_target(target_host); 3317 3318 target->io_class = SRP_REV16A_IB_IO_CLASS; 3319 target->scsi_host = target_host; 3320 target->srp_host = host; 3321 target->pd = host->srp_dev->pd; 3322 target->lkey = host->srp_dev->pd->local_dma_lkey; 3323 target->cmd_sg_cnt = cmd_sg_entries; 3324 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries; 3325 target->allow_ext_sg = allow_ext_sg; 3326 target->tl_retry_count = 7; 3327 target->queue_size = SRP_DEFAULT_QUEUE_SIZE; 3328 3329 /* 3330 * Avoid that the SCSI host can be removed by srp_remove_target() 3331 * before this function returns. 3332 */ 3333 scsi_host_get(target->scsi_host); 3334 3335 ret = mutex_lock_interruptible(&host->add_target_mutex); 3336 if (ret < 0) 3337 goto put; 3338 3339 ret = srp_parse_options(buf, target); 3340 if (ret) 3341 goto out; 3342 3343 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE; 3344 3345 if (!srp_conn_unique(target->srp_host, target)) { 3346 shost_printk(KERN_INFO, target->scsi_host, 3347 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n", 3348 be64_to_cpu(target->id_ext), 3349 be64_to_cpu(target->ioc_guid), 3350 be64_to_cpu(target->initiator_ext)); 3351 ret = -EEXIST; 3352 goto out; 3353 } 3354 3355 if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg && 3356 target->cmd_sg_cnt < target->sg_tablesize) { 3357 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n"); 3358 target->sg_tablesize = target->cmd_sg_cnt; 3359 } 3360 3361 if (srp_dev->use_fast_reg || srp_dev->use_fmr) { 3362 /* 3363 * FR and FMR can only map one HCA page per entry. If the 3364 * start address is not aligned on a HCA page boundary two 3365 * entries will be used for the head and the tail although 3366 * these two entries combined contain at most one HCA page of 3367 * data. Hence the "+ 1" in the calculation below. 3368 * 3369 * The indirect data buffer descriptor is contiguous so the 3370 * memory for that buffer will only be registered if 3371 * register_always is true. 
Hence add one to mr_per_cmd if 3372 * register_always has been set. 3373 */ 3374 max_sectors_per_mr = srp_dev->max_pages_per_mr << 3375 (ilog2(srp_dev->mr_page_size) - 9); 3376 mr_per_cmd = register_always + 3377 (target->scsi_host->max_sectors + 1 + 3378 max_sectors_per_mr - 1) / max_sectors_per_mr; 3379 pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n", 3380 target->scsi_host->max_sectors, 3381 srp_dev->max_pages_per_mr, srp_dev->mr_page_size, 3382 max_sectors_per_mr, mr_per_cmd); 3383 } 3384 3385 target_host->sg_tablesize = target->sg_tablesize; 3386 target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd; 3387 target->mr_per_cmd = mr_per_cmd; 3388 target->indirect_size = target->sg_tablesize * 3389 sizeof (struct srp_direct_buf); 3390 target->max_iu_len = sizeof (struct srp_cmd) + 3391 sizeof (struct srp_indirect_buf) + 3392 target->cmd_sg_cnt * sizeof (struct srp_direct_buf); 3393 3394 INIT_WORK(&target->tl_err_work, srp_tl_err_work); 3395 INIT_WORK(&target->remove_work, srp_remove_work); 3396 spin_lock_init(&target->lock); 3397 ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL); 3398 if (ret) 3399 goto out; 3400 3401 ret = -ENOMEM; 3402 target->ch_count = max_t(unsigned, num_online_nodes(), 3403 min(ch_count ? : 3404 min(4 * num_online_nodes(), 3405 ibdev->num_comp_vectors), 3406 num_online_cpus())); 3407 target->ch = kcalloc(target->ch_count, sizeof(*target->ch), 3408 GFP_KERNEL); 3409 if (!target->ch) 3410 goto out; 3411 3412 node_idx = 0; 3413 for_each_online_node(node) { 3414 const int ch_start = (node_idx * target->ch_count / 3415 num_online_nodes()); 3416 const int ch_end = ((node_idx + 1) * target->ch_count / 3417 num_online_nodes()); 3418 const int cv_start = (node_idx * ibdev->num_comp_vectors / 3419 num_online_nodes() + target->comp_vector) 3420 % ibdev->num_comp_vectors; 3421 const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors / 3422 num_online_nodes() + target->comp_vector) 3423 % ibdev->num_comp_vectors; 3424 int cpu_idx = 0; 3425 3426 for_each_online_cpu(cpu) { 3427 if (cpu_to_node(cpu) != node) 3428 continue; 3429 if (ch_start + cpu_idx >= ch_end) 3430 continue; 3431 ch = &target->ch[ch_start + cpu_idx]; 3432 ch->target = target; 3433 ch->comp_vector = cv_start == cv_end ? 
cv_start : 3434 cv_start + cpu_idx % (cv_end - cv_start); 3435 spin_lock_init(&ch->lock); 3436 INIT_LIST_HEAD(&ch->free_tx); 3437 ret = srp_new_cm_id(ch); 3438 if (ret) 3439 goto err_disconnect; 3440 3441 ret = srp_create_ch_ib(ch); 3442 if (ret) 3443 goto err_disconnect; 3444 3445 ret = srp_alloc_req_data(ch); 3446 if (ret) 3447 goto err_disconnect; 3448 3449 ret = srp_connect_ch(ch, multich); 3450 if (ret) { 3451 shost_printk(KERN_ERR, target->scsi_host, 3452 PFX "Connection %d/%d to %pI6 failed\n", 3453 ch_start + cpu_idx, 3454 target->ch_count, 3455 ch->target->orig_dgid.raw); 3456 if (node_idx == 0 && cpu_idx == 0) { 3457 goto free_ch; 3458 } else { 3459 srp_free_ch_ib(target, ch); 3460 srp_free_req_data(target, ch); 3461 target->ch_count = ch - target->ch; 3462 goto connected; 3463 } 3464 } 3465 3466 multich = true; 3467 cpu_idx++; 3468 } 3469 node_idx++; 3470 } 3471 3472 connected: 3473 target->scsi_host->nr_hw_queues = target->ch_count; 3474 3475 ret = srp_add_target(host, target); 3476 if (ret) 3477 goto err_disconnect; 3478 3479 if (target->state != SRP_TARGET_REMOVED) { 3480 shost_printk(KERN_DEBUG, target->scsi_host, PFX 3481 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n", 3482 be64_to_cpu(target->id_ext), 3483 be64_to_cpu(target->ioc_guid), 3484 be16_to_cpu(target->pkey), 3485 be64_to_cpu(target->service_id), 3486 target->sgid.raw, target->orig_dgid.raw); 3487 } 3488 3489 ret = count; 3490 3491 out: 3492 mutex_unlock(&host->add_target_mutex); 3493 3494 put: 3495 scsi_host_put(target->scsi_host); 3496 if (ret < 0) 3497 scsi_host_put(target->scsi_host); 3498 3499 return ret; 3500 3501 err_disconnect: 3502 srp_disconnect_target(target); 3503 3504 free_ch: 3505 for (i = 0; i < target->ch_count; i++) { 3506 ch = &target->ch[i]; 3507 srp_free_ch_ib(target, ch); 3508 srp_free_req_data(target, ch); 3509 } 3510 3511 kfree(target->ch); 3512 goto out; 3513 } 3514 3515 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target); 3516 3517 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr, 3518 char *buf) 3519 { 3520 struct srp_host *host = container_of(dev, struct srp_host, dev); 3521 3522 return sprintf(buf, "%s\n", host->srp_dev->dev->name); 3523 } 3524 3525 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); 3526 3527 static ssize_t show_port(struct device *dev, struct device_attribute *attr, 3528 char *buf) 3529 { 3530 struct srp_host *host = container_of(dev, struct srp_host, dev); 3531 3532 return sprintf(buf, "%d\n", host->port); 3533 } 3534 3535 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL); 3536 3537 static struct srp_host *srp_add_port(struct srp_device *device, u8 port) 3538 { 3539 struct srp_host *host; 3540 3541 host = kzalloc(sizeof *host, GFP_KERNEL); 3542 if (!host) 3543 return NULL; 3544 3545 INIT_LIST_HEAD(&host->target_list); 3546 spin_lock_init(&host->target_lock); 3547 init_completion(&host->released); 3548 mutex_init(&host->add_target_mutex); 3549 host->srp_dev = device; 3550 host->port = port; 3551 3552 host->dev.class = &srp_class; 3553 host->dev.parent = device->dev->dev.parent; 3554 dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port); 3555 3556 if (device_register(&host->dev)) 3557 goto free_host; 3558 if (device_create_file(&host->dev, &dev_attr_add_target)) 3559 goto err_class; 3560 if (device_create_file(&host->dev, &dev_attr_ibdev)) 3561 goto err_class; 3562 if (device_create_file(&host->dev, &dev_attr_port)) 3563 goto err_class; 3564 3565 return host; 3566 3567 
err_class: 3568 device_unregister(&host->dev); 3569 3570 free_host: 3571 kfree(host); 3572 3573 return NULL; 3574 } 3575 3576 static void srp_add_one(struct ib_device *device) 3577 { 3578 struct srp_device *srp_dev; 3579 struct ib_device_attr *attr = &device->attrs; 3580 struct srp_host *host; 3581 int mr_page_shift, p; 3582 u64 max_pages_per_mr; 3583 unsigned int flags = 0; 3584 3585 srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL); 3586 if (!srp_dev) 3587 return; 3588 3589 /* 3590 * Use the smallest page size supported by the HCA, down to a 3591 * minimum of 4096 bytes. We're unlikely to build large sglists 3592 * out of smaller entries. 3593 */ 3594 mr_page_shift = max(12, ffs(attr->page_size_cap) - 1); 3595 srp_dev->mr_page_size = 1 << mr_page_shift; 3596 srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1); 3597 max_pages_per_mr = attr->max_mr_size; 3598 do_div(max_pages_per_mr, srp_dev->mr_page_size); 3599 pr_debug("%s: %llu / %u = %llu <> %u\n", __func__, 3600 attr->max_mr_size, srp_dev->mr_page_size, 3601 max_pages_per_mr, SRP_MAX_PAGES_PER_MR); 3602 srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR, 3603 max_pages_per_mr); 3604 3605 srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr && 3606 device->map_phys_fmr && device->unmap_fmr); 3607 srp_dev->has_fr = (attr->device_cap_flags & 3608 IB_DEVICE_MEM_MGT_EXTENSIONS); 3609 if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) { 3610 dev_warn(&device->dev, "neither FMR nor FR is supported\n"); 3611 } else if (!never_register && 3612 attr->max_mr_size >= 2 * srp_dev->mr_page_size) { 3613 srp_dev->use_fast_reg = (srp_dev->has_fr && 3614 (!srp_dev->has_fmr || prefer_fr)); 3615 srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr; 3616 } 3617 3618 if (never_register || !register_always || 3619 (!srp_dev->has_fmr && !srp_dev->has_fr)) 3620 flags |= IB_PD_UNSAFE_GLOBAL_RKEY; 3621 3622 if (srp_dev->use_fast_reg) { 3623 srp_dev->max_pages_per_mr = 3624 min_t(u32, srp_dev->max_pages_per_mr, 3625 attr->max_fast_reg_page_list_len); 3626 } 3627 srp_dev->mr_max_size = srp_dev->mr_page_size * 3628 srp_dev->max_pages_per_mr; 3629 pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n", 3630 device->name, mr_page_shift, attr->max_mr_size, 3631 attr->max_fast_reg_page_list_len, 3632 srp_dev->max_pages_per_mr, srp_dev->mr_max_size); 3633 3634 INIT_LIST_HEAD(&srp_dev->dev_list); 3635 3636 srp_dev->dev = device; 3637 srp_dev->pd = ib_alloc_pd(device, flags); 3638 if (IS_ERR(srp_dev->pd)) 3639 goto free_dev; 3640 3641 3642 for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) { 3643 host = srp_add_port(srp_dev, p); 3644 if (host) 3645 list_add_tail(&host->list, &srp_dev->dev_list); 3646 } 3647 3648 ib_set_client_data(device, &srp_client, srp_dev); 3649 return; 3650 3651 free_dev: 3652 kfree(srp_dev); 3653 } 3654 3655 static void srp_remove_one(struct ib_device *device, void *client_data) 3656 { 3657 struct srp_device *srp_dev; 3658 struct srp_host *host, *tmp_host; 3659 struct srp_target_port *target; 3660 3661 srp_dev = client_data; 3662 if (!srp_dev) 3663 return; 3664 3665 list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) { 3666 device_unregister(&host->dev); 3667 /* 3668 * Wait for the sysfs entry to go away, so that no new 3669 * target ports can be created. 3670 */ 3671 wait_for_completion(&host->released); 3672 3673 /* 3674 * Remove all target ports. 
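 * srp_queue_remove_work() schedules the actual removal on srp_remove_wq;
 * the flush_workqueue() calls further down wait for that work (and any
 * pending tl_err work) to finish before the host structure is freed.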
3675 */ 3676 spin_lock(&host->target_lock); 3677 list_for_each_entry(target, &host->target_list, list) 3678 srp_queue_remove_work(target); 3679 spin_unlock(&host->target_lock); 3680 3681 /* 3682 * Wait for tl_err and target port removal tasks. 3683 */ 3684 flush_workqueue(system_long_wq); 3685 flush_workqueue(srp_remove_wq); 3686 3687 kfree(host); 3688 } 3689 3690 ib_dealloc_pd(srp_dev->pd); 3691 3692 kfree(srp_dev); 3693 } 3694 3695 static struct srp_function_template ib_srp_transport_functions = { 3696 .has_rport_state = true, 3697 .reset_timer_if_blocked = true, 3698 .reconnect_delay = &srp_reconnect_delay, 3699 .fast_io_fail_tmo = &srp_fast_io_fail_tmo, 3700 .dev_loss_tmo = &srp_dev_loss_tmo, 3701 .reconnect = srp_rport_reconnect, 3702 .rport_delete = srp_rport_delete, 3703 .terminate_rport_io = srp_terminate_io, 3704 }; 3705 3706 static int __init srp_init_module(void) 3707 { 3708 int ret; 3709 3710 if (srp_sg_tablesize) { 3711 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n"); 3712 if (!cmd_sg_entries) 3713 cmd_sg_entries = srp_sg_tablesize; 3714 } 3715 3716 if (!cmd_sg_entries) 3717 cmd_sg_entries = SRP_DEF_SG_TABLESIZE; 3718 3719 if (cmd_sg_entries > 255) { 3720 pr_warn("Clamping cmd_sg_entries to 255\n"); 3721 cmd_sg_entries = 255; 3722 } 3723 3724 if (!indirect_sg_entries) 3725 indirect_sg_entries = cmd_sg_entries; 3726 else if (indirect_sg_entries < cmd_sg_entries) { 3727 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n", 3728 cmd_sg_entries); 3729 indirect_sg_entries = cmd_sg_entries; 3730 } 3731 3732 if (indirect_sg_entries > SG_MAX_SEGMENTS) { 3733 pr_warn("Clamping indirect_sg_entries to %u\n", 3734 SG_MAX_SEGMENTS); 3735 indirect_sg_entries = SG_MAX_SEGMENTS; 3736 } 3737 3738 srp_remove_wq = create_workqueue("srp_remove"); 3739 if (!srp_remove_wq) { 3740 ret = -ENOMEM; 3741 goto out; 3742 } 3743 3744 ret = -ENOMEM; 3745 ib_srp_transport_template = 3746 srp_attach_transport(&ib_srp_transport_functions); 3747 if (!ib_srp_transport_template) 3748 goto destroy_wq; 3749 3750 ret = class_register(&srp_class); 3751 if (ret) { 3752 pr_err("couldn't register class infiniband_srp\n"); 3753 goto release_tr; 3754 } 3755 3756 ib_sa_register_client(&srp_sa_client); 3757 3758 ret = ib_register_client(&srp_client); 3759 if (ret) { 3760 pr_err("couldn't register IB client\n"); 3761 goto unreg_sa; 3762 } 3763 3764 out: 3765 return ret; 3766 3767 unreg_sa: 3768 ib_sa_unregister_client(&srp_sa_client); 3769 class_unregister(&srp_class); 3770 3771 release_tr: 3772 srp_release_transport(ib_srp_transport_template); 3773 3774 destroy_wq: 3775 destroy_workqueue(srp_remove_wq); 3776 goto out; 3777 } 3778 3779 static void __exit srp_cleanup_module(void) 3780 { 3781 ib_unregister_client(&srp_client); 3782 ib_sa_unregister_client(&srp_sa_client); 3783 class_unregister(&srp_class); 3784 srp_release_transport(ib_srp_transport_template); 3785 destroy_workqueue(srp_remove_wq); 3786 } 3787 3788 module_init(srp_init_module); 3789 module_exit(srp_cleanup_module); 3790
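/*
 * Usage sketch (illustrative values only, not part of the driver): a target
 * port is created from user space by writing the option string parsed by
 * srp_parse_options() to the add_target attribute of a host device
 * registered by srp_add_port(), for example:
 *
 *   echo id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,pkey=ffff,service_id=<service ID> \
 *       > /sys/class/infiniband_srp/srp-<hca name>-<port>/add_target
 *
 * Optional parameters such as queue_size, cmd_sg_entries, comp_vector and
 * tl_retry_count (see srp_opt_tokens[]) may be appended to the same
 * comma-separated string.
 */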