/*
 * Copyright (c) 2005 Cisco Systems.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/parser.h>
#include <linux/random.h>
#include <linux/jiffies.h>
#include <linux/lockdep.h>
#include <linux/inet.h>
#include <rdma/ib_cache.h>

#include <linux/atomic.h>

#include <scsi/scsi.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_dbg.h>
#include <scsi/scsi_tcq.h>
#include <scsi/srp.h>
#include <scsi/scsi_transport_srp.h>

#include "ib_srp.h"

#define DRV_NAME	"ib_srp"
#define PFX		DRV_NAME ": "

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
MODULE_LICENSE("Dual BSD/GPL");

#if !defined(CONFIG_DYNAMIC_DEBUG)
#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
#define DYNAMIC_DEBUG_BRANCH(descriptor) false
#endif

static unsigned int srp_sg_tablesize;
static unsigned int cmd_sg_entries;
static unsigned int indirect_sg_entries;
static bool allow_ext_sg;
static bool prefer_fr = true;
static bool register_always = true;
static bool never_register;
static int topspin_workarounds = 1;

module_param(srp_sg_tablesize, uint, 0444);
MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");

module_param(cmd_sg_entries, uint, 0444);
MODULE_PARM_DESC(cmd_sg_entries,
		 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");

module_param(indirect_sg_entries, uint, 0444);
MODULE_PARM_DESC(indirect_sg_entries,
		 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");

module_param(allow_ext_sg, bool, 0444);
MODULE_PARM_DESC(allow_ext_sg,
		 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");

module_param(topspin_workarounds, int, 0444);
MODULE_PARM_DESC(topspin_workarounds,
		 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");

module_param(prefer_fr, bool, 0444);
MODULE_PARM_DESC(prefer_fr,
		 "Whether to use fast registration if both FMR and fast registration are supported");

module_param(register_always, bool, 0444);
MODULE_PARM_DESC(register_always,
		 "Use memory registration even for contiguous memory regions");

module_param(never_register, bool, 0444);
MODULE_PARM_DESC(never_register, "Never register memory");

static const struct kernel_param_ops srp_tmo_ops;

static int srp_reconnect_delay = 10;
module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");

static int srp_fast_io_fail_tmo = 15;
module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(fast_io_fail_tmo,
		 "Number of seconds between the observation of a transport"
		 " layer error and failing all I/O. \"off\" means that this"
		 " functionality is disabled.");

static int srp_dev_loss_tmo = 600;
module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dev_loss_tmo,
		 "Maximum number of seconds that the SRP transport should"
		 " insulate transport layer errors. After this time has been"
		 " exceeded the SCSI host is removed. Should be"
		 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
		 " if fast_io_fail_tmo has not been set. \"off\" means that"
		 " this functionality is disabled.");

static unsigned ch_count;
module_param(ch_count, uint, 0444);
MODULE_PARM_DESC(ch_count,
		 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");

static void srp_add_one(struct ib_device *device);
static void srp_remove_one(struct ib_device *device, void *client_data);
static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
		const char *opname);
static int srp_ib_cm_handler(struct ib_cm_id *cm_id,
			     const struct ib_cm_event *event);
static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
			       struct rdma_cm_event *event);

static struct scsi_transport_template *ib_srp_transport_template;
static struct workqueue_struct *srp_remove_wq;

static struct ib_client srp_client = {
	.name   = "srp",
	.add    = srp_add_one,
	.remove = srp_remove_one
};

static struct ib_sa_client srp_sa_client;

static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
{
	int tmo = *(int *)kp->arg;

	if (tmo >= 0)
		return sprintf(buffer, "%d", tmo);
	else
		return sprintf(buffer, "off");
}

static int srp_tmo_set(const char *val, const struct kernel_param *kp)
{
	int tmo, res;

	res = srp_parse_tmo(&tmo, val);
	if (res)
		goto out;

	if (kp->arg == &srp_reconnect_delay)
		res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
				    srp_dev_loss_tmo);
	else if (kp->arg == &srp_fast_io_fail_tmo)
		res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
	else
		res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
				    tmo);
	if (res)
		goto out;
	*(int *)kp->arg = tmo;

out:
	return res;
}

static const struct kernel_param_ops srp_tmo_ops = {
	.get = srp_tmo_get,
	.set = srp_tmo_set,
};

static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
{
	return (struct srp_target_port *) host->hostdata;
}

static const char *srp_target_info(struct Scsi_Host *host)
{
	return host_to_target(host)->target_name;
}

static int srp_target_is_topspin(struct srp_target_port *target)
{
	static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
	static const u8 cisco_oui[3]   = { 0x00, 0x1b, 0x0d };

	return topspin_workarounds &&
		(!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
		 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
}

static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
				   gfp_t gfp_mask,
				   enum dma_data_direction direction)
{
	struct srp_iu *iu;

	iu = kmalloc(sizeof *iu, gfp_mask);
	if (!iu)
		goto out;

	iu->buf = kzalloc(size, gfp_mask);
	if (!iu->buf)
		goto out_free_iu;

	iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
				    direction);
	if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
		goto out_free_buf;

	iu->size      = size;
	iu->direction = direction;

	return iu;

out_free_buf:
	kfree(iu->buf);
out_free_iu:
	kfree(iu);
out:
	return NULL;
}

static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
{
	if (!iu)
		return;

	ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
			    iu->direction);
	kfree(iu->buf);
	kfree(iu);
}

static void srp_qp_event(struct ib_event *event, void *context)
{
	pr_debug("QP event %s (%d)\n",
		 ib_event_msg(event->event),
event->event); 267 } 268 269 static int srp_init_ib_qp(struct srp_target_port *target, 270 struct ib_qp *qp) 271 { 272 struct ib_qp_attr *attr; 273 int ret; 274 275 attr = kmalloc(sizeof *attr, GFP_KERNEL); 276 if (!attr) 277 return -ENOMEM; 278 279 ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev, 280 target->srp_host->port, 281 be16_to_cpu(target->ib_cm.pkey), 282 &attr->pkey_index); 283 if (ret) 284 goto out; 285 286 attr->qp_state = IB_QPS_INIT; 287 attr->qp_access_flags = (IB_ACCESS_REMOTE_READ | 288 IB_ACCESS_REMOTE_WRITE); 289 attr->port_num = target->srp_host->port; 290 291 ret = ib_modify_qp(qp, attr, 292 IB_QP_STATE | 293 IB_QP_PKEY_INDEX | 294 IB_QP_ACCESS_FLAGS | 295 IB_QP_PORT); 296 297 out: 298 kfree(attr); 299 return ret; 300 } 301 302 static int srp_new_ib_cm_id(struct srp_rdma_ch *ch) 303 { 304 struct srp_target_port *target = ch->target; 305 struct ib_cm_id *new_cm_id; 306 307 new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev, 308 srp_ib_cm_handler, ch); 309 if (IS_ERR(new_cm_id)) 310 return PTR_ERR(new_cm_id); 311 312 if (ch->ib_cm.cm_id) 313 ib_destroy_cm_id(ch->ib_cm.cm_id); 314 ch->ib_cm.cm_id = new_cm_id; 315 if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev, 316 target->srp_host->port)) 317 ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_OPA; 318 else 319 ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_IB; 320 ch->ib_cm.path.sgid = target->sgid; 321 ch->ib_cm.path.dgid = target->ib_cm.orig_dgid; 322 ch->ib_cm.path.pkey = target->ib_cm.pkey; 323 ch->ib_cm.path.service_id = target->ib_cm.service_id; 324 325 return 0; 326 } 327 328 static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch) 329 { 330 struct srp_target_port *target = ch->target; 331 struct rdma_cm_id *new_cm_id; 332 int ret; 333 334 new_cm_id = rdma_create_id(target->net, srp_rdma_cm_handler, ch, 335 RDMA_PS_TCP, IB_QPT_RC); 336 if (IS_ERR(new_cm_id)) { 337 ret = PTR_ERR(new_cm_id); 338 new_cm_id = NULL; 339 goto out; 340 } 341 342 init_completion(&ch->done); 343 ret = rdma_resolve_addr(new_cm_id, target->rdma_cm.src_specified ? 344 (struct sockaddr *)&target->rdma_cm.src : NULL, 345 (struct sockaddr *)&target->rdma_cm.dst, 346 SRP_PATH_REC_TIMEOUT_MS); 347 if (ret) { 348 pr_err("No route available from %pIS to %pIS (%d)\n", 349 &target->rdma_cm.src, &target->rdma_cm.dst, ret); 350 goto out; 351 } 352 ret = wait_for_completion_interruptible(&ch->done); 353 if (ret < 0) 354 goto out; 355 356 ret = ch->status; 357 if (ret) { 358 pr_err("Resolving address %pIS failed (%d)\n", 359 &target->rdma_cm.dst, ret); 360 goto out; 361 } 362 363 swap(ch->rdma_cm.cm_id, new_cm_id); 364 365 out: 366 if (new_cm_id) 367 rdma_destroy_id(new_cm_id); 368 369 return ret; 370 } 371 372 static int srp_new_cm_id(struct srp_rdma_ch *ch) 373 { 374 struct srp_target_port *target = ch->target; 375 376 return target->using_rdma_cm ? 
srp_new_rdma_cm_id(ch) : 377 srp_new_ib_cm_id(ch); 378 } 379 380 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target) 381 { 382 struct srp_device *dev = target->srp_host->srp_dev; 383 struct ib_fmr_pool_param fmr_param; 384 385 memset(&fmr_param, 0, sizeof(fmr_param)); 386 fmr_param.pool_size = target->mr_pool_size; 387 fmr_param.dirty_watermark = fmr_param.pool_size / 4; 388 fmr_param.cache = 1; 389 fmr_param.max_pages_per_fmr = dev->max_pages_per_mr; 390 fmr_param.page_shift = ilog2(dev->mr_page_size); 391 fmr_param.access = (IB_ACCESS_LOCAL_WRITE | 392 IB_ACCESS_REMOTE_WRITE | 393 IB_ACCESS_REMOTE_READ); 394 395 return ib_create_fmr_pool(dev->pd, &fmr_param); 396 } 397 398 /** 399 * srp_destroy_fr_pool() - free the resources owned by a pool 400 * @pool: Fast registration pool to be destroyed. 401 */ 402 static void srp_destroy_fr_pool(struct srp_fr_pool *pool) 403 { 404 int i; 405 struct srp_fr_desc *d; 406 407 if (!pool) 408 return; 409 410 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { 411 if (d->mr) 412 ib_dereg_mr(d->mr); 413 } 414 kfree(pool); 415 } 416 417 /** 418 * srp_create_fr_pool() - allocate and initialize a pool for fast registration 419 * @device: IB device to allocate fast registration descriptors for. 420 * @pd: Protection domain associated with the FR descriptors. 421 * @pool_size: Number of descriptors to allocate. 422 * @max_page_list_len: Maximum fast registration work request page list length. 423 */ 424 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, 425 struct ib_pd *pd, int pool_size, 426 int max_page_list_len) 427 { 428 struct srp_fr_pool *pool; 429 struct srp_fr_desc *d; 430 struct ib_mr *mr; 431 int i, ret = -EINVAL; 432 enum ib_mr_type mr_type; 433 434 if (pool_size <= 0) 435 goto err; 436 ret = -ENOMEM; 437 pool = kzalloc(sizeof(struct srp_fr_pool) + 438 pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL); 439 if (!pool) 440 goto err; 441 pool->size = pool_size; 442 pool->max_page_list_len = max_page_list_len; 443 spin_lock_init(&pool->lock); 444 INIT_LIST_HEAD(&pool->free_list); 445 446 if (device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG) 447 mr_type = IB_MR_TYPE_SG_GAPS; 448 else 449 mr_type = IB_MR_TYPE_MEM_REG; 450 451 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { 452 mr = ib_alloc_mr(pd, mr_type, max_page_list_len); 453 if (IS_ERR(mr)) { 454 ret = PTR_ERR(mr); 455 if (ret == -ENOMEM) 456 pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n", 457 dev_name(&device->dev)); 458 goto destroy_pool; 459 } 460 d->mr = mr; 461 list_add_tail(&d->entry, &pool->free_list); 462 } 463 464 out: 465 return pool; 466 467 destroy_pool: 468 srp_destroy_fr_pool(pool); 469 470 err: 471 pool = ERR_PTR(ret); 472 goto out; 473 } 474 475 /** 476 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration 477 * @pool: Pool to obtain descriptor from. 478 */ 479 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool) 480 { 481 struct srp_fr_desc *d = NULL; 482 unsigned long flags; 483 484 spin_lock_irqsave(&pool->lock, flags); 485 if (!list_empty(&pool->free_list)) { 486 d = list_first_entry(&pool->free_list, typeof(*d), entry); 487 list_del(&d->entry); 488 } 489 spin_unlock_irqrestore(&pool->lock, flags); 490 491 return d; 492 } 493 494 /** 495 * srp_fr_pool_put() - put an FR descriptor back in the free list 496 * @pool: Pool the descriptor was allocated from. 497 * @desc: Pointer to an array of fast registration descriptor pointers. 
498 * @n: Number of descriptors to put back. 499 * 500 * Note: The caller must already have queued an invalidation request for 501 * desc->mr->rkey before calling this function. 502 */ 503 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc, 504 int n) 505 { 506 unsigned long flags; 507 int i; 508 509 spin_lock_irqsave(&pool->lock, flags); 510 for (i = 0; i < n; i++) 511 list_add(&desc[i]->entry, &pool->free_list); 512 spin_unlock_irqrestore(&pool->lock, flags); 513 } 514 515 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target) 516 { 517 struct srp_device *dev = target->srp_host->srp_dev; 518 519 return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size, 520 dev->max_pages_per_mr); 521 } 522 523 /** 524 * srp_destroy_qp() - destroy an RDMA queue pair 525 * @ch: SRP RDMA channel. 526 * 527 * Drain the qp before destroying it. This avoids that the receive 528 * completion handler can access the queue pair while it is 529 * being destroyed. 530 */ 531 static void srp_destroy_qp(struct srp_rdma_ch *ch) 532 { 533 spin_lock_irq(&ch->lock); 534 ib_process_cq_direct(ch->send_cq, -1); 535 spin_unlock_irq(&ch->lock); 536 537 ib_drain_qp(ch->qp); 538 ib_destroy_qp(ch->qp); 539 } 540 541 static int srp_create_ch_ib(struct srp_rdma_ch *ch) 542 { 543 struct srp_target_port *target = ch->target; 544 struct srp_device *dev = target->srp_host->srp_dev; 545 struct ib_qp_init_attr *init_attr; 546 struct ib_cq *recv_cq, *send_cq; 547 struct ib_qp *qp; 548 struct ib_fmr_pool *fmr_pool = NULL; 549 struct srp_fr_pool *fr_pool = NULL; 550 const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2; 551 int ret; 552 553 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL); 554 if (!init_attr) 555 return -ENOMEM; 556 557 /* queue_size + 1 for ib_drain_rq() */ 558 recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1, 559 ch->comp_vector, IB_POLL_SOFTIRQ); 560 if (IS_ERR(recv_cq)) { 561 ret = PTR_ERR(recv_cq); 562 goto err; 563 } 564 565 send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size, 566 ch->comp_vector, IB_POLL_DIRECT); 567 if (IS_ERR(send_cq)) { 568 ret = PTR_ERR(send_cq); 569 goto err_recv_cq; 570 } 571 572 init_attr->event_handler = srp_qp_event; 573 init_attr->cap.max_send_wr = m * target->queue_size; 574 init_attr->cap.max_recv_wr = target->queue_size + 1; 575 init_attr->cap.max_recv_sge = 1; 576 init_attr->cap.max_send_sge = 1; 577 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; 578 init_attr->qp_type = IB_QPT_RC; 579 init_attr->send_cq = send_cq; 580 init_attr->recv_cq = recv_cq; 581 582 if (target->using_rdma_cm) { 583 ret = rdma_create_qp(ch->rdma_cm.cm_id, dev->pd, init_attr); 584 qp = ch->rdma_cm.cm_id->qp; 585 } else { 586 qp = ib_create_qp(dev->pd, init_attr); 587 if (!IS_ERR(qp)) { 588 ret = srp_init_ib_qp(target, qp); 589 if (ret) 590 ib_destroy_qp(qp); 591 } else { 592 ret = PTR_ERR(qp); 593 } 594 } 595 if (ret) { 596 pr_err("QP creation failed for dev %s: %d\n", 597 dev_name(&dev->dev->dev), ret); 598 goto err_send_cq; 599 } 600 601 if (dev->use_fast_reg) { 602 fr_pool = srp_alloc_fr_pool(target); 603 if (IS_ERR(fr_pool)) { 604 ret = PTR_ERR(fr_pool); 605 shost_printk(KERN_WARNING, target->scsi_host, PFX 606 "FR pool allocation failed (%d)\n", ret); 607 goto err_qp; 608 } 609 } else if (dev->use_fmr) { 610 fmr_pool = srp_alloc_fmr_pool(target); 611 if (IS_ERR(fmr_pool)) { 612 ret = PTR_ERR(fmr_pool); 613 shost_printk(KERN_WARNING, target->scsi_host, PFX 614 "FMR pool allocation failed (%d)\n", ret); 615 goto err_qp; 616 } 617 } 
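	/*
	 * Everything new (QP, CQs and, if used, the FR or FMR pool) has been
	 * allocated successfully at this point, so it is now safe to tear
	 * down and replace the channel's old resources below; the error
	 * paths above only free what was newly created.
	 */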
618 619 if (ch->qp) 620 srp_destroy_qp(ch); 621 if (ch->recv_cq) 622 ib_free_cq(ch->recv_cq); 623 if (ch->send_cq) 624 ib_free_cq(ch->send_cq); 625 626 ch->qp = qp; 627 ch->recv_cq = recv_cq; 628 ch->send_cq = send_cq; 629 630 if (dev->use_fast_reg) { 631 if (ch->fr_pool) 632 srp_destroy_fr_pool(ch->fr_pool); 633 ch->fr_pool = fr_pool; 634 } else if (dev->use_fmr) { 635 if (ch->fmr_pool) 636 ib_destroy_fmr_pool(ch->fmr_pool); 637 ch->fmr_pool = fmr_pool; 638 } 639 640 kfree(init_attr); 641 return 0; 642 643 err_qp: 644 if (target->using_rdma_cm) 645 rdma_destroy_qp(ch->rdma_cm.cm_id); 646 else 647 ib_destroy_qp(qp); 648 649 err_send_cq: 650 ib_free_cq(send_cq); 651 652 err_recv_cq: 653 ib_free_cq(recv_cq); 654 655 err: 656 kfree(init_attr); 657 return ret; 658 } 659 660 /* 661 * Note: this function may be called without srp_alloc_iu_bufs() having been 662 * invoked. Hence the ch->[rt]x_ring checks. 663 */ 664 static void srp_free_ch_ib(struct srp_target_port *target, 665 struct srp_rdma_ch *ch) 666 { 667 struct srp_device *dev = target->srp_host->srp_dev; 668 int i; 669 670 if (!ch->target) 671 return; 672 673 if (target->using_rdma_cm) { 674 if (ch->rdma_cm.cm_id) { 675 rdma_destroy_id(ch->rdma_cm.cm_id); 676 ch->rdma_cm.cm_id = NULL; 677 } 678 } else { 679 if (ch->ib_cm.cm_id) { 680 ib_destroy_cm_id(ch->ib_cm.cm_id); 681 ch->ib_cm.cm_id = NULL; 682 } 683 } 684 685 /* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */ 686 if (!ch->qp) 687 return; 688 689 if (dev->use_fast_reg) { 690 if (ch->fr_pool) 691 srp_destroy_fr_pool(ch->fr_pool); 692 } else if (dev->use_fmr) { 693 if (ch->fmr_pool) 694 ib_destroy_fmr_pool(ch->fmr_pool); 695 } 696 697 srp_destroy_qp(ch); 698 ib_free_cq(ch->send_cq); 699 ib_free_cq(ch->recv_cq); 700 701 /* 702 * Avoid that the SCSI error handler tries to use this channel after 703 * it has been freed. The SCSI error handler can namely continue 704 * trying to perform recovery actions after scsi_remove_host() 705 * returned. 
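	 * Clearing ch->target below also turns a repeated srp_free_ch_ib()
	 * call into a no-op, since this function returns early when
	 * ch->target is NULL.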
706 */ 707 ch->target = NULL; 708 709 ch->qp = NULL; 710 ch->send_cq = ch->recv_cq = NULL; 711 712 if (ch->rx_ring) { 713 for (i = 0; i < target->queue_size; ++i) 714 srp_free_iu(target->srp_host, ch->rx_ring[i]); 715 kfree(ch->rx_ring); 716 ch->rx_ring = NULL; 717 } 718 if (ch->tx_ring) { 719 for (i = 0; i < target->queue_size; ++i) 720 srp_free_iu(target->srp_host, ch->tx_ring[i]); 721 kfree(ch->tx_ring); 722 ch->tx_ring = NULL; 723 } 724 } 725 726 static void srp_path_rec_completion(int status, 727 struct sa_path_rec *pathrec, 728 void *ch_ptr) 729 { 730 struct srp_rdma_ch *ch = ch_ptr; 731 struct srp_target_port *target = ch->target; 732 733 ch->status = status; 734 if (status) 735 shost_printk(KERN_ERR, target->scsi_host, 736 PFX "Got failed path rec status %d\n", status); 737 else 738 ch->ib_cm.path = *pathrec; 739 complete(&ch->done); 740 } 741 742 static int srp_ib_lookup_path(struct srp_rdma_ch *ch) 743 { 744 struct srp_target_port *target = ch->target; 745 int ret; 746 747 ch->ib_cm.path.numb_path = 1; 748 749 init_completion(&ch->done); 750 751 ch->ib_cm.path_query_id = ib_sa_path_rec_get(&srp_sa_client, 752 target->srp_host->srp_dev->dev, 753 target->srp_host->port, 754 &ch->ib_cm.path, 755 IB_SA_PATH_REC_SERVICE_ID | 756 IB_SA_PATH_REC_DGID | 757 IB_SA_PATH_REC_SGID | 758 IB_SA_PATH_REC_NUMB_PATH | 759 IB_SA_PATH_REC_PKEY, 760 SRP_PATH_REC_TIMEOUT_MS, 761 GFP_KERNEL, 762 srp_path_rec_completion, 763 ch, &ch->ib_cm.path_query); 764 if (ch->ib_cm.path_query_id < 0) 765 return ch->ib_cm.path_query_id; 766 767 ret = wait_for_completion_interruptible(&ch->done); 768 if (ret < 0) 769 return ret; 770 771 if (ch->status < 0) 772 shost_printk(KERN_WARNING, target->scsi_host, 773 PFX "Path record query failed: sgid %pI6, dgid %pI6, pkey %#04x, service_id %#16llx\n", 774 ch->ib_cm.path.sgid.raw, ch->ib_cm.path.dgid.raw, 775 be16_to_cpu(target->ib_cm.pkey), 776 be64_to_cpu(target->ib_cm.service_id)); 777 778 return ch->status; 779 } 780 781 static int srp_rdma_lookup_path(struct srp_rdma_ch *ch) 782 { 783 struct srp_target_port *target = ch->target; 784 int ret; 785 786 init_completion(&ch->done); 787 788 ret = rdma_resolve_route(ch->rdma_cm.cm_id, SRP_PATH_REC_TIMEOUT_MS); 789 if (ret) 790 return ret; 791 792 wait_for_completion_interruptible(&ch->done); 793 794 if (ch->status != 0) 795 shost_printk(KERN_WARNING, target->scsi_host, 796 PFX "Path resolution failed\n"); 797 798 return ch->status; 799 } 800 801 static int srp_lookup_path(struct srp_rdma_ch *ch) 802 { 803 struct srp_target_port *target = ch->target; 804 805 return target->using_rdma_cm ? 
srp_rdma_lookup_path(ch) : 806 srp_ib_lookup_path(ch); 807 } 808 809 static u8 srp_get_subnet_timeout(struct srp_host *host) 810 { 811 struct ib_port_attr attr; 812 int ret; 813 u8 subnet_timeout = 18; 814 815 ret = ib_query_port(host->srp_dev->dev, host->port, &attr); 816 if (ret == 0) 817 subnet_timeout = attr.subnet_timeout; 818 819 if (unlikely(subnet_timeout < 15)) 820 pr_warn("%s: subnet timeout %d may cause SRP login to fail.\n", 821 dev_name(&host->srp_dev->dev->dev), subnet_timeout); 822 823 return subnet_timeout; 824 } 825 826 static int srp_send_req(struct srp_rdma_ch *ch, bool multich) 827 { 828 struct srp_target_port *target = ch->target; 829 struct { 830 struct rdma_conn_param rdma_param; 831 struct srp_login_req_rdma rdma_req; 832 struct ib_cm_req_param ib_param; 833 struct srp_login_req ib_req; 834 } *req = NULL; 835 char *ipi, *tpi; 836 int status; 837 838 req = kzalloc(sizeof *req, GFP_KERNEL); 839 if (!req) 840 return -ENOMEM; 841 842 req->ib_param.flow_control = 1; 843 req->ib_param.retry_count = target->tl_retry_count; 844 845 /* 846 * Pick some arbitrary defaults here; we could make these 847 * module parameters if anyone cared about setting them. 848 */ 849 req->ib_param.responder_resources = 4; 850 req->ib_param.rnr_retry_count = 7; 851 req->ib_param.max_cm_retries = 15; 852 853 req->ib_req.opcode = SRP_LOGIN_REQ; 854 req->ib_req.tag = 0; 855 req->ib_req.req_it_iu_len = cpu_to_be32(target->max_iu_len); 856 req->ib_req.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | 857 SRP_BUF_FORMAT_INDIRECT); 858 req->ib_req.req_flags = (multich ? SRP_MULTICHAN_MULTI : 859 SRP_MULTICHAN_SINGLE); 860 861 if (target->using_rdma_cm) { 862 req->rdma_param.flow_control = req->ib_param.flow_control; 863 req->rdma_param.responder_resources = 864 req->ib_param.responder_resources; 865 req->rdma_param.initiator_depth = req->ib_param.initiator_depth; 866 req->rdma_param.retry_count = req->ib_param.retry_count; 867 req->rdma_param.rnr_retry_count = req->ib_param.rnr_retry_count; 868 req->rdma_param.private_data = &req->rdma_req; 869 req->rdma_param.private_data_len = sizeof(req->rdma_req); 870 871 req->rdma_req.opcode = req->ib_req.opcode; 872 req->rdma_req.tag = req->ib_req.tag; 873 req->rdma_req.req_it_iu_len = req->ib_req.req_it_iu_len; 874 req->rdma_req.req_buf_fmt = req->ib_req.req_buf_fmt; 875 req->rdma_req.req_flags = req->ib_req.req_flags; 876 877 ipi = req->rdma_req.initiator_port_id; 878 tpi = req->rdma_req.target_port_id; 879 } else { 880 u8 subnet_timeout; 881 882 subnet_timeout = srp_get_subnet_timeout(target->srp_host); 883 884 req->ib_param.primary_path = &ch->ib_cm.path; 885 req->ib_param.alternate_path = NULL; 886 req->ib_param.service_id = target->ib_cm.service_id; 887 get_random_bytes(&req->ib_param.starting_psn, 4); 888 req->ib_param.starting_psn &= 0xffffff; 889 req->ib_param.qp_num = ch->qp->qp_num; 890 req->ib_param.qp_type = ch->qp->qp_type; 891 req->ib_param.local_cm_response_timeout = subnet_timeout + 2; 892 req->ib_param.remote_cm_response_timeout = subnet_timeout + 2; 893 req->ib_param.private_data = &req->ib_req; 894 req->ib_param.private_data_len = sizeof(req->ib_req); 895 896 ipi = req->ib_req.initiator_port_id; 897 tpi = req->ib_req.target_port_id; 898 } 899 900 /* 901 * In the published SRP specification (draft rev. 16a), the 902 * port identifier format is 8 bytes of ID extension followed 903 * by 8 bytes of GUID. Older drafts put the two halves in the 904 * opposite order, so that the GUID comes first. 
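	 *
	 * Schematically, for both the initiator and the target port ID:
	 *
	 *	rev. 16a     : bytes 0-7 = ID extension, bytes 8-15 = GUID
	 *	older drafts : bytes 0-7 = GUID,         bytes 8-15 = ID extension
	 *
	 * which matches the two memcpy() orderings below.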
905 * 906 * Targets conforming to these obsolete drafts can be 907 * recognized by the I/O Class they report. 908 */ 909 if (target->io_class == SRP_REV10_IB_IO_CLASS) { 910 memcpy(ipi, &target->sgid.global.interface_id, 8); 911 memcpy(ipi + 8, &target->initiator_ext, 8); 912 memcpy(tpi, &target->ioc_guid, 8); 913 memcpy(tpi + 8, &target->id_ext, 8); 914 } else { 915 memcpy(ipi, &target->initiator_ext, 8); 916 memcpy(ipi + 8, &target->sgid.global.interface_id, 8); 917 memcpy(tpi, &target->id_ext, 8); 918 memcpy(tpi + 8, &target->ioc_guid, 8); 919 } 920 921 /* 922 * Topspin/Cisco SRP targets will reject our login unless we 923 * zero out the first 8 bytes of our initiator port ID and set 924 * the second 8 bytes to the local node GUID. 925 */ 926 if (srp_target_is_topspin(target)) { 927 shost_printk(KERN_DEBUG, target->scsi_host, 928 PFX "Topspin/Cisco initiator port ID workaround " 929 "activated for target GUID %016llx\n", 930 be64_to_cpu(target->ioc_guid)); 931 memset(ipi, 0, 8); 932 memcpy(ipi + 8, &target->srp_host->srp_dev->dev->node_guid, 8); 933 } 934 935 if (target->using_rdma_cm) 936 status = rdma_connect(ch->rdma_cm.cm_id, &req->rdma_param); 937 else 938 status = ib_send_cm_req(ch->ib_cm.cm_id, &req->ib_param); 939 940 kfree(req); 941 942 return status; 943 } 944 945 static bool srp_queue_remove_work(struct srp_target_port *target) 946 { 947 bool changed = false; 948 949 spin_lock_irq(&target->lock); 950 if (target->state != SRP_TARGET_REMOVED) { 951 target->state = SRP_TARGET_REMOVED; 952 changed = true; 953 } 954 spin_unlock_irq(&target->lock); 955 956 if (changed) 957 queue_work(srp_remove_wq, &target->remove_work); 958 959 return changed; 960 } 961 962 static void srp_disconnect_target(struct srp_target_port *target) 963 { 964 struct srp_rdma_ch *ch; 965 int i, ret; 966 967 /* XXX should send SRP_I_LOGOUT request */ 968 969 for (i = 0; i < target->ch_count; i++) { 970 ch = &target->ch[i]; 971 ch->connected = false; 972 ret = 0; 973 if (target->using_rdma_cm) { 974 if (ch->rdma_cm.cm_id) 975 rdma_disconnect(ch->rdma_cm.cm_id); 976 } else { 977 if (ch->ib_cm.cm_id) 978 ret = ib_send_cm_dreq(ch->ib_cm.cm_id, 979 NULL, 0); 980 } 981 if (ret < 0) { 982 shost_printk(KERN_DEBUG, target->scsi_host, 983 PFX "Sending CM DREQ failed\n"); 984 } 985 } 986 } 987 988 static void srp_free_req_data(struct srp_target_port *target, 989 struct srp_rdma_ch *ch) 990 { 991 struct srp_device *dev = target->srp_host->srp_dev; 992 struct ib_device *ibdev = dev->dev; 993 struct srp_request *req; 994 int i; 995 996 if (!ch->req_ring) 997 return; 998 999 for (i = 0; i < target->req_ring_size; ++i) { 1000 req = &ch->req_ring[i]; 1001 if (dev->use_fast_reg) { 1002 kfree(req->fr_list); 1003 } else { 1004 kfree(req->fmr_list); 1005 kfree(req->map_page); 1006 } 1007 if (req->indirect_dma_addr) { 1008 ib_dma_unmap_single(ibdev, req->indirect_dma_addr, 1009 target->indirect_size, 1010 DMA_TO_DEVICE); 1011 } 1012 kfree(req->indirect_desc); 1013 } 1014 1015 kfree(ch->req_ring); 1016 ch->req_ring = NULL; 1017 } 1018 1019 static int srp_alloc_req_data(struct srp_rdma_ch *ch) 1020 { 1021 struct srp_target_port *target = ch->target; 1022 struct srp_device *srp_dev = target->srp_host->srp_dev; 1023 struct ib_device *ibdev = srp_dev->dev; 1024 struct srp_request *req; 1025 void *mr_list; 1026 dma_addr_t dma_addr; 1027 int i, ret = -ENOMEM; 1028 1029 ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring), 1030 GFP_KERNEL); 1031 if (!ch->req_ring) 1032 goto out; 1033 1034 for (i = 0; i < 
target->req_ring_size; ++i) { 1035 req = &ch->req_ring[i]; 1036 mr_list = kmalloc_array(target->mr_per_cmd, sizeof(void *), 1037 GFP_KERNEL); 1038 if (!mr_list) 1039 goto out; 1040 if (srp_dev->use_fast_reg) { 1041 req->fr_list = mr_list; 1042 } else { 1043 req->fmr_list = mr_list; 1044 req->map_page = kmalloc_array(srp_dev->max_pages_per_mr, 1045 sizeof(void *), 1046 GFP_KERNEL); 1047 if (!req->map_page) 1048 goto out; 1049 } 1050 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL); 1051 if (!req->indirect_desc) 1052 goto out; 1053 1054 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc, 1055 target->indirect_size, 1056 DMA_TO_DEVICE); 1057 if (ib_dma_mapping_error(ibdev, dma_addr)) 1058 goto out; 1059 1060 req->indirect_dma_addr = dma_addr; 1061 } 1062 ret = 0; 1063 1064 out: 1065 return ret; 1066 } 1067 1068 /** 1069 * srp_del_scsi_host_attr() - Remove attributes defined in the host template. 1070 * @shost: SCSI host whose attributes to remove from sysfs. 1071 * 1072 * Note: Any attributes defined in the host template and that did not exist 1073 * before invocation of this function will be ignored. 1074 */ 1075 static void srp_del_scsi_host_attr(struct Scsi_Host *shost) 1076 { 1077 struct device_attribute **attr; 1078 1079 for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr) 1080 device_remove_file(&shost->shost_dev, *attr); 1081 } 1082 1083 static void srp_remove_target(struct srp_target_port *target) 1084 { 1085 struct srp_rdma_ch *ch; 1086 int i; 1087 1088 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); 1089 1090 srp_del_scsi_host_attr(target->scsi_host); 1091 srp_rport_get(target->rport); 1092 srp_remove_host(target->scsi_host); 1093 scsi_remove_host(target->scsi_host); 1094 srp_stop_rport_timers(target->rport); 1095 srp_disconnect_target(target); 1096 kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net); 1097 for (i = 0; i < target->ch_count; i++) { 1098 ch = &target->ch[i]; 1099 srp_free_ch_ib(target, ch); 1100 } 1101 cancel_work_sync(&target->tl_err_work); 1102 srp_rport_put(target->rport); 1103 for (i = 0; i < target->ch_count; i++) { 1104 ch = &target->ch[i]; 1105 srp_free_req_data(target, ch); 1106 } 1107 kfree(target->ch); 1108 target->ch = NULL; 1109 1110 spin_lock(&target->srp_host->target_lock); 1111 list_del(&target->list); 1112 spin_unlock(&target->srp_host->target_lock); 1113 1114 scsi_host_put(target->scsi_host); 1115 } 1116 1117 static void srp_remove_work(struct work_struct *work) 1118 { 1119 struct srp_target_port *target = 1120 container_of(work, struct srp_target_port, remove_work); 1121 1122 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); 1123 1124 srp_remove_target(target); 1125 } 1126 1127 static void srp_rport_delete(struct srp_rport *rport) 1128 { 1129 struct srp_target_port *target = rport->lld_data; 1130 1131 srp_queue_remove_work(target); 1132 } 1133 1134 /** 1135 * srp_connected_ch() - number of connected channels 1136 * @target: SRP target port. 
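 *
 * Return: the number of channels in @target->ch[] whose 'connected' flag is
 * set.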
1137 */ 1138 static int srp_connected_ch(struct srp_target_port *target) 1139 { 1140 int i, c = 0; 1141 1142 for (i = 0; i < target->ch_count; i++) 1143 c += target->ch[i].connected; 1144 1145 return c; 1146 } 1147 1148 static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich) 1149 { 1150 struct srp_target_port *target = ch->target; 1151 int ret; 1152 1153 WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0); 1154 1155 ret = srp_lookup_path(ch); 1156 if (ret) 1157 goto out; 1158 1159 while (1) { 1160 init_completion(&ch->done); 1161 ret = srp_send_req(ch, multich); 1162 if (ret) 1163 goto out; 1164 ret = wait_for_completion_interruptible(&ch->done); 1165 if (ret < 0) 1166 goto out; 1167 1168 /* 1169 * The CM event handling code will set status to 1170 * SRP_PORT_REDIRECT if we get a port redirect REJ 1171 * back, or SRP_DLID_REDIRECT if we get a lid/qp 1172 * redirect REJ back. 1173 */ 1174 ret = ch->status; 1175 switch (ret) { 1176 case 0: 1177 ch->connected = true; 1178 goto out; 1179 1180 case SRP_PORT_REDIRECT: 1181 ret = srp_lookup_path(ch); 1182 if (ret) 1183 goto out; 1184 break; 1185 1186 case SRP_DLID_REDIRECT: 1187 break; 1188 1189 case SRP_STALE_CONN: 1190 shost_printk(KERN_ERR, target->scsi_host, PFX 1191 "giving up on stale connection\n"); 1192 ret = -ECONNRESET; 1193 goto out; 1194 1195 default: 1196 goto out; 1197 } 1198 } 1199 1200 out: 1201 return ret <= 0 ? ret : -ENODEV; 1202 } 1203 1204 static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc) 1205 { 1206 srp_handle_qp_err(cq, wc, "INV RKEY"); 1207 } 1208 1209 static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch, 1210 u32 rkey) 1211 { 1212 struct ib_send_wr wr = { 1213 .opcode = IB_WR_LOCAL_INV, 1214 .next = NULL, 1215 .num_sge = 0, 1216 .send_flags = 0, 1217 .ex.invalidate_rkey = rkey, 1218 }; 1219 1220 wr.wr_cqe = &req->reg_cqe; 1221 req->reg_cqe.done = srp_inv_rkey_err_done; 1222 return ib_post_send(ch->qp, &wr, NULL); 1223 } 1224 1225 static void srp_unmap_data(struct scsi_cmnd *scmnd, 1226 struct srp_rdma_ch *ch, 1227 struct srp_request *req) 1228 { 1229 struct srp_target_port *target = ch->target; 1230 struct srp_device *dev = target->srp_host->srp_dev; 1231 struct ib_device *ibdev = dev->dev; 1232 int i, res; 1233 1234 if (!scsi_sglist(scmnd) || 1235 (scmnd->sc_data_direction != DMA_TO_DEVICE && 1236 scmnd->sc_data_direction != DMA_FROM_DEVICE)) 1237 return; 1238 1239 if (dev->use_fast_reg) { 1240 struct srp_fr_desc **pfr; 1241 1242 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) { 1243 res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey); 1244 if (res < 0) { 1245 shost_printk(KERN_ERR, target->scsi_host, PFX 1246 "Queueing INV WR for rkey %#x failed (%d)\n", 1247 (*pfr)->mr->rkey, res); 1248 queue_work(system_long_wq, 1249 &target->tl_err_work); 1250 } 1251 } 1252 if (req->nmdesc) 1253 srp_fr_pool_put(ch->fr_pool, req->fr_list, 1254 req->nmdesc); 1255 } else if (dev->use_fmr) { 1256 struct ib_pool_fmr **pfmr; 1257 1258 for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++) 1259 ib_fmr_pool_unmap(*pfmr); 1260 } 1261 1262 ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd), 1263 scmnd->sc_data_direction); 1264 } 1265 1266 /** 1267 * srp_claim_req - Take ownership of the scmnd associated with a request. 1268 * @ch: SRP RDMA channel. 1269 * @req: SRP request. 1270 * @sdev: If not NULL, only take ownership for this SCSI device. 1271 * @scmnd: If NULL, take ownership of @req->scmnd. 
If not NULL, only take 1272 * ownership of @req->scmnd if it equals @scmnd. 1273 * 1274 * Return value: 1275 * Either NULL or a pointer to the SCSI command the caller became owner of. 1276 */ 1277 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch, 1278 struct srp_request *req, 1279 struct scsi_device *sdev, 1280 struct scsi_cmnd *scmnd) 1281 { 1282 unsigned long flags; 1283 1284 spin_lock_irqsave(&ch->lock, flags); 1285 if (req->scmnd && 1286 (!sdev || req->scmnd->device == sdev) && 1287 (!scmnd || req->scmnd == scmnd)) { 1288 scmnd = req->scmnd; 1289 req->scmnd = NULL; 1290 } else { 1291 scmnd = NULL; 1292 } 1293 spin_unlock_irqrestore(&ch->lock, flags); 1294 1295 return scmnd; 1296 } 1297 1298 /** 1299 * srp_free_req() - Unmap data and adjust ch->req_lim. 1300 * @ch: SRP RDMA channel. 1301 * @req: Request to be freed. 1302 * @scmnd: SCSI command associated with @req. 1303 * @req_lim_delta: Amount to be added to @target->req_lim. 1304 */ 1305 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req, 1306 struct scsi_cmnd *scmnd, s32 req_lim_delta) 1307 { 1308 unsigned long flags; 1309 1310 srp_unmap_data(scmnd, ch, req); 1311 1312 spin_lock_irqsave(&ch->lock, flags); 1313 ch->req_lim += req_lim_delta; 1314 spin_unlock_irqrestore(&ch->lock, flags); 1315 } 1316 1317 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req, 1318 struct scsi_device *sdev, int result) 1319 { 1320 struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL); 1321 1322 if (scmnd) { 1323 srp_free_req(ch, req, scmnd, 0); 1324 scmnd->result = result; 1325 scmnd->scsi_done(scmnd); 1326 } 1327 } 1328 1329 static void srp_terminate_io(struct srp_rport *rport) 1330 { 1331 struct srp_target_port *target = rport->lld_data; 1332 struct srp_rdma_ch *ch; 1333 int i, j; 1334 1335 for (i = 0; i < target->ch_count; i++) { 1336 ch = &target->ch[i]; 1337 1338 for (j = 0; j < target->req_ring_size; ++j) { 1339 struct srp_request *req = &ch->req_ring[j]; 1340 1341 srp_finish_req(ch, req, NULL, 1342 DID_TRANSPORT_FAILFAST << 16); 1343 } 1344 } 1345 } 1346 1347 /* 1348 * It is up to the caller to ensure that srp_rport_reconnect() calls are 1349 * serialized and that no concurrent srp_queuecommand(), srp_abort(), 1350 * srp_reset_device() or srp_reset_host() calls will occur while this function 1351 * is in progress. One way to realize that is not to call this function 1352 * directly but to call srp_reconnect_rport() instead since that last function 1353 * serializes calls of this function via rport->mutex and also blocks 1354 * srp_queuecommand() calls before invoking this function. 1355 */ 1356 static int srp_rport_reconnect(struct srp_rport *rport) 1357 { 1358 struct srp_target_port *target = rport->lld_data; 1359 struct srp_rdma_ch *ch; 1360 int i, j, ret = 0; 1361 bool multich = false; 1362 1363 srp_disconnect_target(target); 1364 1365 if (target->state == SRP_TARGET_SCANNING) 1366 return -ENODEV; 1367 1368 /* 1369 * Now get a new local CM ID so that we avoid confusing the target in 1370 * case things are really fouled up. Doing so also ensures that all CM 1371 * callbacks will have finished before a new QP is allocated. 
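	 *
	 * The results of the per-channel steps below are accumulated in
	 * 'ret', and srp_connect_ch() is only attempted once every channel
	 * has been re-initialized successfully.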
1372 */ 1373 for (i = 0; i < target->ch_count; i++) { 1374 ch = &target->ch[i]; 1375 ret += srp_new_cm_id(ch); 1376 } 1377 for (i = 0; i < target->ch_count; i++) { 1378 ch = &target->ch[i]; 1379 for (j = 0; j < target->req_ring_size; ++j) { 1380 struct srp_request *req = &ch->req_ring[j]; 1381 1382 srp_finish_req(ch, req, NULL, DID_RESET << 16); 1383 } 1384 } 1385 for (i = 0; i < target->ch_count; i++) { 1386 ch = &target->ch[i]; 1387 /* 1388 * Whether or not creating a new CM ID succeeded, create a new 1389 * QP. This guarantees that all completion callback function 1390 * invocations have finished before request resetting starts. 1391 */ 1392 ret += srp_create_ch_ib(ch); 1393 1394 INIT_LIST_HEAD(&ch->free_tx); 1395 for (j = 0; j < target->queue_size; ++j) 1396 list_add(&ch->tx_ring[j]->list, &ch->free_tx); 1397 } 1398 1399 target->qp_in_error = false; 1400 1401 for (i = 0; i < target->ch_count; i++) { 1402 ch = &target->ch[i]; 1403 if (ret) 1404 break; 1405 ret = srp_connect_ch(ch, multich); 1406 multich = true; 1407 } 1408 1409 if (ret == 0) 1410 shost_printk(KERN_INFO, target->scsi_host, 1411 PFX "reconnect succeeded\n"); 1412 1413 return ret; 1414 } 1415 1416 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr, 1417 unsigned int dma_len, u32 rkey) 1418 { 1419 struct srp_direct_buf *desc = state->desc; 1420 1421 WARN_ON_ONCE(!dma_len); 1422 1423 desc->va = cpu_to_be64(dma_addr); 1424 desc->key = cpu_to_be32(rkey); 1425 desc->len = cpu_to_be32(dma_len); 1426 1427 state->total_len += dma_len; 1428 state->desc++; 1429 state->ndesc++; 1430 } 1431 1432 static int srp_map_finish_fmr(struct srp_map_state *state, 1433 struct srp_rdma_ch *ch) 1434 { 1435 struct srp_target_port *target = ch->target; 1436 struct srp_device *dev = target->srp_host->srp_dev; 1437 struct ib_pool_fmr *fmr; 1438 u64 io_addr = 0; 1439 1440 if (state->fmr.next >= state->fmr.end) { 1441 shost_printk(KERN_ERR, ch->target->scsi_host, 1442 PFX "Out of MRs (mr_per_cmd = %d)\n", 1443 ch->target->mr_per_cmd); 1444 return -ENOMEM; 1445 } 1446 1447 WARN_ON_ONCE(!dev->use_fmr); 1448 1449 if (state->npages == 0) 1450 return 0; 1451 1452 if (state->npages == 1 && target->global_rkey) { 1453 srp_map_desc(state, state->base_dma_addr, state->dma_len, 1454 target->global_rkey); 1455 goto reset_state; 1456 } 1457 1458 fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages, 1459 state->npages, io_addr); 1460 if (IS_ERR(fmr)) 1461 return PTR_ERR(fmr); 1462 1463 *state->fmr.next++ = fmr; 1464 state->nmdesc++; 1465 1466 srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask, 1467 state->dma_len, fmr->fmr->rkey); 1468 1469 reset_state: 1470 state->npages = 0; 1471 state->dma_len = 0; 1472 1473 return 0; 1474 } 1475 1476 static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc) 1477 { 1478 srp_handle_qp_err(cq, wc, "FAST REG"); 1479 } 1480 1481 /* 1482 * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset 1483 * where to start in the first element. If sg_offset_p != NULL then 1484 * *sg_offset_p is updated to the offset in state->sg[retval] of the first 1485 * byte that has not yet been mapped. 
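 *
 * The return value is the number of scatterlist entries consumed by the
 * registration (or 1 when the global rkey shortcut is taken), which lets the
 * caller advance through the scatterlist, roughly as in this sketch of the
 * loop in srp_map_sg_fr() further down:
 *
 *	while (count) {
 *		n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
 *		if (n < 0)
 *			return n;
 *		count -= n;
 *		while (n-- > 0)
 *			state->sg = sg_next(state->sg);
 *	}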
1486 */ 1487 static int srp_map_finish_fr(struct srp_map_state *state, 1488 struct srp_request *req, 1489 struct srp_rdma_ch *ch, int sg_nents, 1490 unsigned int *sg_offset_p) 1491 { 1492 struct srp_target_port *target = ch->target; 1493 struct srp_device *dev = target->srp_host->srp_dev; 1494 struct ib_reg_wr wr; 1495 struct srp_fr_desc *desc; 1496 u32 rkey; 1497 int n, err; 1498 1499 if (state->fr.next >= state->fr.end) { 1500 shost_printk(KERN_ERR, ch->target->scsi_host, 1501 PFX "Out of MRs (mr_per_cmd = %d)\n", 1502 ch->target->mr_per_cmd); 1503 return -ENOMEM; 1504 } 1505 1506 WARN_ON_ONCE(!dev->use_fast_reg); 1507 1508 if (sg_nents == 1 && target->global_rkey) { 1509 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; 1510 1511 srp_map_desc(state, sg_dma_address(state->sg) + sg_offset, 1512 sg_dma_len(state->sg) - sg_offset, 1513 target->global_rkey); 1514 if (sg_offset_p) 1515 *sg_offset_p = 0; 1516 return 1; 1517 } 1518 1519 desc = srp_fr_pool_get(ch->fr_pool); 1520 if (!desc) 1521 return -ENOMEM; 1522 1523 rkey = ib_inc_rkey(desc->mr->rkey); 1524 ib_update_fast_reg_key(desc->mr, rkey); 1525 1526 n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p, 1527 dev->mr_page_size); 1528 if (unlikely(n < 0)) { 1529 srp_fr_pool_put(ch->fr_pool, &desc, 1); 1530 pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n", 1531 dev_name(&req->scmnd->device->sdev_gendev), sg_nents, 1532 sg_offset_p ? *sg_offset_p : -1, n); 1533 return n; 1534 } 1535 1536 WARN_ON_ONCE(desc->mr->length == 0); 1537 1538 req->reg_cqe.done = srp_reg_mr_err_done; 1539 1540 wr.wr.next = NULL; 1541 wr.wr.opcode = IB_WR_REG_MR; 1542 wr.wr.wr_cqe = &req->reg_cqe; 1543 wr.wr.num_sge = 0; 1544 wr.wr.send_flags = 0; 1545 wr.mr = desc->mr; 1546 wr.key = desc->mr->rkey; 1547 wr.access = (IB_ACCESS_LOCAL_WRITE | 1548 IB_ACCESS_REMOTE_READ | 1549 IB_ACCESS_REMOTE_WRITE); 1550 1551 *state->fr.next++ = desc; 1552 state->nmdesc++; 1553 1554 srp_map_desc(state, desc->mr->iova, 1555 desc->mr->length, desc->mr->rkey); 1556 1557 err = ib_post_send(ch->qp, &wr.wr, NULL); 1558 if (unlikely(err)) { 1559 WARN_ON_ONCE(err == -ENOMEM); 1560 return err; 1561 } 1562 1563 return n; 1564 } 1565 1566 static int srp_map_sg_entry(struct srp_map_state *state, 1567 struct srp_rdma_ch *ch, 1568 struct scatterlist *sg) 1569 { 1570 struct srp_target_port *target = ch->target; 1571 struct srp_device *dev = target->srp_host->srp_dev; 1572 struct ib_device *ibdev = dev->dev; 1573 dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg); 1574 unsigned int dma_len = ib_sg_dma_len(ibdev, sg); 1575 unsigned int len = 0; 1576 int ret; 1577 1578 WARN_ON_ONCE(!dma_len); 1579 1580 while (dma_len) { 1581 unsigned offset = dma_addr & ~dev->mr_page_mask; 1582 1583 if (state->npages == dev->max_pages_per_mr || 1584 (state->npages > 0 && offset != 0)) { 1585 ret = srp_map_finish_fmr(state, ch); 1586 if (ret) 1587 return ret; 1588 } 1589 1590 len = min_t(unsigned int, dma_len, dev->mr_page_size - offset); 1591 1592 if (!state->npages) 1593 state->base_dma_addr = dma_addr; 1594 state->pages[state->npages++] = dma_addr & dev->mr_page_mask; 1595 state->dma_len += len; 1596 dma_addr += len; 1597 dma_len -= len; 1598 } 1599 1600 /* 1601 * If the end of the MR is not on a page boundary then we need to 1602 * close it out and start a new one -- we can only merge at page 1603 * boundaries. 
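	 * For example, with a 4 KiB mr_page_size, an S/G element that ends in
	 * the middle of a page leaves dma_addr unaligned here, so the test
	 * below flushes the accumulated pages via srp_map_finish_fmr().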
1604 */ 1605 ret = 0; 1606 if ((dma_addr & ~dev->mr_page_mask) != 0) 1607 ret = srp_map_finish_fmr(state, ch); 1608 return ret; 1609 } 1610 1611 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch, 1612 struct srp_request *req, struct scatterlist *scat, 1613 int count) 1614 { 1615 struct scatterlist *sg; 1616 int i, ret; 1617 1618 state->pages = req->map_page; 1619 state->fmr.next = req->fmr_list; 1620 state->fmr.end = req->fmr_list + ch->target->mr_per_cmd; 1621 1622 for_each_sg(scat, sg, count, i) { 1623 ret = srp_map_sg_entry(state, ch, sg); 1624 if (ret) 1625 return ret; 1626 } 1627 1628 ret = srp_map_finish_fmr(state, ch); 1629 if (ret) 1630 return ret; 1631 1632 return 0; 1633 } 1634 1635 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch, 1636 struct srp_request *req, struct scatterlist *scat, 1637 int count) 1638 { 1639 unsigned int sg_offset = 0; 1640 1641 state->fr.next = req->fr_list; 1642 state->fr.end = req->fr_list + ch->target->mr_per_cmd; 1643 state->sg = scat; 1644 1645 if (count == 0) 1646 return 0; 1647 1648 while (count) { 1649 int i, n; 1650 1651 n = srp_map_finish_fr(state, req, ch, count, &sg_offset); 1652 if (unlikely(n < 0)) 1653 return n; 1654 1655 count -= n; 1656 for (i = 0; i < n; i++) 1657 state->sg = sg_next(state->sg); 1658 } 1659 1660 return 0; 1661 } 1662 1663 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch, 1664 struct srp_request *req, struct scatterlist *scat, 1665 int count) 1666 { 1667 struct srp_target_port *target = ch->target; 1668 struct srp_device *dev = target->srp_host->srp_dev; 1669 struct scatterlist *sg; 1670 int i; 1671 1672 for_each_sg(scat, sg, count, i) { 1673 srp_map_desc(state, ib_sg_dma_address(dev->dev, sg), 1674 ib_sg_dma_len(dev->dev, sg), 1675 target->global_rkey); 1676 } 1677 1678 return 0; 1679 } 1680 1681 /* 1682 * Register the indirect data buffer descriptor with the HCA. 1683 * 1684 * Note: since the indirect data buffer descriptor has been allocated with 1685 * kmalloc() it is guaranteed that this buffer is a physically contiguous 1686 * memory buffer. 1687 */ 1688 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req, 1689 void **next_mr, void **end_mr, u32 idb_len, 1690 __be32 *idb_rkey) 1691 { 1692 struct srp_target_port *target = ch->target; 1693 struct srp_device *dev = target->srp_host->srp_dev; 1694 struct srp_map_state state; 1695 struct srp_direct_buf idb_desc; 1696 u64 idb_pages[1]; 1697 struct scatterlist idb_sg[1]; 1698 int ret; 1699 1700 memset(&state, 0, sizeof(state)); 1701 memset(&idb_desc, 0, sizeof(idb_desc)); 1702 state.gen.next = next_mr; 1703 state.gen.end = end_mr; 1704 state.desc = &idb_desc; 1705 state.base_dma_addr = req->indirect_dma_addr; 1706 state.dma_len = idb_len; 1707 1708 if (dev->use_fast_reg) { 1709 state.sg = idb_sg; 1710 sg_init_one(idb_sg, req->indirect_desc, idb_len); 1711 idb_sg->dma_address = req->indirect_dma_addr; /* hack! 
*/ 1712 #ifdef CONFIG_NEED_SG_DMA_LENGTH 1713 idb_sg->dma_length = idb_sg->length; /* hack^2 */ 1714 #endif 1715 ret = srp_map_finish_fr(&state, req, ch, 1, NULL); 1716 if (ret < 0) 1717 return ret; 1718 WARN_ON_ONCE(ret < 1); 1719 } else if (dev->use_fmr) { 1720 state.pages = idb_pages; 1721 state.pages[0] = (req->indirect_dma_addr & 1722 dev->mr_page_mask); 1723 state.npages = 1; 1724 ret = srp_map_finish_fmr(&state, ch); 1725 if (ret < 0) 1726 return ret; 1727 } else { 1728 return -EINVAL; 1729 } 1730 1731 *idb_rkey = idb_desc.key; 1732 1733 return 0; 1734 } 1735 1736 static void srp_check_mapping(struct srp_map_state *state, 1737 struct srp_rdma_ch *ch, struct srp_request *req, 1738 struct scatterlist *scat, int count) 1739 { 1740 struct srp_device *dev = ch->target->srp_host->srp_dev; 1741 struct srp_fr_desc **pfr; 1742 u64 desc_len = 0, mr_len = 0; 1743 int i; 1744 1745 for (i = 0; i < state->ndesc; i++) 1746 desc_len += be32_to_cpu(req->indirect_desc[i].len); 1747 if (dev->use_fast_reg) 1748 for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++) 1749 mr_len += (*pfr)->mr->length; 1750 else if (dev->use_fmr) 1751 for (i = 0; i < state->nmdesc; i++) 1752 mr_len += be32_to_cpu(req->indirect_desc[i].len); 1753 if (desc_len != scsi_bufflen(req->scmnd) || 1754 mr_len > scsi_bufflen(req->scmnd)) 1755 pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n", 1756 scsi_bufflen(req->scmnd), desc_len, mr_len, 1757 state->ndesc, state->nmdesc); 1758 } 1759 1760 /** 1761 * srp_map_data() - map SCSI data buffer onto an SRP request 1762 * @scmnd: SCSI command to map 1763 * @ch: SRP RDMA channel 1764 * @req: SRP request 1765 * 1766 * Returns the length in bytes of the SRP_CMD IU or a negative value if 1767 * mapping failed. 1768 */ 1769 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, 1770 struct srp_request *req) 1771 { 1772 struct srp_target_port *target = ch->target; 1773 struct scatterlist *scat; 1774 struct srp_cmd *cmd = req->cmd->buf; 1775 int len, nents, count, ret; 1776 struct srp_device *dev; 1777 struct ib_device *ibdev; 1778 struct srp_map_state state; 1779 struct srp_indirect_buf *indirect_hdr; 1780 u32 idb_len, table_len; 1781 __be32 idb_rkey; 1782 u8 fmt; 1783 1784 if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE) 1785 return sizeof (struct srp_cmd); 1786 1787 if (scmnd->sc_data_direction != DMA_FROM_DEVICE && 1788 scmnd->sc_data_direction != DMA_TO_DEVICE) { 1789 shost_printk(KERN_WARNING, target->scsi_host, 1790 PFX "Unhandled data direction %d\n", 1791 scmnd->sc_data_direction); 1792 return -EINVAL; 1793 } 1794 1795 nents = scsi_sg_count(scmnd); 1796 scat = scsi_sglist(scmnd); 1797 1798 dev = target->srp_host->srp_dev; 1799 ibdev = dev->dev; 1800 1801 count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction); 1802 if (unlikely(count == 0)) 1803 return -EIO; 1804 1805 fmt = SRP_DATA_DESC_DIRECT; 1806 len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf); 1807 1808 if (count == 1 && target->global_rkey) { 1809 /* 1810 * The midlayer only generated a single gather/scatter 1811 * entry, or DMA mapping coalesced everything to a 1812 * single entry. So a direct descriptor along with 1813 * the DMA MR suffices. 
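		 * In that case cmd->add_data carries a single
		 * struct srp_direct_buf (va/key/len) addressed via
		 * target->global_rkey, and no memory registration is needed,
		 * which is why req->nmdesc is set to zero below.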
1814 */ 1815 struct srp_direct_buf *buf = (void *) cmd->add_data; 1816 1817 buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat)); 1818 buf->key = cpu_to_be32(target->global_rkey); 1819 buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat)); 1820 1821 req->nmdesc = 0; 1822 goto map_complete; 1823 } 1824 1825 /* 1826 * We have more than one scatter/gather entry, so build our indirect 1827 * descriptor table, trying to merge as many entries as we can. 1828 */ 1829 indirect_hdr = (void *) cmd->add_data; 1830 1831 ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr, 1832 target->indirect_size, DMA_TO_DEVICE); 1833 1834 memset(&state, 0, sizeof(state)); 1835 state.desc = req->indirect_desc; 1836 if (dev->use_fast_reg) 1837 ret = srp_map_sg_fr(&state, ch, req, scat, count); 1838 else if (dev->use_fmr) 1839 ret = srp_map_sg_fmr(&state, ch, req, scat, count); 1840 else 1841 ret = srp_map_sg_dma(&state, ch, req, scat, count); 1842 req->nmdesc = state.nmdesc; 1843 if (ret < 0) 1844 goto unmap; 1845 1846 { 1847 DEFINE_DYNAMIC_DEBUG_METADATA(ddm, 1848 "Memory mapping consistency check"); 1849 if (DYNAMIC_DEBUG_BRANCH(ddm)) 1850 srp_check_mapping(&state, ch, req, scat, count); 1851 } 1852 1853 /* We've mapped the request, now pull as much of the indirect 1854 * descriptor table as we can into the command buffer. If this 1855 * target is not using an external indirect table, we are 1856 * guaranteed to fit into the command, as the SCSI layer won't 1857 * give us more S/G entries than we allow. 1858 */ 1859 if (state.ndesc == 1) { 1860 /* 1861 * Memory registration collapsed the sg-list into one entry, 1862 * so use a direct descriptor. 1863 */ 1864 struct srp_direct_buf *buf = (void *) cmd->add_data; 1865 1866 *buf = req->indirect_desc[0]; 1867 goto map_complete; 1868 } 1869 1870 if (unlikely(target->cmd_sg_cnt < state.ndesc && 1871 !target->allow_ext_sg)) { 1872 shost_printk(KERN_ERR, target->scsi_host, 1873 "Could not fit S/G list into SRP_CMD\n"); 1874 ret = -EIO; 1875 goto unmap; 1876 } 1877 1878 count = min(state.ndesc, target->cmd_sg_cnt); 1879 table_len = state.ndesc * sizeof (struct srp_direct_buf); 1880 idb_len = sizeof(struct srp_indirect_buf) + table_len; 1881 1882 fmt = SRP_DATA_DESC_INDIRECT; 1883 len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf); 1884 len += count * sizeof (struct srp_direct_buf); 1885 1886 memcpy(indirect_hdr->desc_list, req->indirect_desc, 1887 count * sizeof (struct srp_direct_buf)); 1888 1889 if (!target->global_rkey) { 1890 ret = srp_map_idb(ch, req, state.gen.next, state.gen.end, 1891 idb_len, &idb_rkey); 1892 if (ret < 0) 1893 goto unmap; 1894 req->nmdesc++; 1895 } else { 1896 idb_rkey = cpu_to_be32(target->global_rkey); 1897 } 1898 1899 indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr); 1900 indirect_hdr->table_desc.key = idb_rkey; 1901 indirect_hdr->table_desc.len = cpu_to_be32(table_len); 1902 indirect_hdr->len = cpu_to_be32(state.total_len); 1903 1904 if (scmnd->sc_data_direction == DMA_TO_DEVICE) 1905 cmd->data_out_desc_cnt = count; 1906 else 1907 cmd->data_in_desc_cnt = count; 1908 1909 ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len, 1910 DMA_TO_DEVICE); 1911 1912 map_complete: 1913 if (scmnd->sc_data_direction == DMA_TO_DEVICE) 1914 cmd->buf_fmt = fmt << 4; 1915 else 1916 cmd->buf_fmt = fmt; 1917 1918 return len; 1919 1920 unmap: 1921 srp_unmap_data(scmnd, ch, req); 1922 if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size) 1923 ret = -E2BIG; 1924 return ret; 1925 } 1926 1927 /* 1928 * Return an IU 
and possible credit to the free pool 1929 */ 1930 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu, 1931 enum srp_iu_type iu_type) 1932 { 1933 unsigned long flags; 1934 1935 spin_lock_irqsave(&ch->lock, flags); 1936 list_add(&iu->list, &ch->free_tx); 1937 if (iu_type != SRP_IU_RSP) 1938 ++ch->req_lim; 1939 spin_unlock_irqrestore(&ch->lock, flags); 1940 } 1941 1942 /* 1943 * Must be called with ch->lock held to protect req_lim and free_tx. 1944 * If IU is not sent, it must be returned using srp_put_tx_iu(). 1945 * 1946 * Note: 1947 * An upper limit for the number of allocated information units for each 1948 * request type is: 1949 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues 1950 * more than Scsi_Host.can_queue requests. 1951 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE. 1952 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than 1953 * one unanswered SRP request to an initiator. 1954 */ 1955 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch, 1956 enum srp_iu_type iu_type) 1957 { 1958 struct srp_target_port *target = ch->target; 1959 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE; 1960 struct srp_iu *iu; 1961 1962 lockdep_assert_held(&ch->lock); 1963 1964 ib_process_cq_direct(ch->send_cq, -1); 1965 1966 if (list_empty(&ch->free_tx)) 1967 return NULL; 1968 1969 /* Initiator responses to target requests do not consume credits */ 1970 if (iu_type != SRP_IU_RSP) { 1971 if (ch->req_lim <= rsv) { 1972 ++target->zero_req_lim; 1973 return NULL; 1974 } 1975 1976 --ch->req_lim; 1977 } 1978 1979 iu = list_first_entry(&ch->free_tx, struct srp_iu, list); 1980 list_del(&iu->list); 1981 return iu; 1982 } 1983 1984 /* 1985 * Note: if this function is called from inside ib_drain_sq() then it will 1986 * be called without ch->lock being held. If ib_drain_sq() dequeues a WQE 1987 * with status IB_WC_SUCCESS then that's a bug. 
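 * Drained WQEs complete with a flush error status and therefore take the
 * early-return error path below, before the lockdep_assert_held(&ch->lock)
 * check is reached.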
1988 */ 1989 static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc) 1990 { 1991 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe); 1992 struct srp_rdma_ch *ch = cq->cq_context; 1993 1994 if (unlikely(wc->status != IB_WC_SUCCESS)) { 1995 srp_handle_qp_err(cq, wc, "SEND"); 1996 return; 1997 } 1998 1999 lockdep_assert_held(&ch->lock); 2000 2001 list_add(&iu->list, &ch->free_tx); 2002 } 2003 2004 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len) 2005 { 2006 struct srp_target_port *target = ch->target; 2007 struct ib_sge list; 2008 struct ib_send_wr wr; 2009 2010 list.addr = iu->dma; 2011 list.length = len; 2012 list.lkey = target->lkey; 2013 2014 iu->cqe.done = srp_send_done; 2015 2016 wr.next = NULL; 2017 wr.wr_cqe = &iu->cqe; 2018 wr.sg_list = &list; 2019 wr.num_sge = 1; 2020 wr.opcode = IB_WR_SEND; 2021 wr.send_flags = IB_SEND_SIGNALED; 2022 2023 return ib_post_send(ch->qp, &wr, NULL); 2024 } 2025 2026 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu) 2027 { 2028 struct srp_target_port *target = ch->target; 2029 struct ib_recv_wr wr; 2030 struct ib_sge list; 2031 2032 list.addr = iu->dma; 2033 list.length = iu->size; 2034 list.lkey = target->lkey; 2035 2036 iu->cqe.done = srp_recv_done; 2037 2038 wr.next = NULL; 2039 wr.wr_cqe = &iu->cqe; 2040 wr.sg_list = &list; 2041 wr.num_sge = 1; 2042 2043 return ib_post_recv(ch->qp, &wr, NULL); 2044 } 2045 2046 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp) 2047 { 2048 struct srp_target_port *target = ch->target; 2049 struct srp_request *req; 2050 struct scsi_cmnd *scmnd; 2051 unsigned long flags; 2052 2053 if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) { 2054 spin_lock_irqsave(&ch->lock, flags); 2055 ch->req_lim += be32_to_cpu(rsp->req_lim_delta); 2056 if (rsp->tag == ch->tsk_mgmt_tag) { 2057 ch->tsk_mgmt_status = -1; 2058 if (be32_to_cpu(rsp->resp_data_len) >= 4) 2059 ch->tsk_mgmt_status = rsp->data[3]; 2060 complete(&ch->tsk_mgmt_done); 2061 } else { 2062 shost_printk(KERN_ERR, target->scsi_host, 2063 "Received tsk mgmt response too late for tag %#llx\n", 2064 rsp->tag); 2065 } 2066 spin_unlock_irqrestore(&ch->lock, flags); 2067 } else { 2068 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag); 2069 if (scmnd && scmnd->host_scribble) { 2070 req = (void *)scmnd->host_scribble; 2071 scmnd = srp_claim_req(ch, req, NULL, scmnd); 2072 } else { 2073 scmnd = NULL; 2074 } 2075 if (!scmnd) { 2076 shost_printk(KERN_ERR, target->scsi_host, 2077 "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n", 2078 rsp->tag, ch - target->ch, ch->qp->qp_num); 2079 2080 spin_lock_irqsave(&ch->lock, flags); 2081 ch->req_lim += be32_to_cpu(rsp->req_lim_delta); 2082 spin_unlock_irqrestore(&ch->lock, flags); 2083 2084 return; 2085 } 2086 scmnd->result = rsp->status; 2087 2088 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) { 2089 memcpy(scmnd->sense_buffer, rsp->data + 2090 be32_to_cpu(rsp->resp_data_len), 2091 min_t(int, be32_to_cpu(rsp->sense_data_len), 2092 SCSI_SENSE_BUFFERSIZE)); 2093 } 2094 2095 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER)) 2096 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt)); 2097 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER)) 2098 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt)); 2099 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER)) 2100 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt)); 2101 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER)) 2102 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt)); 2103 2104 
srp_free_req(ch, req, scmnd, 2105 be32_to_cpu(rsp->req_lim_delta)); 2106 2107 scmnd->host_scribble = NULL; 2108 scmnd->scsi_done(scmnd); 2109 } 2110 } 2111 2112 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta, 2113 void *rsp, int len) 2114 { 2115 struct srp_target_port *target = ch->target; 2116 struct ib_device *dev = target->srp_host->srp_dev->dev; 2117 unsigned long flags; 2118 struct srp_iu *iu; 2119 int err; 2120 2121 spin_lock_irqsave(&ch->lock, flags); 2122 ch->req_lim += req_delta; 2123 iu = __srp_get_tx_iu(ch, SRP_IU_RSP); 2124 spin_unlock_irqrestore(&ch->lock, flags); 2125 2126 if (!iu) { 2127 shost_printk(KERN_ERR, target->scsi_host, PFX 2128 "no IU available to send response\n"); 2129 return 1; 2130 } 2131 2132 ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE); 2133 memcpy(iu->buf, rsp, len); 2134 ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE); 2135 2136 err = srp_post_send(ch, iu, len); 2137 if (err) { 2138 shost_printk(KERN_ERR, target->scsi_host, PFX 2139 "unable to post response: %d\n", err); 2140 srp_put_tx_iu(ch, iu, SRP_IU_RSP); 2141 } 2142 2143 return err; 2144 } 2145 2146 static void srp_process_cred_req(struct srp_rdma_ch *ch, 2147 struct srp_cred_req *req) 2148 { 2149 struct srp_cred_rsp rsp = { 2150 .opcode = SRP_CRED_RSP, 2151 .tag = req->tag, 2152 }; 2153 s32 delta = be32_to_cpu(req->req_lim_delta); 2154 2155 if (srp_response_common(ch, delta, &rsp, sizeof(rsp))) 2156 shost_printk(KERN_ERR, ch->target->scsi_host, PFX 2157 "problems processing SRP_CRED_REQ\n"); 2158 } 2159 2160 static void srp_process_aer_req(struct srp_rdma_ch *ch, 2161 struct srp_aer_req *req) 2162 { 2163 struct srp_target_port *target = ch->target; 2164 struct srp_aer_rsp rsp = { 2165 .opcode = SRP_AER_RSP, 2166 .tag = req->tag, 2167 }; 2168 s32 delta = be32_to_cpu(req->req_lim_delta); 2169 2170 shost_printk(KERN_ERR, target->scsi_host, PFX 2171 "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun)); 2172 2173 if (srp_response_common(ch, delta, &rsp, sizeof(rsp))) 2174 shost_printk(KERN_ERR, target->scsi_host, PFX 2175 "problems processing SRP_AER_REQ\n"); 2176 } 2177 2178 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc) 2179 { 2180 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe); 2181 struct srp_rdma_ch *ch = cq->cq_context; 2182 struct srp_target_port *target = ch->target; 2183 struct ib_device *dev = target->srp_host->srp_dev->dev; 2184 int res; 2185 u8 opcode; 2186 2187 if (unlikely(wc->status != IB_WC_SUCCESS)) { 2188 srp_handle_qp_err(cq, wc, "RECV"); 2189 return; 2190 } 2191 2192 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len, 2193 DMA_FROM_DEVICE); 2194 2195 opcode = *(u8 *) iu->buf; 2196 2197 if (0) { 2198 shost_printk(KERN_ERR, target->scsi_host, 2199 PFX "recv completion, opcode 0x%02x\n", opcode); 2200 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1, 2201 iu->buf, wc->byte_len, true); 2202 } 2203 2204 switch (opcode) { 2205 case SRP_RSP: 2206 srp_process_rsp(ch, iu->buf); 2207 break; 2208 2209 case SRP_CRED_REQ: 2210 srp_process_cred_req(ch, iu->buf); 2211 break; 2212 2213 case SRP_AER_REQ: 2214 srp_process_aer_req(ch, iu->buf); 2215 break; 2216 2217 case SRP_T_LOGOUT: 2218 /* XXX Handle target logout */ 2219 shost_printk(KERN_WARNING, target->scsi_host, 2220 PFX "Got target logout request\n"); 2221 break; 2222 2223 default: 2224 shost_printk(KERN_WARNING, target->scsi_host, 2225 PFX "Unhandled SRP opcode 0x%02x\n", opcode); 2226 break; 2227 } 2228 2229 
ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len, 2230 DMA_FROM_DEVICE); 2231 2232 res = srp_post_recv(ch, iu); 2233 if (res != 0) 2234 shost_printk(KERN_ERR, target->scsi_host, 2235 PFX "Recv failed with error code %d\n", res); 2236 } 2237 2238 /** 2239 * srp_tl_err_work() - handle a transport layer error 2240 * @work: Work structure embedded in an SRP target port. 2241 * 2242 * Note: This function may get invoked before the rport has been created, 2243 * hence the target->rport test. 2244 */ 2245 static void srp_tl_err_work(struct work_struct *work) 2246 { 2247 struct srp_target_port *target; 2248 2249 target = container_of(work, struct srp_target_port, tl_err_work); 2250 if (target->rport) 2251 srp_start_tl_fail_timers(target->rport); 2252 } 2253 2254 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc, 2255 const char *opname) 2256 { 2257 struct srp_rdma_ch *ch = cq->cq_context; 2258 struct srp_target_port *target = ch->target; 2259 2260 if (ch->connected && !target->qp_in_error) { 2261 shost_printk(KERN_ERR, target->scsi_host, 2262 PFX "failed %s status %s (%d) for CQE %p\n", 2263 opname, ib_wc_status_msg(wc->status), wc->status, 2264 wc->wr_cqe); 2265 queue_work(system_long_wq, &target->tl_err_work); 2266 } 2267 target->qp_in_error = true; 2268 } 2269 2270 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) 2271 { 2272 struct srp_target_port *target = host_to_target(shost); 2273 struct srp_rport *rport = target->rport; 2274 struct srp_rdma_ch *ch; 2275 struct srp_request *req; 2276 struct srp_iu *iu; 2277 struct srp_cmd *cmd; 2278 struct ib_device *dev; 2279 unsigned long flags; 2280 u32 tag; 2281 u16 idx; 2282 int len, ret; 2283 const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler; 2284 2285 /* 2286 * The SCSI EH thread is the only context from which srp_queuecommand() 2287 * can get invoked for blocked devices (SDEV_BLOCK / 2288 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by 2289 * locking the rport mutex if invoked from inside the SCSI EH. 
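 *
 * If srp_chkready() reports that the rport is not ready, scmnd->result is
 * set below and the command is completed immediately via the err label
 * instead of being sent to the target.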
2290 */ 2291 if (in_scsi_eh) 2292 mutex_lock(&rport->mutex); 2293 2294 scmnd->result = srp_chkready(target->rport); 2295 if (unlikely(scmnd->result)) 2296 goto err; 2297 2298 WARN_ON_ONCE(scmnd->request->tag < 0); 2299 tag = blk_mq_unique_tag(scmnd->request); 2300 ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)]; 2301 idx = blk_mq_unique_tag_to_tag(tag); 2302 WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n", 2303 dev_name(&shost->shost_gendev), tag, idx, 2304 target->req_ring_size); 2305 2306 spin_lock_irqsave(&ch->lock, flags); 2307 iu = __srp_get_tx_iu(ch, SRP_IU_CMD); 2308 spin_unlock_irqrestore(&ch->lock, flags); 2309 2310 if (!iu) 2311 goto err; 2312 2313 req = &ch->req_ring[idx]; 2314 dev = target->srp_host->srp_dev->dev; 2315 ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len, 2316 DMA_TO_DEVICE); 2317 2318 scmnd->host_scribble = (void *) req; 2319 2320 cmd = iu->buf; 2321 memset(cmd, 0, sizeof *cmd); 2322 2323 cmd->opcode = SRP_CMD; 2324 int_to_scsilun(scmnd->device->lun, &cmd->lun); 2325 cmd->tag = tag; 2326 memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len); 2327 2328 req->scmnd = scmnd; 2329 req->cmd = iu; 2330 2331 len = srp_map_data(scmnd, ch, req); 2332 if (len < 0) { 2333 shost_printk(KERN_ERR, target->scsi_host, 2334 PFX "Failed to map data (%d)\n", len); 2335 /* 2336 * If we ran out of memory descriptors (-ENOMEM) because an 2337 * application is queuing many requests with more than 2338 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer 2339 * to reduce queue depth temporarily. 2340 */ 2341 scmnd->result = len == -ENOMEM ? 2342 DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16; 2343 goto err_iu; 2344 } 2345 2346 ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len, 2347 DMA_TO_DEVICE); 2348 2349 if (srp_post_send(ch, iu, len)) { 2350 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n"); 2351 goto err_unmap; 2352 } 2353 2354 ret = 0; 2355 2356 unlock_rport: 2357 if (in_scsi_eh) 2358 mutex_unlock(&rport->mutex); 2359 2360 return ret; 2361 2362 err_unmap: 2363 srp_unmap_data(scmnd, ch, req); 2364 2365 err_iu: 2366 srp_put_tx_iu(ch, iu, SRP_IU_CMD); 2367 2368 /* 2369 * Avoid that the loops that iterate over the request ring can 2370 * encounter a dangling SCSI command pointer. 2371 */ 2372 req->scmnd = NULL; 2373 2374 err: 2375 if (scmnd->result) { 2376 scmnd->scsi_done(scmnd); 2377 ret = 0; 2378 } else { 2379 ret = SCSI_MLQUEUE_HOST_BUSY; 2380 } 2381 2382 goto unlock_rport; 2383 } 2384 2385 /* 2386 * Note: the resources allocated in this function are freed in 2387 * srp_free_ch_ib(). 
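 *
 * The RX ring is sized for the largest target-to-initiator IU
 * (ch->max_ti_iu_len) while the TX ring uses target->max_iu_len; only the
 * TX IUs are added to ch->free_tx.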
2388 */ 2389 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch) 2390 { 2391 struct srp_target_port *target = ch->target; 2392 int i; 2393 2394 ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring), 2395 GFP_KERNEL); 2396 if (!ch->rx_ring) 2397 goto err_no_ring; 2398 ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring), 2399 GFP_KERNEL); 2400 if (!ch->tx_ring) 2401 goto err_no_ring; 2402 2403 for (i = 0; i < target->queue_size; ++i) { 2404 ch->rx_ring[i] = srp_alloc_iu(target->srp_host, 2405 ch->max_ti_iu_len, 2406 GFP_KERNEL, DMA_FROM_DEVICE); 2407 if (!ch->rx_ring[i]) 2408 goto err; 2409 } 2410 2411 for (i = 0; i < target->queue_size; ++i) { 2412 ch->tx_ring[i] = srp_alloc_iu(target->srp_host, 2413 target->max_iu_len, 2414 GFP_KERNEL, DMA_TO_DEVICE); 2415 if (!ch->tx_ring[i]) 2416 goto err; 2417 2418 list_add(&ch->tx_ring[i]->list, &ch->free_tx); 2419 } 2420 2421 return 0; 2422 2423 err: 2424 for (i = 0; i < target->queue_size; ++i) { 2425 srp_free_iu(target->srp_host, ch->rx_ring[i]); 2426 srp_free_iu(target->srp_host, ch->tx_ring[i]); 2427 } 2428 2429 2430 err_no_ring: 2431 kfree(ch->tx_ring); 2432 ch->tx_ring = NULL; 2433 kfree(ch->rx_ring); 2434 ch->rx_ring = NULL; 2435 2436 return -ENOMEM; 2437 } 2438 2439 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask) 2440 { 2441 uint64_t T_tr_ns, max_compl_time_ms; 2442 uint32_t rq_tmo_jiffies; 2443 2444 /* 2445 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair, 2446 * table 91), both the QP timeout and the retry count have to be set 2447 * for RC QP's during the RTR to RTS transition. 2448 */ 2449 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) != 2450 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)); 2451 2452 /* 2453 * Set target->rq_tmo_jiffies to one second more than the largest time 2454 * it can take before an error completion is generated. See also 2455 * C9-140..142 in the IBTA spec for more information about how to 2456 * convert the QP Local ACK Timeout value to nanoseconds. 2457 */ 2458 T_tr_ns = 4096 * (1ULL << qp_attr->timeout); 2459 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns; 2460 do_div(max_compl_time_ms, NSEC_PER_MSEC); 2461 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000); 2462 2463 return rq_tmo_jiffies; 2464 } 2465 2466 static void srp_cm_rep_handler(struct ib_cm_id *cm_id, 2467 const struct srp_login_rsp *lrsp, 2468 struct srp_rdma_ch *ch) 2469 { 2470 struct srp_target_port *target = ch->target; 2471 struct ib_qp_attr *qp_attr = NULL; 2472 int attr_mask = 0; 2473 int ret = 0; 2474 int i; 2475 2476 if (lrsp->opcode == SRP_LOGIN_RSP) { 2477 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len); 2478 ch->req_lim = be32_to_cpu(lrsp->req_lim_delta); 2479 2480 /* 2481 * Reserve credits for task management so we don't 2482 * bounce requests back to the SCSI mid-layer. 
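 *
 * Illustrative example (numbers are made up): if the login response grants
 * req_lim = 64 credits, can_queue is clamped to at most
 * 64 - SRP_TSK_MGMT_SQ_SIZE so that task management IUs can always obtain
 * a credit in __srp_get_tx_iu().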
2483 */ 2484 target->scsi_host->can_queue 2485 = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE, 2486 target->scsi_host->can_queue); 2487 target->scsi_host->cmd_per_lun 2488 = min_t(int, target->scsi_host->can_queue, 2489 target->scsi_host->cmd_per_lun); 2490 } else { 2491 shost_printk(KERN_WARNING, target->scsi_host, 2492 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode); 2493 ret = -ECONNRESET; 2494 goto error; 2495 } 2496 2497 if (!ch->rx_ring) { 2498 ret = srp_alloc_iu_bufs(ch); 2499 if (ret) 2500 goto error; 2501 } 2502 2503 for (i = 0; i < target->queue_size; i++) { 2504 struct srp_iu *iu = ch->rx_ring[i]; 2505 2506 ret = srp_post_recv(ch, iu); 2507 if (ret) 2508 goto error; 2509 } 2510 2511 if (!target->using_rdma_cm) { 2512 ret = -ENOMEM; 2513 qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL); 2514 if (!qp_attr) 2515 goto error; 2516 2517 qp_attr->qp_state = IB_QPS_RTR; 2518 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); 2519 if (ret) 2520 goto error_free; 2521 2522 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask); 2523 if (ret) 2524 goto error_free; 2525 2526 qp_attr->qp_state = IB_QPS_RTS; 2527 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); 2528 if (ret) 2529 goto error_free; 2530 2531 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask); 2532 2533 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask); 2534 if (ret) 2535 goto error_free; 2536 2537 ret = ib_send_cm_rtu(cm_id, NULL, 0); 2538 } 2539 2540 error_free: 2541 kfree(qp_attr); 2542 2543 error: 2544 ch->status = ret; 2545 } 2546 2547 static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id, 2548 const struct ib_cm_event *event, 2549 struct srp_rdma_ch *ch) 2550 { 2551 struct srp_target_port *target = ch->target; 2552 struct Scsi_Host *shost = target->scsi_host; 2553 struct ib_class_port_info *cpi; 2554 int opcode; 2555 u16 dlid; 2556 2557 switch (event->param.rej_rcvd.reason) { 2558 case IB_CM_REJ_PORT_CM_REDIRECT: 2559 cpi = event->param.rej_rcvd.ari; 2560 dlid = be16_to_cpu(cpi->redirect_lid); 2561 sa_path_set_dlid(&ch->ib_cm.path, dlid); 2562 ch->ib_cm.path.pkey = cpi->redirect_pkey; 2563 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff; 2564 memcpy(ch->ib_cm.path.dgid.raw, cpi->redirect_gid, 16); 2565 2566 ch->status = dlid ? SRP_DLID_REDIRECT : SRP_PORT_REDIRECT; 2567 break; 2568 2569 case IB_CM_REJ_PORT_REDIRECT: 2570 if (srp_target_is_topspin(target)) { 2571 union ib_gid *dgid = &ch->ib_cm.path.dgid; 2572 2573 /* 2574 * Topspin/Cisco SRP gateways incorrectly send 2575 * reject reason code 25 when they mean 24 2576 * (port redirect). 
2577 */ 2578 memcpy(dgid->raw, event->param.rej_rcvd.ari, 16); 2579 2580 shost_printk(KERN_DEBUG, shost, 2581 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n", 2582 be64_to_cpu(dgid->global.subnet_prefix), 2583 be64_to_cpu(dgid->global.interface_id)); 2584 2585 ch->status = SRP_PORT_REDIRECT; 2586 } else { 2587 shost_printk(KERN_WARNING, shost, 2588 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n"); 2589 ch->status = -ECONNRESET; 2590 } 2591 break; 2592 2593 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID: 2594 shost_printk(KERN_WARNING, shost, 2595 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n"); 2596 ch->status = -ECONNRESET; 2597 break; 2598 2599 case IB_CM_REJ_CONSUMER_DEFINED: 2600 opcode = *(u8 *) event->private_data; 2601 if (opcode == SRP_LOGIN_REJ) { 2602 struct srp_login_rej *rej = event->private_data; 2603 u32 reason = be32_to_cpu(rej->reason); 2604 2605 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE) 2606 shost_printk(KERN_WARNING, shost, 2607 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n"); 2608 else 2609 shost_printk(KERN_WARNING, shost, PFX 2610 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n", 2611 target->sgid.raw, 2612 target->ib_cm.orig_dgid.raw, 2613 reason); 2614 } else 2615 shost_printk(KERN_WARNING, shost, 2616 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED," 2617 " opcode 0x%02x\n", opcode); 2618 ch->status = -ECONNRESET; 2619 break; 2620 2621 case IB_CM_REJ_STALE_CONN: 2622 shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n"); 2623 ch->status = SRP_STALE_CONN; 2624 break; 2625 2626 default: 2627 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n", 2628 event->param.rej_rcvd.reason); 2629 ch->status = -ECONNRESET; 2630 } 2631 } 2632 2633 static int srp_ib_cm_handler(struct ib_cm_id *cm_id, 2634 const struct ib_cm_event *event) 2635 { 2636 struct srp_rdma_ch *ch = cm_id->context; 2637 struct srp_target_port *target = ch->target; 2638 int comp = 0; 2639 2640 switch (event->event) { 2641 case IB_CM_REQ_ERROR: 2642 shost_printk(KERN_DEBUG, target->scsi_host, 2643 PFX "Sending CM REQ failed\n"); 2644 comp = 1; 2645 ch->status = -ECONNRESET; 2646 break; 2647 2648 case IB_CM_REP_RECEIVED: 2649 comp = 1; 2650 srp_cm_rep_handler(cm_id, event->private_data, ch); 2651 break; 2652 2653 case IB_CM_REJ_RECEIVED: 2654 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n"); 2655 comp = 1; 2656 2657 srp_ib_cm_rej_handler(cm_id, event, ch); 2658 break; 2659 2660 case IB_CM_DREQ_RECEIVED: 2661 shost_printk(KERN_WARNING, target->scsi_host, 2662 PFX "DREQ received - connection closed\n"); 2663 ch->connected = false; 2664 if (ib_send_cm_drep(cm_id, NULL, 0)) 2665 shost_printk(KERN_ERR, target->scsi_host, 2666 PFX "Sending CM DREP failed\n"); 2667 queue_work(system_long_wq, &target->tl_err_work); 2668 break; 2669 2670 case IB_CM_TIMEWAIT_EXIT: 2671 shost_printk(KERN_ERR, target->scsi_host, 2672 PFX "connection closed\n"); 2673 comp = 1; 2674 2675 ch->status = 0; 2676 break; 2677 2678 case IB_CM_MRA_RECEIVED: 2679 case IB_CM_DREQ_ERROR: 2680 case IB_CM_DREP_RECEIVED: 2681 break; 2682 2683 default: 2684 shost_printk(KERN_WARNING, target->scsi_host, 2685 PFX "Unhandled CM event %d\n", event->event); 2686 break; 2687 } 2688 2689 if (comp) 2690 complete(&ch->done); 2691 2692 return 0; 2693 } 2694 2695 static void srp_rdma_cm_rej_handler(struct srp_rdma_ch *ch, 2696 struct rdma_cm_event *event) 2697 { 2698 struct srp_target_port *target = ch->target; 2699 struct Scsi_Host *shost = target->scsi_host; 2700 int opcode; 2701 2702 switch 
(event->status) { 2703 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID: 2704 shost_printk(KERN_WARNING, shost, 2705 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n"); 2706 ch->status = -ECONNRESET; 2707 break; 2708 2709 case IB_CM_REJ_CONSUMER_DEFINED: 2710 opcode = *(u8 *) event->param.conn.private_data; 2711 if (opcode == SRP_LOGIN_REJ) { 2712 struct srp_login_rej *rej = 2713 (struct srp_login_rej *) 2714 event->param.conn.private_data; 2715 u32 reason = be32_to_cpu(rej->reason); 2716 2717 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE) 2718 shost_printk(KERN_WARNING, shost, 2719 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n"); 2720 else 2721 shost_printk(KERN_WARNING, shost, 2722 PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason); 2723 } else { 2724 shost_printk(KERN_WARNING, shost, 2725 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n", 2726 opcode); 2727 } 2728 ch->status = -ECONNRESET; 2729 break; 2730 2731 case IB_CM_REJ_STALE_CONN: 2732 shost_printk(KERN_WARNING, shost, 2733 " REJ reason: stale connection\n"); 2734 ch->status = SRP_STALE_CONN; 2735 break; 2736 2737 default: 2738 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n", 2739 event->status); 2740 ch->status = -ECONNRESET; 2741 break; 2742 } 2743 } 2744 2745 static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id, 2746 struct rdma_cm_event *event) 2747 { 2748 struct srp_rdma_ch *ch = cm_id->context; 2749 struct srp_target_port *target = ch->target; 2750 int comp = 0; 2751 2752 switch (event->event) { 2753 case RDMA_CM_EVENT_ADDR_RESOLVED: 2754 ch->status = 0; 2755 comp = 1; 2756 break; 2757 2758 case RDMA_CM_EVENT_ADDR_ERROR: 2759 ch->status = -ENXIO; 2760 comp = 1; 2761 break; 2762 2763 case RDMA_CM_EVENT_ROUTE_RESOLVED: 2764 ch->status = 0; 2765 comp = 1; 2766 break; 2767 2768 case RDMA_CM_EVENT_ROUTE_ERROR: 2769 case RDMA_CM_EVENT_UNREACHABLE: 2770 ch->status = -EHOSTUNREACH; 2771 comp = 1; 2772 break; 2773 2774 case RDMA_CM_EVENT_CONNECT_ERROR: 2775 shost_printk(KERN_DEBUG, target->scsi_host, 2776 PFX "Sending CM REQ failed\n"); 2777 comp = 1; 2778 ch->status = -ECONNRESET; 2779 break; 2780 2781 case RDMA_CM_EVENT_ESTABLISHED: 2782 comp = 1; 2783 srp_cm_rep_handler(NULL, event->param.conn.private_data, ch); 2784 break; 2785 2786 case RDMA_CM_EVENT_REJECTED: 2787 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n"); 2788 comp = 1; 2789 2790 srp_rdma_cm_rej_handler(ch, event); 2791 break; 2792 2793 case RDMA_CM_EVENT_DISCONNECTED: 2794 if (ch->connected) { 2795 shost_printk(KERN_WARNING, target->scsi_host, 2796 PFX "received DREQ\n"); 2797 rdma_disconnect(ch->rdma_cm.cm_id); 2798 comp = 1; 2799 ch->status = 0; 2800 queue_work(system_long_wq, &target->tl_err_work); 2801 } 2802 break; 2803 2804 case RDMA_CM_EVENT_TIMEWAIT_EXIT: 2805 shost_printk(KERN_ERR, target->scsi_host, 2806 PFX "connection closed\n"); 2807 2808 comp = 1; 2809 ch->status = 0; 2810 break; 2811 2812 default: 2813 shost_printk(KERN_WARNING, target->scsi_host, 2814 PFX "Unhandled CM event %d\n", event->event); 2815 break; 2816 } 2817 2818 if (comp) 2819 complete(&ch->done); 2820 2821 return 0; 2822 } 2823 2824 /** 2825 * srp_change_queue_depth - setting device queue depth 2826 * @sdev: scsi device struct 2827 * @qdepth: requested queue depth 2828 * 2829 * Returns queue depth. 
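 *
 * The requested depth is forced to 1 for devices that do not support
 * tagged queuing before it is passed on to scsi_change_queue_depth().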
2830 */ 2831 static int 2832 srp_change_queue_depth(struct scsi_device *sdev, int qdepth) 2833 { 2834 if (!sdev->tagged_supported) 2835 qdepth = 1; 2836 return scsi_change_queue_depth(sdev, qdepth); 2837 } 2838 2839 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun, 2840 u8 func, u8 *status) 2841 { 2842 struct srp_target_port *target = ch->target; 2843 struct srp_rport *rport = target->rport; 2844 struct ib_device *dev = target->srp_host->srp_dev->dev; 2845 struct srp_iu *iu; 2846 struct srp_tsk_mgmt *tsk_mgmt; 2847 int res; 2848 2849 if (!ch->connected || target->qp_in_error) 2850 return -1; 2851 2852 /* 2853 * Lock the rport mutex to avoid that srp_create_ch_ib() is 2854 * invoked while a task management function is being sent. 2855 */ 2856 mutex_lock(&rport->mutex); 2857 spin_lock_irq(&ch->lock); 2858 iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT); 2859 spin_unlock_irq(&ch->lock); 2860 2861 if (!iu) { 2862 mutex_unlock(&rport->mutex); 2863 2864 return -1; 2865 } 2866 2867 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt, 2868 DMA_TO_DEVICE); 2869 tsk_mgmt = iu->buf; 2870 memset(tsk_mgmt, 0, sizeof *tsk_mgmt); 2871 2872 tsk_mgmt->opcode = SRP_TSK_MGMT; 2873 int_to_scsilun(lun, &tsk_mgmt->lun); 2874 tsk_mgmt->tsk_mgmt_func = func; 2875 tsk_mgmt->task_tag = req_tag; 2876 2877 spin_lock_irq(&ch->lock); 2878 ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT; 2879 tsk_mgmt->tag = ch->tsk_mgmt_tag; 2880 spin_unlock_irq(&ch->lock); 2881 2882 init_completion(&ch->tsk_mgmt_done); 2883 2884 ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt, 2885 DMA_TO_DEVICE); 2886 if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) { 2887 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT); 2888 mutex_unlock(&rport->mutex); 2889 2890 return -1; 2891 } 2892 res = wait_for_completion_timeout(&ch->tsk_mgmt_done, 2893 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)); 2894 if (res > 0 && status) 2895 *status = ch->tsk_mgmt_status; 2896 mutex_unlock(&rport->mutex); 2897 2898 WARN_ON_ONCE(res < 0); 2899 2900 return res > 0 ? 
0 : -1; 2901 } 2902 2903 static int srp_abort(struct scsi_cmnd *scmnd) 2904 { 2905 struct srp_target_port *target = host_to_target(scmnd->device->host); 2906 struct srp_request *req = (struct srp_request *) scmnd->host_scribble; 2907 u32 tag; 2908 u16 ch_idx; 2909 struct srp_rdma_ch *ch; 2910 int ret; 2911 2912 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); 2913 2914 if (!req) 2915 return SUCCESS; 2916 tag = blk_mq_unique_tag(scmnd->request); 2917 ch_idx = blk_mq_unique_tag_to_hwq(tag); 2918 if (WARN_ON_ONCE(ch_idx >= target->ch_count)) 2919 return SUCCESS; 2920 ch = &target->ch[ch_idx]; 2921 if (!srp_claim_req(ch, req, NULL, scmnd)) 2922 return SUCCESS; 2923 shost_printk(KERN_ERR, target->scsi_host, 2924 "Sending SRP abort for tag %#x\n", tag); 2925 if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun, 2926 SRP_TSK_ABORT_TASK, NULL) == 0) 2927 ret = SUCCESS; 2928 else if (target->rport->state == SRP_RPORT_LOST) 2929 ret = FAST_IO_FAIL; 2930 else 2931 ret = FAILED; 2932 if (ret == SUCCESS) { 2933 srp_free_req(ch, req, scmnd, 0); 2934 scmnd->result = DID_ABORT << 16; 2935 scmnd->scsi_done(scmnd); 2936 } 2937 2938 return ret; 2939 } 2940 2941 static int srp_reset_device(struct scsi_cmnd *scmnd) 2942 { 2943 struct srp_target_port *target = host_to_target(scmnd->device->host); 2944 struct srp_rdma_ch *ch; 2945 int i, j; 2946 u8 status; 2947 2948 shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n"); 2949 2950 ch = &target->ch[0]; 2951 if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun, 2952 SRP_TSK_LUN_RESET, &status)) 2953 return FAILED; 2954 if (status) 2955 return FAILED; 2956 2957 for (i = 0; i < target->ch_count; i++) { 2958 ch = &target->ch[i]; 2959 for (j = 0; j < target->req_ring_size; ++j) { 2960 struct srp_request *req = &ch->req_ring[j]; 2961 2962 srp_finish_req(ch, req, scmnd->device, DID_RESET << 16); 2963 } 2964 } 2965 2966 return SUCCESS; 2967 } 2968 2969 static int srp_reset_host(struct scsi_cmnd *scmnd) 2970 { 2971 struct srp_target_port *target = host_to_target(scmnd->device->host); 2972 2973 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n"); 2974 2975 return srp_reconnect_rport(target->rport) == 0 ? 
SUCCESS : FAILED; 2976 } 2977 2978 static int srp_target_alloc(struct scsi_target *starget) 2979 { 2980 struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); 2981 struct srp_target_port *target = host_to_target(shost); 2982 2983 if (target->target_can_queue) 2984 starget->can_queue = target->target_can_queue; 2985 return 0; 2986 } 2987 2988 static int srp_slave_alloc(struct scsi_device *sdev) 2989 { 2990 struct Scsi_Host *shost = sdev->host; 2991 struct srp_target_port *target = host_to_target(shost); 2992 struct srp_device *srp_dev = target->srp_host->srp_dev; 2993 struct ib_device *ibdev = srp_dev->dev; 2994 2995 if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) 2996 blk_queue_virt_boundary(sdev->request_queue, 2997 ~srp_dev->mr_page_mask); 2998 2999 return 0; 3000 } 3001 3002 static int srp_slave_configure(struct scsi_device *sdev) 3003 { 3004 struct Scsi_Host *shost = sdev->host; 3005 struct srp_target_port *target = host_to_target(shost); 3006 struct request_queue *q = sdev->request_queue; 3007 unsigned long timeout; 3008 3009 if (sdev->type == TYPE_DISK) { 3010 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies); 3011 blk_queue_rq_timeout(q, timeout); 3012 } 3013 3014 return 0; 3015 } 3016 3017 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr, 3018 char *buf) 3019 { 3020 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3021 3022 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext)); 3023 } 3024 3025 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr, 3026 char *buf) 3027 { 3028 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3029 3030 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid)); 3031 } 3032 3033 static ssize_t show_service_id(struct device *dev, 3034 struct device_attribute *attr, char *buf) 3035 { 3036 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3037 3038 if (target->using_rdma_cm) 3039 return -ENOENT; 3040 return sprintf(buf, "0x%016llx\n", 3041 be64_to_cpu(target->ib_cm.service_id)); 3042 } 3043 3044 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, 3045 char *buf) 3046 { 3047 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3048 3049 if (target->using_rdma_cm) 3050 return -ENOENT; 3051 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey)); 3052 } 3053 3054 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr, 3055 char *buf) 3056 { 3057 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3058 3059 return sprintf(buf, "%pI6\n", target->sgid.raw); 3060 } 3061 3062 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr, 3063 char *buf) 3064 { 3065 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3066 struct srp_rdma_ch *ch = &target->ch[0]; 3067 3068 if (target->using_rdma_cm) 3069 return -ENOENT; 3070 return sprintf(buf, "%pI6\n", ch->ib_cm.path.dgid.raw); 3071 } 3072 3073 static ssize_t show_orig_dgid(struct device *dev, 3074 struct device_attribute *attr, char *buf) 3075 { 3076 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3077 3078 if (target->using_rdma_cm) 3079 return -ENOENT; 3080 return sprintf(buf, "%pI6\n", target->ib_cm.orig_dgid.raw); 3081 } 3082 3083 static ssize_t show_req_lim(struct device *dev, 3084 struct device_attribute *attr, char *buf) 3085 { 3086 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3087 
struct srp_rdma_ch *ch; 3088 int i, req_lim = INT_MAX; 3089 3090 for (i = 0; i < target->ch_count; i++) { 3091 ch = &target->ch[i]; 3092 req_lim = min(req_lim, ch->req_lim); 3093 } 3094 return sprintf(buf, "%d\n", req_lim); 3095 } 3096 3097 static ssize_t show_zero_req_lim(struct device *dev, 3098 struct device_attribute *attr, char *buf) 3099 { 3100 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3101 3102 return sprintf(buf, "%d\n", target->zero_req_lim); 3103 } 3104 3105 static ssize_t show_local_ib_port(struct device *dev, 3106 struct device_attribute *attr, char *buf) 3107 { 3108 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3109 3110 return sprintf(buf, "%d\n", target->srp_host->port); 3111 } 3112 3113 static ssize_t show_local_ib_device(struct device *dev, 3114 struct device_attribute *attr, char *buf) 3115 { 3116 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3117 3118 return sprintf(buf, "%s\n", 3119 dev_name(&target->srp_host->srp_dev->dev->dev)); 3120 } 3121 3122 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr, 3123 char *buf) 3124 { 3125 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3126 3127 return sprintf(buf, "%d\n", target->ch_count); 3128 } 3129 3130 static ssize_t show_comp_vector(struct device *dev, 3131 struct device_attribute *attr, char *buf) 3132 { 3133 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3134 3135 return sprintf(buf, "%d\n", target->comp_vector); 3136 } 3137 3138 static ssize_t show_tl_retry_count(struct device *dev, 3139 struct device_attribute *attr, char *buf) 3140 { 3141 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3142 3143 return sprintf(buf, "%d\n", target->tl_retry_count); 3144 } 3145 3146 static ssize_t show_cmd_sg_entries(struct device *dev, 3147 struct device_attribute *attr, char *buf) 3148 { 3149 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3150 3151 return sprintf(buf, "%u\n", target->cmd_sg_cnt); 3152 } 3153 3154 static ssize_t show_allow_ext_sg(struct device *dev, 3155 struct device_attribute *attr, char *buf) 3156 { 3157 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3158 3159 return sprintf(buf, "%s\n", target->allow_ext_sg ? 
"true" : "false"); 3160 } 3161 3162 static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL); 3163 static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL); 3164 static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL); 3165 static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); 3166 static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL); 3167 static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL); 3168 static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL); 3169 static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL); 3170 static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL); 3171 static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL); 3172 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL); 3173 static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL); 3174 static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL); 3175 static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL); 3176 static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL); 3177 static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL); 3178 3179 static struct device_attribute *srp_host_attrs[] = { 3180 &dev_attr_id_ext, 3181 &dev_attr_ioc_guid, 3182 &dev_attr_service_id, 3183 &dev_attr_pkey, 3184 &dev_attr_sgid, 3185 &dev_attr_dgid, 3186 &dev_attr_orig_dgid, 3187 &dev_attr_req_lim, 3188 &dev_attr_zero_req_lim, 3189 &dev_attr_local_ib_port, 3190 &dev_attr_local_ib_device, 3191 &dev_attr_ch_count, 3192 &dev_attr_comp_vector, 3193 &dev_attr_tl_retry_count, 3194 &dev_attr_cmd_sg_entries, 3195 &dev_attr_allow_ext_sg, 3196 NULL 3197 }; 3198 3199 static struct scsi_host_template srp_template = { 3200 .module = THIS_MODULE, 3201 .name = "InfiniBand SRP initiator", 3202 .proc_name = DRV_NAME, 3203 .target_alloc = srp_target_alloc, 3204 .slave_alloc = srp_slave_alloc, 3205 .slave_configure = srp_slave_configure, 3206 .info = srp_target_info, 3207 .queuecommand = srp_queuecommand, 3208 .change_queue_depth = srp_change_queue_depth, 3209 .eh_timed_out = srp_timed_out, 3210 .eh_abort_handler = srp_abort, 3211 .eh_device_reset_handler = srp_reset_device, 3212 .eh_host_reset_handler = srp_reset_host, 3213 .skip_settle_delay = true, 3214 .sg_tablesize = SRP_DEF_SG_TABLESIZE, 3215 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE, 3216 .this_id = -1, 3217 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE, 3218 .use_clustering = ENABLE_CLUSTERING, 3219 .shost_attrs = srp_host_attrs, 3220 .track_queue_depth = 1, 3221 }; 3222 3223 static int srp_sdev_count(struct Scsi_Host *host) 3224 { 3225 struct scsi_device *sdev; 3226 int c = 0; 3227 3228 shost_for_each_device(sdev, host) 3229 c++; 3230 3231 return c; 3232 } 3233 3234 /* 3235 * Return values: 3236 * < 0 upon failure. Caller is responsible for SRP target port cleanup. 3237 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port 3238 * removal has been scheduled. 3239 * 0 and target->state != SRP_TARGET_REMOVED upon success. 
3240 */ 3241 static int srp_add_target(struct srp_host *host, struct srp_target_port *target) 3242 { 3243 struct srp_rport_identifiers ids; 3244 struct srp_rport *rport; 3245 3246 target->state = SRP_TARGET_SCANNING; 3247 sprintf(target->target_name, "SRP.T10:%016llX", 3248 be64_to_cpu(target->id_ext)); 3249 3250 if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dev.parent)) 3251 return -ENODEV; 3252 3253 memcpy(ids.port_id, &target->id_ext, 8); 3254 memcpy(ids.port_id + 8, &target->ioc_guid, 8); 3255 ids.roles = SRP_RPORT_ROLE_TARGET; 3256 rport = srp_rport_add(target->scsi_host, &ids); 3257 if (IS_ERR(rport)) { 3258 scsi_remove_host(target->scsi_host); 3259 return PTR_ERR(rport); 3260 } 3261 3262 rport->lld_data = target; 3263 target->rport = rport; 3264 3265 spin_lock(&host->target_lock); 3266 list_add_tail(&target->list, &host->target_list); 3267 spin_unlock(&host->target_lock); 3268 3269 scsi_scan_target(&target->scsi_host->shost_gendev, 3270 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL); 3271 3272 if (srp_connected_ch(target) < target->ch_count || 3273 target->qp_in_error) { 3274 shost_printk(KERN_INFO, target->scsi_host, 3275 PFX "SCSI scan failed - removing SCSI host\n"); 3276 srp_queue_remove_work(target); 3277 goto out; 3278 } 3279 3280 pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n", 3281 dev_name(&target->scsi_host->shost_gendev), 3282 srp_sdev_count(target->scsi_host)); 3283 3284 spin_lock_irq(&target->lock); 3285 if (target->state == SRP_TARGET_SCANNING) 3286 target->state = SRP_TARGET_LIVE; 3287 spin_unlock_irq(&target->lock); 3288 3289 out: 3290 return 0; 3291 } 3292 3293 static void srp_release_dev(struct device *dev) 3294 { 3295 struct srp_host *host = 3296 container_of(dev, struct srp_host, dev); 3297 3298 complete(&host->released); 3299 } 3300 3301 static struct class srp_class = { 3302 .name = "infiniband_srp", 3303 .dev_release = srp_release_dev 3304 }; 3305 3306 /** 3307 * srp_conn_unique() - check whether the connection to a target is unique 3308 * @host: SRP host. 3309 * @target: SRP target port. 3310 */ 3311 static bool srp_conn_unique(struct srp_host *host, 3312 struct srp_target_port *target) 3313 { 3314 struct srp_target_port *t; 3315 bool ret = false; 3316 3317 if (target->state == SRP_TARGET_REMOVED) 3318 goto out; 3319 3320 ret = true; 3321 3322 spin_lock(&host->target_lock); 3323 list_for_each_entry(t, &host->target_list, list) { 3324 if (t != target && 3325 target->id_ext == t->id_ext && 3326 target->ioc_guid == t->ioc_guid && 3327 target->initiator_ext == t->initiator_ext) { 3328 ret = false; 3329 break; 3330 } 3331 } 3332 spin_unlock(&host->target_lock); 3333 3334 out: 3335 return ret; 3336 } 3337 3338 /* 3339 * Target ports are added by writing 3340 * 3341 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>, 3342 * pkey=<P_Key>,service_id=<service ID> 3343 * or 3344 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>, 3345 * [src=<IPv4 address>,]dest=<IPv4 address>:<port number> 3346 * 3347 * to the add_target sysfs attribute. 
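 *
 * Illustrative example (all values below are placeholders, not real
 * identifiers):
 *
 *   echo id_ext=200100e08b000000,ioc_guid=00a0b80200402bd4,dgid=fe800000000000000005ad0000023457,pkey=ffff,service_id=00a0b80200402bd4 \
 *     > /sys/class/infiniband_srp/srp-<ib_dev>-<port>/add_target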
3348 */ 3349 enum { 3350 SRP_OPT_ERR = 0, 3351 SRP_OPT_ID_EXT = 1 << 0, 3352 SRP_OPT_IOC_GUID = 1 << 1, 3353 SRP_OPT_DGID = 1 << 2, 3354 SRP_OPT_PKEY = 1 << 3, 3355 SRP_OPT_SERVICE_ID = 1 << 4, 3356 SRP_OPT_MAX_SECT = 1 << 5, 3357 SRP_OPT_MAX_CMD_PER_LUN = 1 << 6, 3358 SRP_OPT_IO_CLASS = 1 << 7, 3359 SRP_OPT_INITIATOR_EXT = 1 << 8, 3360 SRP_OPT_CMD_SG_ENTRIES = 1 << 9, 3361 SRP_OPT_ALLOW_EXT_SG = 1 << 10, 3362 SRP_OPT_SG_TABLESIZE = 1 << 11, 3363 SRP_OPT_COMP_VECTOR = 1 << 12, 3364 SRP_OPT_TL_RETRY_COUNT = 1 << 13, 3365 SRP_OPT_QUEUE_SIZE = 1 << 14, 3366 SRP_OPT_IP_SRC = 1 << 15, 3367 SRP_OPT_IP_DEST = 1 << 16, 3368 SRP_OPT_TARGET_CAN_QUEUE= 1 << 17, 3369 }; 3370 3371 static unsigned int srp_opt_mandatory[] = { 3372 SRP_OPT_ID_EXT | 3373 SRP_OPT_IOC_GUID | 3374 SRP_OPT_DGID | 3375 SRP_OPT_PKEY | 3376 SRP_OPT_SERVICE_ID, 3377 SRP_OPT_ID_EXT | 3378 SRP_OPT_IOC_GUID | 3379 SRP_OPT_IP_DEST, 3380 }; 3381 3382 static const match_table_t srp_opt_tokens = { 3383 { SRP_OPT_ID_EXT, "id_ext=%s" }, 3384 { SRP_OPT_IOC_GUID, "ioc_guid=%s" }, 3385 { SRP_OPT_DGID, "dgid=%s" }, 3386 { SRP_OPT_PKEY, "pkey=%x" }, 3387 { SRP_OPT_SERVICE_ID, "service_id=%s" }, 3388 { SRP_OPT_MAX_SECT, "max_sect=%d" }, 3389 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" }, 3390 { SRP_OPT_TARGET_CAN_QUEUE, "target_can_queue=%d" }, 3391 { SRP_OPT_IO_CLASS, "io_class=%x" }, 3392 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" }, 3393 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" }, 3394 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" }, 3395 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" }, 3396 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" }, 3397 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" }, 3398 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" }, 3399 { SRP_OPT_IP_SRC, "src=%s" }, 3400 { SRP_OPT_IP_DEST, "dest=%s" }, 3401 { SRP_OPT_ERR, NULL } 3402 }; 3403 3404 /** 3405 * srp_parse_in - parse an IP address and port number combination 3406 * 3407 * Parse the following address formats: 3408 * - IPv4: <ip_address>:<port>, e.g. 1.2.3.4:5. 3409 * - IPv6: \[<ipv6_address>\]:<port>, e.g. [1::2:3%4]:5. 
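 *
 * The address is first parsed as IPv4; if that fails and the address part
 * is enclosed in square brackets, the brackets are stripped and the string
 * is re-parsed as IPv6.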
3410 */ 3411 static int srp_parse_in(struct net *net, struct sockaddr_storage *sa, 3412 const char *addr_port_str) 3413 { 3414 char *addr_end, *addr = kstrdup(addr_port_str, GFP_KERNEL); 3415 char *port_str; 3416 int ret; 3417 3418 if (!addr) 3419 return -ENOMEM; 3420 port_str = strrchr(addr, ':'); 3421 if (!port_str) 3422 return -EINVAL; 3423 *port_str++ = '\0'; 3424 ret = inet_pton_with_scope(net, AF_INET, addr, port_str, sa); 3425 if (ret && addr[0]) { 3426 addr_end = addr + strlen(addr) - 1; 3427 if (addr[0] == '[' && *addr_end == ']') { 3428 *addr_end = '\0'; 3429 ret = inet_pton_with_scope(net, AF_INET6, addr + 1, 3430 port_str, sa); 3431 } 3432 } 3433 kfree(addr); 3434 pr_debug("%s -> %pISpfsc\n", addr_port_str, sa); 3435 return ret; 3436 } 3437 3438 static int srp_parse_options(struct net *net, const char *buf, 3439 struct srp_target_port *target) 3440 { 3441 char *options, *sep_opt; 3442 char *p; 3443 substring_t args[MAX_OPT_ARGS]; 3444 unsigned long long ull; 3445 int opt_mask = 0; 3446 int token; 3447 int ret = -EINVAL; 3448 int i; 3449 3450 options = kstrdup(buf, GFP_KERNEL); 3451 if (!options) 3452 return -ENOMEM; 3453 3454 sep_opt = options; 3455 while ((p = strsep(&sep_opt, ",\n")) != NULL) { 3456 if (!*p) 3457 continue; 3458 3459 token = match_token(p, srp_opt_tokens, args); 3460 opt_mask |= token; 3461 3462 switch (token) { 3463 case SRP_OPT_ID_EXT: 3464 p = match_strdup(args); 3465 if (!p) { 3466 ret = -ENOMEM; 3467 goto out; 3468 } 3469 ret = kstrtoull(p, 16, &ull); 3470 if (ret) { 3471 pr_warn("invalid id_ext parameter '%s'\n", p); 3472 kfree(p); 3473 goto out; 3474 } 3475 target->id_ext = cpu_to_be64(ull); 3476 kfree(p); 3477 break; 3478 3479 case SRP_OPT_IOC_GUID: 3480 p = match_strdup(args); 3481 if (!p) { 3482 ret = -ENOMEM; 3483 goto out; 3484 } 3485 ret = kstrtoull(p, 16, &ull); 3486 if (ret) { 3487 pr_warn("invalid ioc_guid parameter '%s'\n", p); 3488 kfree(p); 3489 goto out; 3490 } 3491 target->ioc_guid = cpu_to_be64(ull); 3492 kfree(p); 3493 break; 3494 3495 case SRP_OPT_DGID: 3496 p = match_strdup(args); 3497 if (!p) { 3498 ret = -ENOMEM; 3499 goto out; 3500 } 3501 if (strlen(p) != 32) { 3502 pr_warn("bad dest GID parameter '%s'\n", p); 3503 kfree(p); 3504 goto out; 3505 } 3506 3507 ret = hex2bin(target->ib_cm.orig_dgid.raw, p, 16); 3508 kfree(p); 3509 if (ret < 0) 3510 goto out; 3511 break; 3512 3513 case SRP_OPT_PKEY: 3514 if (match_hex(args, &token)) { 3515 pr_warn("bad P_Key parameter '%s'\n", p); 3516 goto out; 3517 } 3518 target->ib_cm.pkey = cpu_to_be16(token); 3519 break; 3520 3521 case SRP_OPT_SERVICE_ID: 3522 p = match_strdup(args); 3523 if (!p) { 3524 ret = -ENOMEM; 3525 goto out; 3526 } 3527 ret = kstrtoull(p, 16, &ull); 3528 if (ret) { 3529 pr_warn("bad service_id parameter '%s'\n", p); 3530 kfree(p); 3531 goto out; 3532 } 3533 target->ib_cm.service_id = cpu_to_be64(ull); 3534 kfree(p); 3535 break; 3536 3537 case SRP_OPT_IP_SRC: 3538 p = match_strdup(args); 3539 if (!p) { 3540 ret = -ENOMEM; 3541 goto out; 3542 } 3543 ret = srp_parse_in(net, &target->rdma_cm.src.ss, p); 3544 if (ret < 0) { 3545 pr_warn("bad source parameter '%s'\n", p); 3546 kfree(p); 3547 goto out; 3548 } 3549 target->rdma_cm.src_specified = true; 3550 kfree(p); 3551 break; 3552 3553 case SRP_OPT_IP_DEST: 3554 p = match_strdup(args); 3555 if (!p) { 3556 ret = -ENOMEM; 3557 goto out; 3558 } 3559 ret = srp_parse_in(net, &target->rdma_cm.dst.ss, p); 3560 if (ret < 0) { 3561 pr_warn("bad dest parameter '%s'\n", p); 3562 kfree(p); 3563 goto out; 3564 } 3565 target->using_rdma_cm = 
true; 3566 kfree(p); 3567 break; 3568 3569 case SRP_OPT_MAX_SECT: 3570 if (match_int(args, &token)) { 3571 pr_warn("bad max sect parameter '%s'\n", p); 3572 goto out; 3573 } 3574 target->scsi_host->max_sectors = token; 3575 break; 3576 3577 case SRP_OPT_QUEUE_SIZE: 3578 if (match_int(args, &token) || token < 1) { 3579 pr_warn("bad queue_size parameter '%s'\n", p); 3580 goto out; 3581 } 3582 target->scsi_host->can_queue = token; 3583 target->queue_size = token + SRP_RSP_SQ_SIZE + 3584 SRP_TSK_MGMT_SQ_SIZE; 3585 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN)) 3586 target->scsi_host->cmd_per_lun = token; 3587 break; 3588 3589 case SRP_OPT_MAX_CMD_PER_LUN: 3590 if (match_int(args, &token) || token < 1) { 3591 pr_warn("bad max cmd_per_lun parameter '%s'\n", 3592 p); 3593 goto out; 3594 } 3595 target->scsi_host->cmd_per_lun = token; 3596 break; 3597 3598 case SRP_OPT_TARGET_CAN_QUEUE: 3599 if (match_int(args, &token) || token < 1) { 3600 pr_warn("bad max target_can_queue parameter '%s'\n", 3601 p); 3602 goto out; 3603 } 3604 target->target_can_queue = token; 3605 break; 3606 3607 case SRP_OPT_IO_CLASS: 3608 if (match_hex(args, &token)) { 3609 pr_warn("bad IO class parameter '%s'\n", p); 3610 goto out; 3611 } 3612 if (token != SRP_REV10_IB_IO_CLASS && 3613 token != SRP_REV16A_IB_IO_CLASS) { 3614 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n", 3615 token, SRP_REV10_IB_IO_CLASS, 3616 SRP_REV16A_IB_IO_CLASS); 3617 goto out; 3618 } 3619 target->io_class = token; 3620 break; 3621 3622 case SRP_OPT_INITIATOR_EXT: 3623 p = match_strdup(args); 3624 if (!p) { 3625 ret = -ENOMEM; 3626 goto out; 3627 } 3628 ret = kstrtoull(p, 16, &ull); 3629 if (ret) { 3630 pr_warn("bad initiator_ext value '%s'\n", p); 3631 kfree(p); 3632 goto out; 3633 } 3634 target->initiator_ext = cpu_to_be64(ull); 3635 kfree(p); 3636 break; 3637 3638 case SRP_OPT_CMD_SG_ENTRIES: 3639 if (match_int(args, &token) || token < 1 || token > 255) { 3640 pr_warn("bad max cmd_sg_entries parameter '%s'\n", 3641 p); 3642 goto out; 3643 } 3644 target->cmd_sg_cnt = token; 3645 break; 3646 3647 case SRP_OPT_ALLOW_EXT_SG: 3648 if (match_int(args, &token)) { 3649 pr_warn("bad allow_ext_sg parameter '%s'\n", p); 3650 goto out; 3651 } 3652 target->allow_ext_sg = !!token; 3653 break; 3654 3655 case SRP_OPT_SG_TABLESIZE: 3656 if (match_int(args, &token) || token < 1 || 3657 token > SG_MAX_SEGMENTS) { 3658 pr_warn("bad max sg_tablesize parameter '%s'\n", 3659 p); 3660 goto out; 3661 } 3662 target->sg_tablesize = token; 3663 break; 3664 3665 case SRP_OPT_COMP_VECTOR: 3666 if (match_int(args, &token) || token < 0) { 3667 pr_warn("bad comp_vector parameter '%s'\n", p); 3668 goto out; 3669 } 3670 target->comp_vector = token; 3671 break; 3672 3673 case SRP_OPT_TL_RETRY_COUNT: 3674 if (match_int(args, &token) || token < 2 || token > 7) { 3675 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n", 3676 p); 3677 goto out; 3678 } 3679 target->tl_retry_count = token; 3680 break; 3681 3682 default: 3683 pr_warn("unknown parameter or missing value '%s' in target creation request\n", 3684 p); 3685 goto out; 3686 } 3687 } 3688 3689 for (i = 0; i < ARRAY_SIZE(srp_opt_mandatory); i++) { 3690 if ((opt_mask & srp_opt_mandatory[i]) == srp_opt_mandatory[i]) { 3691 ret = 0; 3692 break; 3693 } 3694 } 3695 if (ret) 3696 pr_warn("target creation request is missing one or more parameters\n"); 3697 3698 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue 3699 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN)) 3700 
pr_warn("cmd_per_lun = %d > queue_size = %d\n", 3701 target->scsi_host->cmd_per_lun, 3702 target->scsi_host->can_queue); 3703 3704 out: 3705 kfree(options); 3706 return ret; 3707 } 3708 3709 static ssize_t srp_create_target(struct device *dev, 3710 struct device_attribute *attr, 3711 const char *buf, size_t count) 3712 { 3713 struct srp_host *host = 3714 container_of(dev, struct srp_host, dev); 3715 struct Scsi_Host *target_host; 3716 struct srp_target_port *target; 3717 struct srp_rdma_ch *ch; 3718 struct srp_device *srp_dev = host->srp_dev; 3719 struct ib_device *ibdev = srp_dev->dev; 3720 int ret, node_idx, node, cpu, i; 3721 unsigned int max_sectors_per_mr, mr_per_cmd = 0; 3722 bool multich = false; 3723 3724 target_host = scsi_host_alloc(&srp_template, 3725 sizeof (struct srp_target_port)); 3726 if (!target_host) 3727 return -ENOMEM; 3728 3729 target_host->transportt = ib_srp_transport_template; 3730 target_host->max_channel = 0; 3731 target_host->max_id = 1; 3732 target_host->max_lun = -1LL; 3733 target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb; 3734 3735 target = host_to_target(target_host); 3736 3737 target->net = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); 3738 target->io_class = SRP_REV16A_IB_IO_CLASS; 3739 target->scsi_host = target_host; 3740 target->srp_host = host; 3741 target->lkey = host->srp_dev->pd->local_dma_lkey; 3742 target->global_rkey = host->srp_dev->global_rkey; 3743 target->cmd_sg_cnt = cmd_sg_entries; 3744 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries; 3745 target->allow_ext_sg = allow_ext_sg; 3746 target->tl_retry_count = 7; 3747 target->queue_size = SRP_DEFAULT_QUEUE_SIZE; 3748 3749 /* 3750 * Avoid that the SCSI host can be removed by srp_remove_target() 3751 * before this function returns. 3752 */ 3753 scsi_host_get(target->scsi_host); 3754 3755 ret = mutex_lock_interruptible(&host->add_target_mutex); 3756 if (ret < 0) 3757 goto put; 3758 3759 ret = srp_parse_options(target->net, buf, target); 3760 if (ret) 3761 goto out; 3762 3763 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE; 3764 3765 if (!srp_conn_unique(target->srp_host, target)) { 3766 if (target->using_rdma_cm) { 3767 shost_printk(KERN_INFO, target->scsi_host, 3768 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;dest=%pIS\n", 3769 be64_to_cpu(target->id_ext), 3770 be64_to_cpu(target->ioc_guid), 3771 &target->rdma_cm.dst); 3772 } else { 3773 shost_printk(KERN_INFO, target->scsi_host, 3774 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n", 3775 be64_to_cpu(target->id_ext), 3776 be64_to_cpu(target->ioc_guid), 3777 be64_to_cpu(target->initiator_ext)); 3778 } 3779 ret = -EEXIST; 3780 goto out; 3781 } 3782 3783 if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg && 3784 target->cmd_sg_cnt < target->sg_tablesize) { 3785 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n"); 3786 target->sg_tablesize = target->cmd_sg_cnt; 3787 } 3788 3789 if (srp_dev->use_fast_reg || srp_dev->use_fmr) { 3790 bool gaps_reg = (ibdev->attrs.device_cap_flags & 3791 IB_DEVICE_SG_GAPS_REG); 3792 3793 max_sectors_per_mr = srp_dev->max_pages_per_mr << 3794 (ilog2(srp_dev->mr_page_size) - 9); 3795 if (!gaps_reg) { 3796 /* 3797 * FR and FMR can only map one HCA page per entry. 
If 3798 * the start address is not aligned on a HCA page 3799 * boundary two entries will be used for the head and 3800 * the tail although these two entries combined 3801 * contain at most one HCA page of data. Hence the "+ 3802 * 1" in the calculation below. 3803 * 3804 * The indirect data buffer descriptor is contiguous 3805 * so the memory for that buffer will only be 3806 * registered if register_always is true. Hence add 3807 * one to mr_per_cmd if register_always has been set. 3808 */ 3809 mr_per_cmd = register_always + 3810 (target->scsi_host->max_sectors + 1 + 3811 max_sectors_per_mr - 1) / max_sectors_per_mr; 3812 } else { 3813 mr_per_cmd = register_always + 3814 (target->sg_tablesize + 3815 srp_dev->max_pages_per_mr - 1) / 3816 srp_dev->max_pages_per_mr; 3817 } 3818 pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n", 3819 target->scsi_host->max_sectors, srp_dev->max_pages_per_mr, srp_dev->mr_page_size, 3820 max_sectors_per_mr, mr_per_cmd); 3821 } 3822 3823 target_host->sg_tablesize = target->sg_tablesize; 3824 target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd; 3825 target->mr_per_cmd = mr_per_cmd; 3826 target->indirect_size = target->sg_tablesize * 3827 sizeof (struct srp_direct_buf); 3828 target->max_iu_len = sizeof (struct srp_cmd) + 3829 sizeof (struct srp_indirect_buf) + 3830 target->cmd_sg_cnt * sizeof (struct srp_direct_buf); 3831 3832 INIT_WORK(&target->tl_err_work, srp_tl_err_work); 3833 INIT_WORK(&target->remove_work, srp_remove_work); 3834 spin_lock_init(&target->lock); 3835 ret = rdma_query_gid(ibdev, host->port, 0, &target->sgid); 3836 if (ret) 3837 goto out; 3838 3839 ret = -ENOMEM; 3840 target->ch_count = max_t(unsigned, num_online_nodes(), 3841 min(ch_count ? : 3842 min(4 * num_online_nodes(), 3843 ibdev->num_comp_vectors), 3844 num_online_cpus())); 3845 target->ch = kcalloc(target->ch_count, sizeof(*target->ch), 3846 GFP_KERNEL); 3847 if (!target->ch) 3848 goto out; 3849 3850 node_idx = 0; 3851 for_each_online_node(node) { 3852 const int ch_start = (node_idx * target->ch_count / 3853 num_online_nodes()); 3854 const int ch_end = ((node_idx + 1) * target->ch_count / 3855 num_online_nodes()); 3856 const int cv_start = node_idx * ibdev->num_comp_vectors / 3857 num_online_nodes(); 3858 const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors / 3859 num_online_nodes(); 3860 int cpu_idx = 0; 3861 3862 for_each_online_cpu(cpu) { 3863 if (cpu_to_node(cpu) != node) 3864 continue; 3865 if (ch_start + cpu_idx >= ch_end) 3866 continue; 3867 ch = &target->ch[ch_start + cpu_idx]; 3868 ch->target = target; 3869 ch->comp_vector = cv_start == cv_end ? 
cv_start : 3870 cv_start + cpu_idx % (cv_end - cv_start); 3871 spin_lock_init(&ch->lock); 3872 INIT_LIST_HEAD(&ch->free_tx); 3873 ret = srp_new_cm_id(ch); 3874 if (ret) 3875 goto err_disconnect; 3876 3877 ret = srp_create_ch_ib(ch); 3878 if (ret) 3879 goto err_disconnect; 3880 3881 ret = srp_alloc_req_data(ch); 3882 if (ret) 3883 goto err_disconnect; 3884 3885 ret = srp_connect_ch(ch, multich); 3886 if (ret) { 3887 char dst[64]; 3888 3889 if (target->using_rdma_cm) 3890 snprintf(dst, sizeof(dst), "%pIS", 3891 &target->rdma_cm.dst); 3892 else 3893 snprintf(dst, sizeof(dst), "%pI6", 3894 target->ib_cm.orig_dgid.raw); 3895 shost_printk(KERN_ERR, target->scsi_host, 3896 PFX "Connection %d/%d to %s failed\n", 3897 ch_start + cpu_idx, 3898 target->ch_count, dst); 3899 if (node_idx == 0 && cpu_idx == 0) { 3900 goto free_ch; 3901 } else { 3902 srp_free_ch_ib(target, ch); 3903 srp_free_req_data(target, ch); 3904 target->ch_count = ch - target->ch; 3905 goto connected; 3906 } 3907 } 3908 3909 multich = true; 3910 cpu_idx++; 3911 } 3912 node_idx++; 3913 } 3914 3915 connected: 3916 target->scsi_host->nr_hw_queues = target->ch_count; 3917 3918 ret = srp_add_target(host, target); 3919 if (ret) 3920 goto err_disconnect; 3921 3922 if (target->state != SRP_TARGET_REMOVED) { 3923 if (target->using_rdma_cm) { 3924 shost_printk(KERN_DEBUG, target->scsi_host, PFX 3925 "new target: id_ext %016llx ioc_guid %016llx sgid %pI6 dest %pIS\n", 3926 be64_to_cpu(target->id_ext), 3927 be64_to_cpu(target->ioc_guid), 3928 target->sgid.raw, &target->rdma_cm.dst); 3929 } else { 3930 shost_printk(KERN_DEBUG, target->scsi_host, PFX 3931 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n", 3932 be64_to_cpu(target->id_ext), 3933 be64_to_cpu(target->ioc_guid), 3934 be16_to_cpu(target->ib_cm.pkey), 3935 be64_to_cpu(target->ib_cm.service_id), 3936 target->sgid.raw, 3937 target->ib_cm.orig_dgid.raw); 3938 } 3939 } 3940 3941 ret = count; 3942 3943 out: 3944 mutex_unlock(&host->add_target_mutex); 3945 3946 put: 3947 scsi_host_put(target->scsi_host); 3948 if (ret < 0) { 3949 /* 3950 * If a call to srp_remove_target() has not been scheduled, 3951 * drop the network namespace reference now that was obtained 3952 * earlier in this function. 
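 *
 * The scsi_host_put() below (in addition to the one at the put: label,
 * which balances the scsi_host_get() above) drops the reference returned
 * by scsi_host_alloc(), so the SCSI host itself is freed when target
 * creation fails.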
	for_each_online_node(node) {
		const int ch_start = (node_idx * target->ch_count /
				      num_online_nodes());
		const int ch_end = ((node_idx + 1) * target->ch_count /
				    num_online_nodes());
		const int cv_start = node_idx * ibdev->num_comp_vectors /
				     num_online_nodes();
		const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors /
				   num_online_nodes();
		int cpu_idx = 0;

		for_each_online_cpu(cpu) {
			if (cpu_to_node(cpu) != node)
				continue;
			if (ch_start + cpu_idx >= ch_end)
				continue;
			ch = &target->ch[ch_start + cpu_idx];
			ch->target = target;
			ch->comp_vector = cv_start == cv_end ? cv_start :
				cv_start + cpu_idx % (cv_end - cv_start);
			spin_lock_init(&ch->lock);
			INIT_LIST_HEAD(&ch->free_tx);
			ret = srp_new_cm_id(ch);
			if (ret)
				goto err_disconnect;

			ret = srp_create_ch_ib(ch);
			if (ret)
				goto err_disconnect;

			ret = srp_alloc_req_data(ch);
			if (ret)
				goto err_disconnect;

			ret = srp_connect_ch(ch, multich);
			if (ret) {
				char dst[64];

				if (target->using_rdma_cm)
					snprintf(dst, sizeof(dst), "%pIS",
						 &target->rdma_cm.dst);
				else
					snprintf(dst, sizeof(dst), "%pI6",
						 target->ib_cm.orig_dgid.raw);
				shost_printk(KERN_ERR, target->scsi_host,
					     PFX "Connection %d/%d to %s failed\n",
					     ch_start + cpu_idx,
					     target->ch_count, dst);
				if (node_idx == 0 && cpu_idx == 0) {
					goto free_ch;
				} else {
					srp_free_ch_ib(target, ch);
					srp_free_req_data(target, ch);
					target->ch_count = ch - target->ch;
					goto connected;
				}
			}

			multich = true;
			cpu_idx++;
		}
		node_idx++;
	}

connected:
	target->scsi_host->nr_hw_queues = target->ch_count;

	ret = srp_add_target(host, target);
	if (ret)
		goto err_disconnect;

	if (target->state != SRP_TARGET_REMOVED) {
		if (target->using_rdma_cm) {
			shost_printk(KERN_DEBUG, target->scsi_host, PFX
				     "new target: id_ext %016llx ioc_guid %016llx sgid %pI6 dest %pIS\n",
				     be64_to_cpu(target->id_ext),
				     be64_to_cpu(target->ioc_guid),
				     target->sgid.raw, &target->rdma_cm.dst);
		} else {
			shost_printk(KERN_DEBUG, target->scsi_host, PFX
				     "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
				     be64_to_cpu(target->id_ext),
				     be64_to_cpu(target->ioc_guid),
				     be16_to_cpu(target->ib_cm.pkey),
				     be64_to_cpu(target->ib_cm.service_id),
				     target->sgid.raw,
				     target->ib_cm.orig_dgid.raw);
		}
	}

	ret = count;

out:
	mutex_unlock(&host->add_target_mutex);

put:
	scsi_host_put(target->scsi_host);
	if (ret < 0) {
		/*
		 * If a call to srp_remove_target() has not been scheduled,
		 * drop the network namespace reference that was obtained
		 * earlier in this function now.
		 */
		if (target->state != SRP_TARGET_REMOVED)
			kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net);
		scsi_host_put(target->scsi_host);
	}

	return ret;

err_disconnect:
	srp_disconnect_target(target);

free_ch:
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		srp_free_ch_ib(target, ch);
		srp_free_req_data(target, ch);
	}

	kfree(target->ch);
	goto out;
}
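
/*
 * Writing a comma-separated list of key=value options to the "add_target"
 * attribute of an srp-<ibdev>-<port> device under /sys/class/infiniband_srp
 * invokes srp_create_target() above. An illustrative invocation (placeholder
 * values; normally issued by srp_daemon rather than by hand) could look like:
 *
 *   echo "id_ext=200100e08b000000,ioc_guid=0002c90200402bd4,
 *         dgid=fe800000000000000002c90200402bd5,pkey=ffff,
 *         service_id=0002c90200402bd4" \
 *        > /sys/class/infiniband_srp/srp-mlx5_0-1/add_target
 *
 * (shown wrapped for readability; the option string is a single line). The
 * exact set of accepted options is defined by the option parser earlier in
 * this file.
 */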
static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);

static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
			  char *buf)
{
	struct srp_host *host = container_of(dev, struct srp_host, dev);

	return sprintf(buf, "%s\n", dev_name(&host->srp_dev->dev->dev));
}

static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);

static ssize_t show_port(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	struct srp_host *host = container_of(dev, struct srp_host, dev);

	return sprintf(buf, "%d\n", host->port);
}

static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);

static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
{
	struct srp_host *host;

	host = kzalloc(sizeof *host, GFP_KERNEL);
	if (!host)
		return NULL;

	INIT_LIST_HEAD(&host->target_list);
	spin_lock_init(&host->target_lock);
	init_completion(&host->released);
	mutex_init(&host->add_target_mutex);
	host->srp_dev = device;
	host->port = port;

	host->dev.class = &srp_class;
	host->dev.parent = device->dev->dev.parent;
	dev_set_name(&host->dev, "srp-%s-%d", dev_name(&device->dev->dev),
		     port);

	if (device_register(&host->dev))
		goto free_host;
	if (device_create_file(&host->dev, &dev_attr_add_target))
		goto err_class;
	if (device_create_file(&host->dev, &dev_attr_ibdev))
		goto err_class;
	if (device_create_file(&host->dev, &dev_attr_port))
		goto err_class;

	return host;

err_class:
	device_unregister(&host->dev);

free_host:
	kfree(host);

	return NULL;
}

static void srp_add_one(struct ib_device *device)
{
	struct srp_device *srp_dev;
	struct ib_device_attr *attr = &device->attrs;
	struct srp_host *host;
	int mr_page_shift, p;
	u64 max_pages_per_mr;
	unsigned int flags = 0;

	srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
	if (!srp_dev)
		return;

	/*
	 * Use the smallest page size supported by the HCA, down to a
	 * minimum of 4096 bytes. We're unlikely to build large sglists
	 * out of smaller entries.
	 */
	mr_page_shift = max(12, ffs(attr->page_size_cap) - 1);
	srp_dev->mr_page_size = 1 << mr_page_shift;
	srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
	max_pages_per_mr = attr->max_mr_size;
	do_div(max_pages_per_mr, srp_dev->mr_page_size);
	pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
		 attr->max_mr_size, srp_dev->mr_page_size,
		 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
	srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
					  max_pages_per_mr);
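
	/*
	 * Illustrative example (made-up device attributes): a HCA reporting
	 * page_size_cap = 0xfffff000 supports 4 KiB as its smallest page
	 * size, so ffs() - 1 = 12 and mr_page_size = 4096. With
	 * max_mr_size = 4 GiB that allows 4 GiB / 4 KiB = 1048576 pages per
	 * MR, which is then clamped to SRP_MAX_PAGES_PER_MR.
	 */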

	srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
			    device->map_phys_fmr && device->unmap_fmr);
	srp_dev->has_fr = (attr->device_cap_flags &
			   IB_DEVICE_MEM_MGT_EXTENSIONS);
	if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
		dev_warn(&device->dev, "neither FMR nor FR is supported\n");
	} else if (!never_register &&
		   attr->max_mr_size >= 2 * srp_dev->mr_page_size) {
		srp_dev->use_fast_reg = (srp_dev->has_fr &&
					 (!srp_dev->has_fmr || prefer_fr));
		srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
	}

	if (never_register || !register_always ||
	    (!srp_dev->has_fmr && !srp_dev->has_fr))
		flags |= IB_PD_UNSAFE_GLOBAL_RKEY;

	if (srp_dev->use_fast_reg) {
		srp_dev->max_pages_per_mr =
			min_t(u32, srp_dev->max_pages_per_mr,
			      attr->max_fast_reg_page_list_len);
	}
	srp_dev->mr_max_size = srp_dev->mr_page_size *
			       srp_dev->max_pages_per_mr;
	pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
		 dev_name(&device->dev), mr_page_shift, attr->max_mr_size,
		 attr->max_fast_reg_page_list_len,
		 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);

	INIT_LIST_HEAD(&srp_dev->dev_list);

	srp_dev->dev = device;
	srp_dev->pd = ib_alloc_pd(device, flags);
	if (IS_ERR(srp_dev->pd))
		goto free_dev;

	if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
		srp_dev->global_rkey = srp_dev->pd->unsafe_global_rkey;
		WARN_ON_ONCE(srp_dev->global_rkey == 0);
	}

	for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
		host = srp_add_port(srp_dev, p);
		if (host)
			list_add_tail(&host->list, &srp_dev->dev_list);
	}

	ib_set_client_data(device, &srp_client, srp_dev);
	return;

free_dev:
	kfree(srp_dev);
}

static void srp_remove_one(struct ib_device *device, void *client_data)
{
	struct srp_device *srp_dev;
	struct srp_host *host, *tmp_host;
	struct srp_target_port *target;

	srp_dev = client_data;
	if (!srp_dev)
		return;

	list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
		device_unregister(&host->dev);
		/*
		 * Wait for the sysfs entry to go away, so that no new
		 * target ports can be created.
		 */
		wait_for_completion(&host->released);

		/*
		 * Remove all target ports.
		 */
		spin_lock(&host->target_lock);
		list_for_each_entry(target, &host->target_list, list)
			srp_queue_remove_work(target);
		spin_unlock(&host->target_lock);

		/*
		 * Wait for tl_err and target port removal tasks.
		 */
		flush_workqueue(system_long_wq);
		flush_workqueue(srp_remove_wq);

		kfree(host);
	}

	ib_dealloc_pd(srp_dev->pd);

	kfree(srp_dev);
}

static struct srp_function_template ib_srp_transport_functions = {
	.has_rport_state	 = true,
	.reset_timer_if_blocked	 = true,
	.reconnect_delay	 = &srp_reconnect_delay,
	.fast_io_fail_tmo	 = &srp_fast_io_fail_tmo,
	.dev_loss_tmo		 = &srp_dev_loss_tmo,
	.reconnect		 = srp_rport_reconnect,
	.rport_delete		 = srp_rport_delete,
	.terminate_rport_io	 = srp_terminate_io,
};

static int __init srp_init_module(void)
{
	int ret;

	if (srp_sg_tablesize) {
		pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
		if (!cmd_sg_entries)
			cmd_sg_entries = srp_sg_tablesize;
	}

	if (!cmd_sg_entries)
		cmd_sg_entries = SRP_DEF_SG_TABLESIZE;

	if (cmd_sg_entries > 255) {
		pr_warn("Clamping cmd_sg_entries to 255\n");
		cmd_sg_entries = 255;
	}

	if (!indirect_sg_entries)
		indirect_sg_entries = cmd_sg_entries;
	else if (indirect_sg_entries < cmd_sg_entries) {
		pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
			cmd_sg_entries);
		indirect_sg_entries = cmd_sg_entries;
	}

	if (indirect_sg_entries > SG_MAX_SEGMENTS) {
		pr_warn("Clamping indirect_sg_entries to %u\n",
			SG_MAX_SEGMENTS);
		indirect_sg_entries = SG_MAX_SEGMENTS;
	}

	srp_remove_wq = create_workqueue("srp_remove");
	if (!srp_remove_wq) {
		ret = -ENOMEM;
		goto out;
	}

	ret = -ENOMEM;
	ib_srp_transport_template =
		srp_attach_transport(&ib_srp_transport_functions);
	if (!ib_srp_transport_template)
		goto destroy_wq;

	ret = class_register(&srp_class);
	if (ret) {
		pr_err("couldn't register class infiniband_srp\n");
		goto release_tr;
	}

	ib_sa_register_client(&srp_sa_client);

	ret = ib_register_client(&srp_client);
	if (ret) {
		pr_err("couldn't register IB client\n");
		goto unreg_sa;
	}

out:
	return ret;

unreg_sa:
	ib_sa_unregister_client(&srp_sa_client);
	class_unregister(&srp_class);

release_tr:
	srp_release_transport(ib_srp_transport_template);

destroy_wq:
	destroy_workqueue(srp_remove_wq);
	goto out;
}

static void __exit srp_cleanup_module(void)
{
	ib_unregister_client(&srp_client);
	ib_sa_unregister_client(&srp_sa_client);
	class_unregister(&srp_class);
	srp_release_transport(ib_srp_transport_template);
	destroy_workqueue(srp_remove_wq);
}

module_init(srp_init_module);
module_exit(srp_cleanup_module);