1 /* 2 * Copyright (c) 2005 Cisco Systems. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 34 35 #include <linux/module.h> 36 #include <linux/init.h> 37 #include <linux/slab.h> 38 #include <linux/err.h> 39 #include <linux/string.h> 40 #include <linux/parser.h> 41 #include <linux/random.h> 42 #include <linux/jiffies.h> 43 #include <linux/lockdep.h> 44 #include <linux/inet.h> 45 #include <rdma/ib_cache.h> 46 47 #include <linux/atomic.h> 48 49 #include <scsi/scsi.h> 50 #include <scsi/scsi_device.h> 51 #include <scsi/scsi_dbg.h> 52 #include <scsi/scsi_tcq.h> 53 #include <scsi/srp.h> 54 #include <scsi/scsi_transport_srp.h> 55 56 #include "ib_srp.h" 57 58 #define DRV_NAME "ib_srp" 59 #define PFX DRV_NAME ": " 60 61 MODULE_AUTHOR("Roland Dreier"); 62 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator"); 63 MODULE_LICENSE("Dual BSD/GPL"); 64 65 #if !defined(CONFIG_DYNAMIC_DEBUG) 66 #define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) 67 #define DYNAMIC_DEBUG_BRANCH(descriptor) false 68 #endif 69 70 static unsigned int srp_sg_tablesize; 71 static unsigned int cmd_sg_entries; 72 static unsigned int indirect_sg_entries; 73 static bool allow_ext_sg; 74 static bool prefer_fr = true; 75 static bool register_always = true; 76 static bool never_register; 77 static int topspin_workarounds = 1; 78 79 module_param(srp_sg_tablesize, uint, 0444); 80 MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries"); 81 82 module_param(cmd_sg_entries, uint, 0444); 83 MODULE_PARM_DESC(cmd_sg_entries, 84 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)"); 85 86 module_param(indirect_sg_entries, uint, 0444); 87 MODULE_PARM_DESC(indirect_sg_entries, 88 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")"); 89 90 module_param(allow_ext_sg, bool, 0444); 91 MODULE_PARM_DESC(allow_ext_sg, 92 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)"); 93 94 module_param(topspin_workarounds, int, 0444); 95 MODULE_PARM_DESC(topspin_workarounds, 96 "Enable workarounds for Topspin/Cisco SRP target bugs 
if != 0"); 97 98 module_param(prefer_fr, bool, 0444); 99 MODULE_PARM_DESC(prefer_fr, 100 "Whether to use fast registration if both FMR and fast registration are supported"); 101 102 module_param(register_always, bool, 0444); 103 MODULE_PARM_DESC(register_always, 104 "Use memory registration even for contiguous memory regions"); 105 106 module_param(never_register, bool, 0444); 107 MODULE_PARM_DESC(never_register, "Never register memory"); 108 109 static const struct kernel_param_ops srp_tmo_ops; 110 111 static int srp_reconnect_delay = 10; 112 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay, 113 S_IRUGO | S_IWUSR); 114 MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts"); 115 116 static int srp_fast_io_fail_tmo = 15; 117 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo, 118 S_IRUGO | S_IWUSR); 119 MODULE_PARM_DESC(fast_io_fail_tmo, 120 "Number of seconds between the observation of a transport" 121 " layer error and failing all I/O. \"off\" means that this" 122 " functionality is disabled."); 123 124 static int srp_dev_loss_tmo = 600; 125 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo, 126 S_IRUGO | S_IWUSR); 127 MODULE_PARM_DESC(dev_loss_tmo, 128 "Maximum number of seconds that the SRP transport should" 129 " insulate transport layer errors. After this time has been" 130 " exceeded the SCSI host is removed. Should be" 131 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT) 132 " if fast_io_fail_tmo has not been set. \"off\" means that" 133 " this functionality is disabled."); 134 135 static bool srp_use_imm_data = true; 136 module_param_named(use_imm_data, srp_use_imm_data, bool, 0644); 137 MODULE_PARM_DESC(use_imm_data, 138 "Whether or not to request permission to use immediate data during SRP login."); 139 140 static unsigned int srp_max_imm_data = 8 * 1024; 141 module_param_named(max_imm_data, srp_max_imm_data, uint, 0644); 142 MODULE_PARM_DESC(max_imm_data, "Maximum immediate data size."); 143 144 static unsigned ch_count; 145 module_param(ch_count, uint, 0444); 146 MODULE_PARM_DESC(ch_count, 147 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. 
The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA."); 148 149 static void srp_add_one(struct ib_device *device); 150 static void srp_remove_one(struct ib_device *device, void *client_data); 151 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc); 152 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc, 153 const char *opname); 154 static int srp_ib_cm_handler(struct ib_cm_id *cm_id, 155 const struct ib_cm_event *event); 156 static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id, 157 struct rdma_cm_event *event); 158 159 static struct scsi_transport_template *ib_srp_transport_template; 160 static struct workqueue_struct *srp_remove_wq; 161 162 static struct ib_client srp_client = { 163 .name = "srp", 164 .add = srp_add_one, 165 .remove = srp_remove_one 166 }; 167 168 static struct ib_sa_client srp_sa_client; 169 170 static int srp_tmo_get(char *buffer, const struct kernel_param *kp) 171 { 172 int tmo = *(int *)kp->arg; 173 174 if (tmo >= 0) 175 return sprintf(buffer, "%d", tmo); 176 else 177 return sprintf(buffer, "off"); 178 } 179 180 static int srp_tmo_set(const char *val, const struct kernel_param *kp) 181 { 182 int tmo, res; 183 184 res = srp_parse_tmo(&tmo, val); 185 if (res) 186 goto out; 187 188 if (kp->arg == &srp_reconnect_delay) 189 res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo, 190 srp_dev_loss_tmo); 191 else if (kp->arg == &srp_fast_io_fail_tmo) 192 res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo); 193 else 194 res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo, 195 tmo); 196 if (res) 197 goto out; 198 *(int *)kp->arg = tmo; 199 200 out: 201 return res; 202 } 203 204 static const struct kernel_param_ops srp_tmo_ops = { 205 .get = srp_tmo_get, 206 .set = srp_tmo_set, 207 }; 208 209 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host) 210 { 211 return (struct srp_target_port *) host->hostdata; 212 } 213 214 static const char *srp_target_info(struct Scsi_Host *host) 215 { 216 return host_to_target(host)->target_name; 217 } 218 219 static int srp_target_is_topspin(struct srp_target_port *target) 220 { 221 static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad }; 222 static const u8 cisco_oui[3] = { 0x00, 0x1b, 0x0d }; 223 224 return topspin_workarounds && 225 (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) || 226 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui)); 227 } 228 229 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size, 230 gfp_t gfp_mask, 231 enum dma_data_direction direction) 232 { 233 struct srp_iu *iu; 234 235 iu = kmalloc(sizeof *iu, gfp_mask); 236 if (!iu) 237 goto out; 238 239 iu->buf = kzalloc(size, gfp_mask); 240 if (!iu->buf) 241 goto out_free_iu; 242 243 iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size, 244 direction); 245 if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma)) 246 goto out_free_buf; 247 248 iu->size = size; 249 iu->direction = direction; 250 251 return iu; 252 253 out_free_buf: 254 kfree(iu->buf); 255 out_free_iu: 256 kfree(iu); 257 out: 258 return NULL; 259 } 260 261 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu) 262 { 263 if (!iu) 264 return; 265 266 ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size, 267 iu->direction); 268 kfree(iu->buf); 269 kfree(iu); 270 } 271 272 static void srp_qp_event(struct ib_event *event, void *context) 273 { 274 pr_debug("QP event %s (%d)\n", 275 ib_event_msg(event->event), 
event->event); 276 } 277 278 static int srp_init_ib_qp(struct srp_target_port *target, 279 struct ib_qp *qp) 280 { 281 struct ib_qp_attr *attr; 282 int ret; 283 284 attr = kmalloc(sizeof *attr, GFP_KERNEL); 285 if (!attr) 286 return -ENOMEM; 287 288 ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev, 289 target->srp_host->port, 290 be16_to_cpu(target->ib_cm.pkey), 291 &attr->pkey_index); 292 if (ret) 293 goto out; 294 295 attr->qp_state = IB_QPS_INIT; 296 attr->qp_access_flags = (IB_ACCESS_REMOTE_READ | 297 IB_ACCESS_REMOTE_WRITE); 298 attr->port_num = target->srp_host->port; 299 300 ret = ib_modify_qp(qp, attr, 301 IB_QP_STATE | 302 IB_QP_PKEY_INDEX | 303 IB_QP_ACCESS_FLAGS | 304 IB_QP_PORT); 305 306 out: 307 kfree(attr); 308 return ret; 309 } 310 311 static int srp_new_ib_cm_id(struct srp_rdma_ch *ch) 312 { 313 struct srp_target_port *target = ch->target; 314 struct ib_cm_id *new_cm_id; 315 316 new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev, 317 srp_ib_cm_handler, ch); 318 if (IS_ERR(new_cm_id)) 319 return PTR_ERR(new_cm_id); 320 321 if (ch->ib_cm.cm_id) 322 ib_destroy_cm_id(ch->ib_cm.cm_id); 323 ch->ib_cm.cm_id = new_cm_id; 324 if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev, 325 target->srp_host->port)) 326 ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_OPA; 327 else 328 ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_IB; 329 ch->ib_cm.path.sgid = target->sgid; 330 ch->ib_cm.path.dgid = target->ib_cm.orig_dgid; 331 ch->ib_cm.path.pkey = target->ib_cm.pkey; 332 ch->ib_cm.path.service_id = target->ib_cm.service_id; 333 334 return 0; 335 } 336 337 static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch) 338 { 339 struct srp_target_port *target = ch->target; 340 struct rdma_cm_id *new_cm_id; 341 int ret; 342 343 new_cm_id = rdma_create_id(target->net, srp_rdma_cm_handler, ch, 344 RDMA_PS_TCP, IB_QPT_RC); 345 if (IS_ERR(new_cm_id)) { 346 ret = PTR_ERR(new_cm_id); 347 new_cm_id = NULL; 348 goto out; 349 } 350 351 init_completion(&ch->done); 352 ret = rdma_resolve_addr(new_cm_id, target->rdma_cm.src_specified ? 353 (struct sockaddr *)&target->rdma_cm.src : NULL, 354 (struct sockaddr *)&target->rdma_cm.dst, 355 SRP_PATH_REC_TIMEOUT_MS); 356 if (ret) { 357 pr_err("No route available from %pIS to %pIS (%d)\n", 358 &target->rdma_cm.src, &target->rdma_cm.dst, ret); 359 goto out; 360 } 361 ret = wait_for_completion_interruptible(&ch->done); 362 if (ret < 0) 363 goto out; 364 365 ret = ch->status; 366 if (ret) { 367 pr_err("Resolving address %pIS failed (%d)\n", 368 &target->rdma_cm.dst, ret); 369 goto out; 370 } 371 372 swap(ch->rdma_cm.cm_id, new_cm_id); 373 374 out: 375 if (new_cm_id) 376 rdma_destroy_id(new_cm_id); 377 378 return ret; 379 } 380 381 static int srp_new_cm_id(struct srp_rdma_ch *ch) 382 { 383 struct srp_target_port *target = ch->target; 384 385 return target->using_rdma_cm ? 
srp_new_rdma_cm_id(ch) : 386 srp_new_ib_cm_id(ch); 387 } 388 389 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target) 390 { 391 struct srp_device *dev = target->srp_host->srp_dev; 392 struct ib_fmr_pool_param fmr_param; 393 394 memset(&fmr_param, 0, sizeof(fmr_param)); 395 fmr_param.pool_size = target->mr_pool_size; 396 fmr_param.dirty_watermark = fmr_param.pool_size / 4; 397 fmr_param.cache = 1; 398 fmr_param.max_pages_per_fmr = dev->max_pages_per_mr; 399 fmr_param.page_shift = ilog2(dev->mr_page_size); 400 fmr_param.access = (IB_ACCESS_LOCAL_WRITE | 401 IB_ACCESS_REMOTE_WRITE | 402 IB_ACCESS_REMOTE_READ); 403 404 return ib_create_fmr_pool(dev->pd, &fmr_param); 405 } 406 407 /** 408 * srp_destroy_fr_pool() - free the resources owned by a pool 409 * @pool: Fast registration pool to be destroyed. 410 */ 411 static void srp_destroy_fr_pool(struct srp_fr_pool *pool) 412 { 413 int i; 414 struct srp_fr_desc *d; 415 416 if (!pool) 417 return; 418 419 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { 420 if (d->mr) 421 ib_dereg_mr(d->mr); 422 } 423 kfree(pool); 424 } 425 426 /** 427 * srp_create_fr_pool() - allocate and initialize a pool for fast registration 428 * @device: IB device to allocate fast registration descriptors for. 429 * @pd: Protection domain associated with the FR descriptors. 430 * @pool_size: Number of descriptors to allocate. 431 * @max_page_list_len: Maximum fast registration work request page list length. 432 */ 433 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, 434 struct ib_pd *pd, int pool_size, 435 int max_page_list_len) 436 { 437 struct srp_fr_pool *pool; 438 struct srp_fr_desc *d; 439 struct ib_mr *mr; 440 int i, ret = -EINVAL; 441 enum ib_mr_type mr_type; 442 443 if (pool_size <= 0) 444 goto err; 445 ret = -ENOMEM; 446 pool = kzalloc(struct_size(pool, desc, pool_size), GFP_KERNEL); 447 if (!pool) 448 goto err; 449 pool->size = pool_size; 450 pool->max_page_list_len = max_page_list_len; 451 spin_lock_init(&pool->lock); 452 INIT_LIST_HEAD(&pool->free_list); 453 454 if (device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG) 455 mr_type = IB_MR_TYPE_SG_GAPS; 456 else 457 mr_type = IB_MR_TYPE_MEM_REG; 458 459 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { 460 mr = ib_alloc_mr(pd, mr_type, max_page_list_len); 461 if (IS_ERR(mr)) { 462 ret = PTR_ERR(mr); 463 if (ret == -ENOMEM) 464 pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n", 465 dev_name(&device->dev)); 466 goto destroy_pool; 467 } 468 d->mr = mr; 469 list_add_tail(&d->entry, &pool->free_list); 470 } 471 472 out: 473 return pool; 474 475 destroy_pool: 476 srp_destroy_fr_pool(pool); 477 478 err: 479 pool = ERR_PTR(ret); 480 goto out; 481 } 482 483 /** 484 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration 485 * @pool: Pool to obtain descriptor from. 486 */ 487 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool) 488 { 489 struct srp_fr_desc *d = NULL; 490 unsigned long flags; 491 492 spin_lock_irqsave(&pool->lock, flags); 493 if (!list_empty(&pool->free_list)) { 494 d = list_first_entry(&pool->free_list, typeof(*d), entry); 495 list_del(&d->entry); 496 } 497 spin_unlock_irqrestore(&pool->lock, flags); 498 499 return d; 500 } 501 502 /** 503 * srp_fr_pool_put() - put an FR descriptor back in the free list 504 * @pool: Pool the descriptor was allocated from. 505 * @desc: Pointer to an array of fast registration descriptor pointers. 
506 * @n: Number of descriptors to put back. 507 * 508 * Note: The caller must already have queued an invalidation request for 509 * desc->mr->rkey before calling this function. 510 */ 511 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc, 512 int n) 513 { 514 unsigned long flags; 515 int i; 516 517 spin_lock_irqsave(&pool->lock, flags); 518 for (i = 0; i < n; i++) 519 list_add(&desc[i]->entry, &pool->free_list); 520 spin_unlock_irqrestore(&pool->lock, flags); 521 } 522 523 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target) 524 { 525 struct srp_device *dev = target->srp_host->srp_dev; 526 527 return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size, 528 dev->max_pages_per_mr); 529 } 530 531 /** 532 * srp_destroy_qp() - destroy an RDMA queue pair 533 * @ch: SRP RDMA channel. 534 * 535 * Drain the qp before destroying it. This avoids that the receive 536 * completion handler can access the queue pair while it is 537 * being destroyed. 538 */ 539 static void srp_destroy_qp(struct srp_rdma_ch *ch) 540 { 541 spin_lock_irq(&ch->lock); 542 ib_process_cq_direct(ch->send_cq, -1); 543 spin_unlock_irq(&ch->lock); 544 545 ib_drain_qp(ch->qp); 546 ib_destroy_qp(ch->qp); 547 } 548 549 static int srp_create_ch_ib(struct srp_rdma_ch *ch) 550 { 551 struct srp_target_port *target = ch->target; 552 struct srp_device *dev = target->srp_host->srp_dev; 553 struct ib_qp_init_attr *init_attr; 554 struct ib_cq *recv_cq, *send_cq; 555 struct ib_qp *qp; 556 struct ib_fmr_pool *fmr_pool = NULL; 557 struct srp_fr_pool *fr_pool = NULL; 558 const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2; 559 int ret; 560 561 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL); 562 if (!init_attr) 563 return -ENOMEM; 564 565 /* queue_size + 1 for ib_drain_rq() */ 566 recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1, 567 ch->comp_vector, IB_POLL_SOFTIRQ); 568 if (IS_ERR(recv_cq)) { 569 ret = PTR_ERR(recv_cq); 570 goto err; 571 } 572 573 send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size, 574 ch->comp_vector, IB_POLL_DIRECT); 575 if (IS_ERR(send_cq)) { 576 ret = PTR_ERR(send_cq); 577 goto err_recv_cq; 578 } 579 580 init_attr->event_handler = srp_qp_event; 581 init_attr->cap.max_send_wr = m * target->queue_size; 582 init_attr->cap.max_recv_wr = target->queue_size + 1; 583 init_attr->cap.max_recv_sge = 1; 584 init_attr->cap.max_send_sge = SRP_MAX_SGE; 585 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; 586 init_attr->qp_type = IB_QPT_RC; 587 init_attr->send_cq = send_cq; 588 init_attr->recv_cq = recv_cq; 589 590 if (target->using_rdma_cm) { 591 ret = rdma_create_qp(ch->rdma_cm.cm_id, dev->pd, init_attr); 592 qp = ch->rdma_cm.cm_id->qp; 593 } else { 594 qp = ib_create_qp(dev->pd, init_attr); 595 if (!IS_ERR(qp)) { 596 ret = srp_init_ib_qp(target, qp); 597 if (ret) 598 ib_destroy_qp(qp); 599 } else { 600 ret = PTR_ERR(qp); 601 } 602 } 603 if (ret) { 604 pr_err("QP creation failed for dev %s: %d\n", 605 dev_name(&dev->dev->dev), ret); 606 goto err_send_cq; 607 } 608 609 if (dev->use_fast_reg) { 610 fr_pool = srp_alloc_fr_pool(target); 611 if (IS_ERR(fr_pool)) { 612 ret = PTR_ERR(fr_pool); 613 shost_printk(KERN_WARNING, target->scsi_host, PFX 614 "FR pool allocation failed (%d)\n", ret); 615 goto err_qp; 616 } 617 } else if (dev->use_fmr) { 618 fmr_pool = srp_alloc_fmr_pool(target); 619 if (IS_ERR(fmr_pool)) { 620 ret = PTR_ERR(fmr_pool); 621 shost_printk(KERN_WARNING, target->scsi_host, PFX 622 "FMR pool allocation failed (%d)\n", ret); 623 goto err_qp; 
624 } 625 } 626 627 if (ch->qp) 628 srp_destroy_qp(ch); 629 if (ch->recv_cq) 630 ib_free_cq(ch->recv_cq); 631 if (ch->send_cq) 632 ib_free_cq(ch->send_cq); 633 634 ch->qp = qp; 635 ch->recv_cq = recv_cq; 636 ch->send_cq = send_cq; 637 638 if (dev->use_fast_reg) { 639 if (ch->fr_pool) 640 srp_destroy_fr_pool(ch->fr_pool); 641 ch->fr_pool = fr_pool; 642 } else if (dev->use_fmr) { 643 if (ch->fmr_pool) 644 ib_destroy_fmr_pool(ch->fmr_pool); 645 ch->fmr_pool = fmr_pool; 646 } 647 648 kfree(init_attr); 649 return 0; 650 651 err_qp: 652 if (target->using_rdma_cm) 653 rdma_destroy_qp(ch->rdma_cm.cm_id); 654 else 655 ib_destroy_qp(qp); 656 657 err_send_cq: 658 ib_free_cq(send_cq); 659 660 err_recv_cq: 661 ib_free_cq(recv_cq); 662 663 err: 664 kfree(init_attr); 665 return ret; 666 } 667 668 /* 669 * Note: this function may be called without srp_alloc_iu_bufs() having been 670 * invoked. Hence the ch->[rt]x_ring checks. 671 */ 672 static void srp_free_ch_ib(struct srp_target_port *target, 673 struct srp_rdma_ch *ch) 674 { 675 struct srp_device *dev = target->srp_host->srp_dev; 676 int i; 677 678 if (!ch->target) 679 return; 680 681 if (target->using_rdma_cm) { 682 if (ch->rdma_cm.cm_id) { 683 rdma_destroy_id(ch->rdma_cm.cm_id); 684 ch->rdma_cm.cm_id = NULL; 685 } 686 } else { 687 if (ch->ib_cm.cm_id) { 688 ib_destroy_cm_id(ch->ib_cm.cm_id); 689 ch->ib_cm.cm_id = NULL; 690 } 691 } 692 693 /* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */ 694 if (!ch->qp) 695 return; 696 697 if (dev->use_fast_reg) { 698 if (ch->fr_pool) 699 srp_destroy_fr_pool(ch->fr_pool); 700 } else if (dev->use_fmr) { 701 if (ch->fmr_pool) 702 ib_destroy_fmr_pool(ch->fmr_pool); 703 } 704 705 srp_destroy_qp(ch); 706 ib_free_cq(ch->send_cq); 707 ib_free_cq(ch->recv_cq); 708 709 /* 710 * Avoid that the SCSI error handler tries to use this channel after 711 * it has been freed. The SCSI error handler can namely continue 712 * trying to perform recovery actions after scsi_remove_host() 713 * returned. 
714 */ 715 ch->target = NULL; 716 717 ch->qp = NULL; 718 ch->send_cq = ch->recv_cq = NULL; 719 720 if (ch->rx_ring) { 721 for (i = 0; i < target->queue_size; ++i) 722 srp_free_iu(target->srp_host, ch->rx_ring[i]); 723 kfree(ch->rx_ring); 724 ch->rx_ring = NULL; 725 } 726 if (ch->tx_ring) { 727 for (i = 0; i < target->queue_size; ++i) 728 srp_free_iu(target->srp_host, ch->tx_ring[i]); 729 kfree(ch->tx_ring); 730 ch->tx_ring = NULL; 731 } 732 } 733 734 static void srp_path_rec_completion(int status, 735 struct sa_path_rec *pathrec, 736 void *ch_ptr) 737 { 738 struct srp_rdma_ch *ch = ch_ptr; 739 struct srp_target_port *target = ch->target; 740 741 ch->status = status; 742 if (status) 743 shost_printk(KERN_ERR, target->scsi_host, 744 PFX "Got failed path rec status %d\n", status); 745 else 746 ch->ib_cm.path = *pathrec; 747 complete(&ch->done); 748 } 749 750 static int srp_ib_lookup_path(struct srp_rdma_ch *ch) 751 { 752 struct srp_target_port *target = ch->target; 753 int ret; 754 755 ch->ib_cm.path.numb_path = 1; 756 757 init_completion(&ch->done); 758 759 ch->ib_cm.path_query_id = ib_sa_path_rec_get(&srp_sa_client, 760 target->srp_host->srp_dev->dev, 761 target->srp_host->port, 762 &ch->ib_cm.path, 763 IB_SA_PATH_REC_SERVICE_ID | 764 IB_SA_PATH_REC_DGID | 765 IB_SA_PATH_REC_SGID | 766 IB_SA_PATH_REC_NUMB_PATH | 767 IB_SA_PATH_REC_PKEY, 768 SRP_PATH_REC_TIMEOUT_MS, 769 GFP_KERNEL, 770 srp_path_rec_completion, 771 ch, &ch->ib_cm.path_query); 772 if (ch->ib_cm.path_query_id < 0) 773 return ch->ib_cm.path_query_id; 774 775 ret = wait_for_completion_interruptible(&ch->done); 776 if (ret < 0) 777 return ret; 778 779 if (ch->status < 0) 780 shost_printk(KERN_WARNING, target->scsi_host, 781 PFX "Path record query failed: sgid %pI6, dgid %pI6, pkey %#04x, service_id %#16llx\n", 782 ch->ib_cm.path.sgid.raw, ch->ib_cm.path.dgid.raw, 783 be16_to_cpu(target->ib_cm.pkey), 784 be64_to_cpu(target->ib_cm.service_id)); 785 786 return ch->status; 787 } 788 789 static int srp_rdma_lookup_path(struct srp_rdma_ch *ch) 790 { 791 struct srp_target_port *target = ch->target; 792 int ret; 793 794 init_completion(&ch->done); 795 796 ret = rdma_resolve_route(ch->rdma_cm.cm_id, SRP_PATH_REC_TIMEOUT_MS); 797 if (ret) 798 return ret; 799 800 wait_for_completion_interruptible(&ch->done); 801 802 if (ch->status != 0) 803 shost_printk(KERN_WARNING, target->scsi_host, 804 PFX "Path resolution failed\n"); 805 806 return ch->status; 807 } 808 809 static int srp_lookup_path(struct srp_rdma_ch *ch) 810 { 811 struct srp_target_port *target = ch->target; 812 813 return target->using_rdma_cm ? 
srp_rdma_lookup_path(ch) : 814 srp_ib_lookup_path(ch); 815 } 816 817 static u8 srp_get_subnet_timeout(struct srp_host *host) 818 { 819 struct ib_port_attr attr; 820 int ret; 821 u8 subnet_timeout = 18; 822 823 ret = ib_query_port(host->srp_dev->dev, host->port, &attr); 824 if (ret == 0) 825 subnet_timeout = attr.subnet_timeout; 826 827 if (unlikely(subnet_timeout < 15)) 828 pr_warn("%s: subnet timeout %d may cause SRP login to fail.\n", 829 dev_name(&host->srp_dev->dev->dev), subnet_timeout); 830 831 return subnet_timeout; 832 } 833 834 static int srp_send_req(struct srp_rdma_ch *ch, uint32_t max_iu_len, 835 bool multich) 836 { 837 struct srp_target_port *target = ch->target; 838 struct { 839 struct rdma_conn_param rdma_param; 840 struct srp_login_req_rdma rdma_req; 841 struct ib_cm_req_param ib_param; 842 struct srp_login_req ib_req; 843 } *req = NULL; 844 char *ipi, *tpi; 845 int status; 846 847 req = kzalloc(sizeof *req, GFP_KERNEL); 848 if (!req) 849 return -ENOMEM; 850 851 req->ib_param.flow_control = 1; 852 req->ib_param.retry_count = target->tl_retry_count; 853 854 /* 855 * Pick some arbitrary defaults here; we could make these 856 * module parameters if anyone cared about setting them. 857 */ 858 req->ib_param.responder_resources = 4; 859 req->ib_param.rnr_retry_count = 7; 860 req->ib_param.max_cm_retries = 15; 861 862 req->ib_req.opcode = SRP_LOGIN_REQ; 863 req->ib_req.tag = 0; 864 req->ib_req.req_it_iu_len = cpu_to_be32(max_iu_len); 865 req->ib_req.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | 866 SRP_BUF_FORMAT_INDIRECT); 867 req->ib_req.req_flags = (multich ? SRP_MULTICHAN_MULTI : 868 SRP_MULTICHAN_SINGLE); 869 if (srp_use_imm_data) { 870 req->ib_req.req_flags |= SRP_IMMED_REQUESTED; 871 req->ib_req.imm_data_offset = cpu_to_be16(SRP_IMM_DATA_OFFSET); 872 } 873 874 if (target->using_rdma_cm) { 875 req->rdma_param.flow_control = req->ib_param.flow_control; 876 req->rdma_param.responder_resources = 877 req->ib_param.responder_resources; 878 req->rdma_param.initiator_depth = req->ib_param.initiator_depth; 879 req->rdma_param.retry_count = req->ib_param.retry_count; 880 req->rdma_param.rnr_retry_count = req->ib_param.rnr_retry_count; 881 req->rdma_param.private_data = &req->rdma_req; 882 req->rdma_param.private_data_len = sizeof(req->rdma_req); 883 884 req->rdma_req.opcode = req->ib_req.opcode; 885 req->rdma_req.tag = req->ib_req.tag; 886 req->rdma_req.req_it_iu_len = req->ib_req.req_it_iu_len; 887 req->rdma_req.req_buf_fmt = req->ib_req.req_buf_fmt; 888 req->rdma_req.req_flags = req->ib_req.req_flags; 889 req->rdma_req.imm_data_offset = req->ib_req.imm_data_offset; 890 891 ipi = req->rdma_req.initiator_port_id; 892 tpi = req->rdma_req.target_port_id; 893 } else { 894 u8 subnet_timeout; 895 896 subnet_timeout = srp_get_subnet_timeout(target->srp_host); 897 898 req->ib_param.primary_path = &ch->ib_cm.path; 899 req->ib_param.alternate_path = NULL; 900 req->ib_param.service_id = target->ib_cm.service_id; 901 get_random_bytes(&req->ib_param.starting_psn, 4); 902 req->ib_param.starting_psn &= 0xffffff; 903 req->ib_param.qp_num = ch->qp->qp_num; 904 req->ib_param.qp_type = ch->qp->qp_type; 905 req->ib_param.local_cm_response_timeout = subnet_timeout + 2; 906 req->ib_param.remote_cm_response_timeout = subnet_timeout + 2; 907 req->ib_param.private_data = &req->ib_req; 908 req->ib_param.private_data_len = sizeof(req->ib_req); 909 910 ipi = req->ib_req.initiator_port_id; 911 tpi = req->ib_req.target_port_id; 912 } 913 914 /* 915 * In the published SRP specification (draft rev. 
16a), the 916 * port identifier format is 8 bytes of ID extension followed 917 * by 8 bytes of GUID. Older drafts put the two halves in the 918 * opposite order, so that the GUID comes first. 919 * 920 * Targets conforming to these obsolete drafts can be 921 * recognized by the I/O Class they report. 922 */ 923 if (target->io_class == SRP_REV10_IB_IO_CLASS) { 924 memcpy(ipi, &target->sgid.global.interface_id, 8); 925 memcpy(ipi + 8, &target->initiator_ext, 8); 926 memcpy(tpi, &target->ioc_guid, 8); 927 memcpy(tpi + 8, &target->id_ext, 8); 928 } else { 929 memcpy(ipi, &target->initiator_ext, 8); 930 memcpy(ipi + 8, &target->sgid.global.interface_id, 8); 931 memcpy(tpi, &target->id_ext, 8); 932 memcpy(tpi + 8, &target->ioc_guid, 8); 933 } 934 935 /* 936 * Topspin/Cisco SRP targets will reject our login unless we 937 * zero out the first 8 bytes of our initiator port ID and set 938 * the second 8 bytes to the local node GUID. 939 */ 940 if (srp_target_is_topspin(target)) { 941 shost_printk(KERN_DEBUG, target->scsi_host, 942 PFX "Topspin/Cisco initiator port ID workaround " 943 "activated for target GUID %016llx\n", 944 be64_to_cpu(target->ioc_guid)); 945 memset(ipi, 0, 8); 946 memcpy(ipi + 8, &target->srp_host->srp_dev->dev->node_guid, 8); 947 } 948 949 if (target->using_rdma_cm) 950 status = rdma_connect(ch->rdma_cm.cm_id, &req->rdma_param); 951 else 952 status = ib_send_cm_req(ch->ib_cm.cm_id, &req->ib_param); 953 954 kfree(req); 955 956 return status; 957 } 958 959 static bool srp_queue_remove_work(struct srp_target_port *target) 960 { 961 bool changed = false; 962 963 spin_lock_irq(&target->lock); 964 if (target->state != SRP_TARGET_REMOVED) { 965 target->state = SRP_TARGET_REMOVED; 966 changed = true; 967 } 968 spin_unlock_irq(&target->lock); 969 970 if (changed) 971 queue_work(srp_remove_wq, &target->remove_work); 972 973 return changed; 974 } 975 976 static void srp_disconnect_target(struct srp_target_port *target) 977 { 978 struct srp_rdma_ch *ch; 979 int i, ret; 980 981 /* XXX should send SRP_I_LOGOUT request */ 982 983 for (i = 0; i < target->ch_count; i++) { 984 ch = &target->ch[i]; 985 ch->connected = false; 986 ret = 0; 987 if (target->using_rdma_cm) { 988 if (ch->rdma_cm.cm_id) 989 rdma_disconnect(ch->rdma_cm.cm_id); 990 } else { 991 if (ch->ib_cm.cm_id) 992 ret = ib_send_cm_dreq(ch->ib_cm.cm_id, 993 NULL, 0); 994 } 995 if (ret < 0) { 996 shost_printk(KERN_DEBUG, target->scsi_host, 997 PFX "Sending CM DREQ failed\n"); 998 } 999 } 1000 } 1001 1002 static void srp_free_req_data(struct srp_target_port *target, 1003 struct srp_rdma_ch *ch) 1004 { 1005 struct srp_device *dev = target->srp_host->srp_dev; 1006 struct ib_device *ibdev = dev->dev; 1007 struct srp_request *req; 1008 int i; 1009 1010 if (!ch->req_ring) 1011 return; 1012 1013 for (i = 0; i < target->req_ring_size; ++i) { 1014 req = &ch->req_ring[i]; 1015 if (dev->use_fast_reg) { 1016 kfree(req->fr_list); 1017 } else { 1018 kfree(req->fmr_list); 1019 kfree(req->map_page); 1020 } 1021 if (req->indirect_dma_addr) { 1022 ib_dma_unmap_single(ibdev, req->indirect_dma_addr, 1023 target->indirect_size, 1024 DMA_TO_DEVICE); 1025 } 1026 kfree(req->indirect_desc); 1027 } 1028 1029 kfree(ch->req_ring); 1030 ch->req_ring = NULL; 1031 } 1032 1033 static int srp_alloc_req_data(struct srp_rdma_ch *ch) 1034 { 1035 struct srp_target_port *target = ch->target; 1036 struct srp_device *srp_dev = target->srp_host->srp_dev; 1037 struct ib_device *ibdev = srp_dev->dev; 1038 struct srp_request *req; 1039 void *mr_list; 1040 dma_addr_t dma_addr; 
1041 int i, ret = -ENOMEM; 1042 1043 ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring), 1044 GFP_KERNEL); 1045 if (!ch->req_ring) 1046 goto out; 1047 1048 for (i = 0; i < target->req_ring_size; ++i) { 1049 req = &ch->req_ring[i]; 1050 mr_list = kmalloc_array(target->mr_per_cmd, sizeof(void *), 1051 GFP_KERNEL); 1052 if (!mr_list) 1053 goto out; 1054 if (srp_dev->use_fast_reg) { 1055 req->fr_list = mr_list; 1056 } else { 1057 req->fmr_list = mr_list; 1058 req->map_page = kmalloc_array(srp_dev->max_pages_per_mr, 1059 sizeof(void *), 1060 GFP_KERNEL); 1061 if (!req->map_page) 1062 goto out; 1063 } 1064 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL); 1065 if (!req->indirect_desc) 1066 goto out; 1067 1068 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc, 1069 target->indirect_size, 1070 DMA_TO_DEVICE); 1071 if (ib_dma_mapping_error(ibdev, dma_addr)) 1072 goto out; 1073 1074 req->indirect_dma_addr = dma_addr; 1075 } 1076 ret = 0; 1077 1078 out: 1079 return ret; 1080 } 1081 1082 /** 1083 * srp_del_scsi_host_attr() - Remove attributes defined in the host template. 1084 * @shost: SCSI host whose attributes to remove from sysfs. 1085 * 1086 * Note: Any attributes defined in the host template and that did not exist 1087 * before invocation of this function will be ignored. 1088 */ 1089 static void srp_del_scsi_host_attr(struct Scsi_Host *shost) 1090 { 1091 struct device_attribute **attr; 1092 1093 for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr) 1094 device_remove_file(&shost->shost_dev, *attr); 1095 } 1096 1097 static void srp_remove_target(struct srp_target_port *target) 1098 { 1099 struct srp_rdma_ch *ch; 1100 int i; 1101 1102 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); 1103 1104 srp_del_scsi_host_attr(target->scsi_host); 1105 srp_rport_get(target->rport); 1106 srp_remove_host(target->scsi_host); 1107 scsi_remove_host(target->scsi_host); 1108 srp_stop_rport_timers(target->rport); 1109 srp_disconnect_target(target); 1110 kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net); 1111 for (i = 0; i < target->ch_count; i++) { 1112 ch = &target->ch[i]; 1113 srp_free_ch_ib(target, ch); 1114 } 1115 cancel_work_sync(&target->tl_err_work); 1116 srp_rport_put(target->rport); 1117 for (i = 0; i < target->ch_count; i++) { 1118 ch = &target->ch[i]; 1119 srp_free_req_data(target, ch); 1120 } 1121 kfree(target->ch); 1122 target->ch = NULL; 1123 1124 spin_lock(&target->srp_host->target_lock); 1125 list_del(&target->list); 1126 spin_unlock(&target->srp_host->target_lock); 1127 1128 scsi_host_put(target->scsi_host); 1129 } 1130 1131 static void srp_remove_work(struct work_struct *work) 1132 { 1133 struct srp_target_port *target = 1134 container_of(work, struct srp_target_port, remove_work); 1135 1136 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); 1137 1138 srp_remove_target(target); 1139 } 1140 1141 static void srp_rport_delete(struct srp_rport *rport) 1142 { 1143 struct srp_target_port *target = rport->lld_data; 1144 1145 srp_queue_remove_work(target); 1146 } 1147 1148 /** 1149 * srp_connected_ch() - number of connected channels 1150 * @target: SRP target port. 
1151 */ 1152 static int srp_connected_ch(struct srp_target_port *target) 1153 { 1154 int i, c = 0; 1155 1156 for (i = 0; i < target->ch_count; i++) 1157 c += target->ch[i].connected; 1158 1159 return c; 1160 } 1161 1162 static int srp_connect_ch(struct srp_rdma_ch *ch, uint32_t max_iu_len, 1163 bool multich) 1164 { 1165 struct srp_target_port *target = ch->target; 1166 int ret; 1167 1168 WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0); 1169 1170 ret = srp_lookup_path(ch); 1171 if (ret) 1172 goto out; 1173 1174 while (1) { 1175 init_completion(&ch->done); 1176 ret = srp_send_req(ch, max_iu_len, multich); 1177 if (ret) 1178 goto out; 1179 ret = wait_for_completion_interruptible(&ch->done); 1180 if (ret < 0) 1181 goto out; 1182 1183 /* 1184 * The CM event handling code will set status to 1185 * SRP_PORT_REDIRECT if we get a port redirect REJ 1186 * back, or SRP_DLID_REDIRECT if we get a lid/qp 1187 * redirect REJ back. 1188 */ 1189 ret = ch->status; 1190 switch (ret) { 1191 case 0: 1192 ch->connected = true; 1193 goto out; 1194 1195 case SRP_PORT_REDIRECT: 1196 ret = srp_lookup_path(ch); 1197 if (ret) 1198 goto out; 1199 break; 1200 1201 case SRP_DLID_REDIRECT: 1202 break; 1203 1204 case SRP_STALE_CONN: 1205 shost_printk(KERN_ERR, target->scsi_host, PFX 1206 "giving up on stale connection\n"); 1207 ret = -ECONNRESET; 1208 goto out; 1209 1210 default: 1211 goto out; 1212 } 1213 } 1214 1215 out: 1216 return ret <= 0 ? ret : -ENODEV; 1217 } 1218 1219 static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc) 1220 { 1221 srp_handle_qp_err(cq, wc, "INV RKEY"); 1222 } 1223 1224 static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch, 1225 u32 rkey) 1226 { 1227 struct ib_send_wr wr = { 1228 .opcode = IB_WR_LOCAL_INV, 1229 .next = NULL, 1230 .num_sge = 0, 1231 .send_flags = 0, 1232 .ex.invalidate_rkey = rkey, 1233 }; 1234 1235 wr.wr_cqe = &req->reg_cqe; 1236 req->reg_cqe.done = srp_inv_rkey_err_done; 1237 return ib_post_send(ch->qp, &wr, NULL); 1238 } 1239 1240 static void srp_unmap_data(struct scsi_cmnd *scmnd, 1241 struct srp_rdma_ch *ch, 1242 struct srp_request *req) 1243 { 1244 struct srp_target_port *target = ch->target; 1245 struct srp_device *dev = target->srp_host->srp_dev; 1246 struct ib_device *ibdev = dev->dev; 1247 int i, res; 1248 1249 if (!scsi_sglist(scmnd) || 1250 (scmnd->sc_data_direction != DMA_TO_DEVICE && 1251 scmnd->sc_data_direction != DMA_FROM_DEVICE)) 1252 return; 1253 1254 if (dev->use_fast_reg) { 1255 struct srp_fr_desc **pfr; 1256 1257 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) { 1258 res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey); 1259 if (res < 0) { 1260 shost_printk(KERN_ERR, target->scsi_host, PFX 1261 "Queueing INV WR for rkey %#x failed (%d)\n", 1262 (*pfr)->mr->rkey, res); 1263 queue_work(system_long_wq, 1264 &target->tl_err_work); 1265 } 1266 } 1267 if (req->nmdesc) 1268 srp_fr_pool_put(ch->fr_pool, req->fr_list, 1269 req->nmdesc); 1270 } else if (dev->use_fmr) { 1271 struct ib_pool_fmr **pfmr; 1272 1273 for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++) 1274 ib_fmr_pool_unmap(*pfmr); 1275 } 1276 1277 ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd), 1278 scmnd->sc_data_direction); 1279 } 1280 1281 /** 1282 * srp_claim_req - Take ownership of the scmnd associated with a request. 1283 * @ch: SRP RDMA channel. 1284 * @req: SRP request. 1285 * @sdev: If not NULL, only take ownership for this SCSI device. 1286 * @scmnd: If NULL, take ownership of @req->scmnd. 
If not NULL, only take 1287 * ownership of @req->scmnd if it equals @scmnd. 1288 * 1289 * Return value: 1290 * Either NULL or a pointer to the SCSI command the caller became owner of. 1291 */ 1292 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch, 1293 struct srp_request *req, 1294 struct scsi_device *sdev, 1295 struct scsi_cmnd *scmnd) 1296 { 1297 unsigned long flags; 1298 1299 spin_lock_irqsave(&ch->lock, flags); 1300 if (req->scmnd && 1301 (!sdev || req->scmnd->device == sdev) && 1302 (!scmnd || req->scmnd == scmnd)) { 1303 scmnd = req->scmnd; 1304 req->scmnd = NULL; 1305 } else { 1306 scmnd = NULL; 1307 } 1308 spin_unlock_irqrestore(&ch->lock, flags); 1309 1310 return scmnd; 1311 } 1312 1313 /** 1314 * srp_free_req() - Unmap data and adjust ch->req_lim. 1315 * @ch: SRP RDMA channel. 1316 * @req: Request to be freed. 1317 * @scmnd: SCSI command associated with @req. 1318 * @req_lim_delta: Amount to be added to @target->req_lim. 1319 */ 1320 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req, 1321 struct scsi_cmnd *scmnd, s32 req_lim_delta) 1322 { 1323 unsigned long flags; 1324 1325 srp_unmap_data(scmnd, ch, req); 1326 1327 spin_lock_irqsave(&ch->lock, flags); 1328 ch->req_lim += req_lim_delta; 1329 spin_unlock_irqrestore(&ch->lock, flags); 1330 } 1331 1332 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req, 1333 struct scsi_device *sdev, int result) 1334 { 1335 struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL); 1336 1337 if (scmnd) { 1338 srp_free_req(ch, req, scmnd, 0); 1339 scmnd->result = result; 1340 scmnd->scsi_done(scmnd); 1341 } 1342 } 1343 1344 static void srp_terminate_io(struct srp_rport *rport) 1345 { 1346 struct srp_target_port *target = rport->lld_data; 1347 struct srp_rdma_ch *ch; 1348 int i, j; 1349 1350 for (i = 0; i < target->ch_count; i++) { 1351 ch = &target->ch[i]; 1352 1353 for (j = 0; j < target->req_ring_size; ++j) { 1354 struct srp_request *req = &ch->req_ring[j]; 1355 1356 srp_finish_req(ch, req, NULL, 1357 DID_TRANSPORT_FAILFAST << 16); 1358 } 1359 } 1360 } 1361 1362 /* Calculate maximum initiator to target information unit length. */ 1363 static uint32_t srp_max_it_iu_len(int cmd_sg_cnt, bool use_imm_data) 1364 { 1365 uint32_t max_iu_len = sizeof(struct srp_cmd) + SRP_MAX_ADD_CDB_LEN + 1366 sizeof(struct srp_indirect_buf) + 1367 cmd_sg_cnt * sizeof(struct srp_direct_buf); 1368 1369 if (use_imm_data) 1370 max_iu_len = max(max_iu_len, SRP_IMM_DATA_OFFSET + 1371 srp_max_imm_data); 1372 1373 return max_iu_len; 1374 } 1375 1376 /* 1377 * It is up to the caller to ensure that srp_rport_reconnect() calls are 1378 * serialized and that no concurrent srp_queuecommand(), srp_abort(), 1379 * srp_reset_device() or srp_reset_host() calls will occur while this function 1380 * is in progress. One way to realize that is not to call this function 1381 * directly but to call srp_reconnect_rport() instead since that last function 1382 * serializes calls of this function via rport->mutex and also blocks 1383 * srp_queuecommand() calls before invoking this function. 
1384 */ 1385 static int srp_rport_reconnect(struct srp_rport *rport) 1386 { 1387 struct srp_target_port *target = rport->lld_data; 1388 struct srp_rdma_ch *ch; 1389 uint32_t max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, 1390 srp_use_imm_data); 1391 int i, j, ret = 0; 1392 bool multich = false; 1393 1394 srp_disconnect_target(target); 1395 1396 if (target->state == SRP_TARGET_SCANNING) 1397 return -ENODEV; 1398 1399 /* 1400 * Now get a new local CM ID so that we avoid confusing the target in 1401 * case things are really fouled up. Doing so also ensures that all CM 1402 * callbacks will have finished before a new QP is allocated. 1403 */ 1404 for (i = 0; i < target->ch_count; i++) { 1405 ch = &target->ch[i]; 1406 ret += srp_new_cm_id(ch); 1407 } 1408 for (i = 0; i < target->ch_count; i++) { 1409 ch = &target->ch[i]; 1410 for (j = 0; j < target->req_ring_size; ++j) { 1411 struct srp_request *req = &ch->req_ring[j]; 1412 1413 srp_finish_req(ch, req, NULL, DID_RESET << 16); 1414 } 1415 } 1416 for (i = 0; i < target->ch_count; i++) { 1417 ch = &target->ch[i]; 1418 /* 1419 * Whether or not creating a new CM ID succeeded, create a new 1420 * QP. This guarantees that all completion callback function 1421 * invocations have finished before request resetting starts. 1422 */ 1423 ret += srp_create_ch_ib(ch); 1424 1425 INIT_LIST_HEAD(&ch->free_tx); 1426 for (j = 0; j < target->queue_size; ++j) 1427 list_add(&ch->tx_ring[j]->list, &ch->free_tx); 1428 } 1429 1430 target->qp_in_error = false; 1431 1432 for (i = 0; i < target->ch_count; i++) { 1433 ch = &target->ch[i]; 1434 if (ret) 1435 break; 1436 ret = srp_connect_ch(ch, max_iu_len, multich); 1437 multich = true; 1438 } 1439 1440 if (ret == 0) 1441 shost_printk(KERN_INFO, target->scsi_host, 1442 PFX "reconnect succeeded\n"); 1443 1444 return ret; 1445 } 1446 1447 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr, 1448 unsigned int dma_len, u32 rkey) 1449 { 1450 struct srp_direct_buf *desc = state->desc; 1451 1452 WARN_ON_ONCE(!dma_len); 1453 1454 desc->va = cpu_to_be64(dma_addr); 1455 desc->key = cpu_to_be32(rkey); 1456 desc->len = cpu_to_be32(dma_len); 1457 1458 state->total_len += dma_len; 1459 state->desc++; 1460 state->ndesc++; 1461 } 1462 1463 static int srp_map_finish_fmr(struct srp_map_state *state, 1464 struct srp_rdma_ch *ch) 1465 { 1466 struct srp_target_port *target = ch->target; 1467 struct srp_device *dev = target->srp_host->srp_dev; 1468 struct ib_pool_fmr *fmr; 1469 u64 io_addr = 0; 1470 1471 if (state->fmr.next >= state->fmr.end) { 1472 shost_printk(KERN_ERR, ch->target->scsi_host, 1473 PFX "Out of MRs (mr_per_cmd = %d)\n", 1474 ch->target->mr_per_cmd); 1475 return -ENOMEM; 1476 } 1477 1478 WARN_ON_ONCE(!dev->use_fmr); 1479 1480 if (state->npages == 0) 1481 return 0; 1482 1483 if (state->npages == 1 && target->global_rkey) { 1484 srp_map_desc(state, state->base_dma_addr, state->dma_len, 1485 target->global_rkey); 1486 goto reset_state; 1487 } 1488 1489 fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages, 1490 state->npages, io_addr); 1491 if (IS_ERR(fmr)) 1492 return PTR_ERR(fmr); 1493 1494 *state->fmr.next++ = fmr; 1495 state->nmdesc++; 1496 1497 srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask, 1498 state->dma_len, fmr->fmr->rkey); 1499 1500 reset_state: 1501 state->npages = 0; 1502 state->dma_len = 0; 1503 1504 return 0; 1505 } 1506 1507 static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc) 1508 { 1509 srp_handle_qp_err(cq, wc, "FAST REG"); 1510 } 1511 1512 /* 1513 * Map up to 
sg_nents elements of state->sg where *sg_offset_p is the offset 1514 * where to start in the first element. If sg_offset_p != NULL then 1515 * *sg_offset_p is updated to the offset in state->sg[retval] of the first 1516 * byte that has not yet been mapped. 1517 */ 1518 static int srp_map_finish_fr(struct srp_map_state *state, 1519 struct srp_request *req, 1520 struct srp_rdma_ch *ch, int sg_nents, 1521 unsigned int *sg_offset_p) 1522 { 1523 struct srp_target_port *target = ch->target; 1524 struct srp_device *dev = target->srp_host->srp_dev; 1525 struct ib_reg_wr wr; 1526 struct srp_fr_desc *desc; 1527 u32 rkey; 1528 int n, err; 1529 1530 if (state->fr.next >= state->fr.end) { 1531 shost_printk(KERN_ERR, ch->target->scsi_host, 1532 PFX "Out of MRs (mr_per_cmd = %d)\n", 1533 ch->target->mr_per_cmd); 1534 return -ENOMEM; 1535 } 1536 1537 WARN_ON_ONCE(!dev->use_fast_reg); 1538 1539 if (sg_nents == 1 && target->global_rkey) { 1540 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; 1541 1542 srp_map_desc(state, sg_dma_address(state->sg) + sg_offset, 1543 sg_dma_len(state->sg) - sg_offset, 1544 target->global_rkey); 1545 if (sg_offset_p) 1546 *sg_offset_p = 0; 1547 return 1; 1548 } 1549 1550 desc = srp_fr_pool_get(ch->fr_pool); 1551 if (!desc) 1552 return -ENOMEM; 1553 1554 rkey = ib_inc_rkey(desc->mr->rkey); 1555 ib_update_fast_reg_key(desc->mr, rkey); 1556 1557 n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p, 1558 dev->mr_page_size); 1559 if (unlikely(n < 0)) { 1560 srp_fr_pool_put(ch->fr_pool, &desc, 1); 1561 pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n", 1562 dev_name(&req->scmnd->device->sdev_gendev), sg_nents, 1563 sg_offset_p ? *sg_offset_p : -1, n); 1564 return n; 1565 } 1566 1567 WARN_ON_ONCE(desc->mr->length == 0); 1568 1569 req->reg_cqe.done = srp_reg_mr_err_done; 1570 1571 wr.wr.next = NULL; 1572 wr.wr.opcode = IB_WR_REG_MR; 1573 wr.wr.wr_cqe = &req->reg_cqe; 1574 wr.wr.num_sge = 0; 1575 wr.wr.send_flags = 0; 1576 wr.mr = desc->mr; 1577 wr.key = desc->mr->rkey; 1578 wr.access = (IB_ACCESS_LOCAL_WRITE | 1579 IB_ACCESS_REMOTE_READ | 1580 IB_ACCESS_REMOTE_WRITE); 1581 1582 *state->fr.next++ = desc; 1583 state->nmdesc++; 1584 1585 srp_map_desc(state, desc->mr->iova, 1586 desc->mr->length, desc->mr->rkey); 1587 1588 err = ib_post_send(ch->qp, &wr.wr, NULL); 1589 if (unlikely(err)) { 1590 WARN_ON_ONCE(err == -ENOMEM); 1591 return err; 1592 } 1593 1594 return n; 1595 } 1596 1597 static int srp_map_sg_entry(struct srp_map_state *state, 1598 struct srp_rdma_ch *ch, 1599 struct scatterlist *sg) 1600 { 1601 struct srp_target_port *target = ch->target; 1602 struct srp_device *dev = target->srp_host->srp_dev; 1603 dma_addr_t dma_addr = sg_dma_address(sg); 1604 unsigned int dma_len = sg_dma_len(sg); 1605 unsigned int len = 0; 1606 int ret; 1607 1608 WARN_ON_ONCE(!dma_len); 1609 1610 while (dma_len) { 1611 unsigned offset = dma_addr & ~dev->mr_page_mask; 1612 1613 if (state->npages == dev->max_pages_per_mr || 1614 (state->npages > 0 && offset != 0)) { 1615 ret = srp_map_finish_fmr(state, ch); 1616 if (ret) 1617 return ret; 1618 } 1619 1620 len = min_t(unsigned int, dma_len, dev->mr_page_size - offset); 1621 1622 if (!state->npages) 1623 state->base_dma_addr = dma_addr; 1624 state->pages[state->npages++] = dma_addr & dev->mr_page_mask; 1625 state->dma_len += len; 1626 dma_addr += len; 1627 dma_len -= len; 1628 } 1629 1630 /* 1631 * If the end of the MR is not on a page boundary then we need to 1632 * close it out and start a new one -- we can only merge at page 1633 * 
boundaries. 1634 */ 1635 ret = 0; 1636 if ((dma_addr & ~dev->mr_page_mask) != 0) 1637 ret = srp_map_finish_fmr(state, ch); 1638 return ret; 1639 } 1640 1641 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch, 1642 struct srp_request *req, struct scatterlist *scat, 1643 int count) 1644 { 1645 struct scatterlist *sg; 1646 int i, ret; 1647 1648 state->pages = req->map_page; 1649 state->fmr.next = req->fmr_list; 1650 state->fmr.end = req->fmr_list + ch->target->mr_per_cmd; 1651 1652 for_each_sg(scat, sg, count, i) { 1653 ret = srp_map_sg_entry(state, ch, sg); 1654 if (ret) 1655 return ret; 1656 } 1657 1658 ret = srp_map_finish_fmr(state, ch); 1659 if (ret) 1660 return ret; 1661 1662 return 0; 1663 } 1664 1665 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch, 1666 struct srp_request *req, struct scatterlist *scat, 1667 int count) 1668 { 1669 unsigned int sg_offset = 0; 1670 1671 state->fr.next = req->fr_list; 1672 state->fr.end = req->fr_list + ch->target->mr_per_cmd; 1673 state->sg = scat; 1674 1675 if (count == 0) 1676 return 0; 1677 1678 while (count) { 1679 int i, n; 1680 1681 n = srp_map_finish_fr(state, req, ch, count, &sg_offset); 1682 if (unlikely(n < 0)) 1683 return n; 1684 1685 count -= n; 1686 for (i = 0; i < n; i++) 1687 state->sg = sg_next(state->sg); 1688 } 1689 1690 return 0; 1691 } 1692 1693 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch, 1694 struct srp_request *req, struct scatterlist *scat, 1695 int count) 1696 { 1697 struct srp_target_port *target = ch->target; 1698 struct scatterlist *sg; 1699 int i; 1700 1701 for_each_sg(scat, sg, count, i) { 1702 srp_map_desc(state, sg_dma_address(sg), sg_dma_len(sg), 1703 target->global_rkey); 1704 } 1705 1706 return 0; 1707 } 1708 1709 /* 1710 * Register the indirect data buffer descriptor with the HCA. 1711 * 1712 * Note: since the indirect data buffer descriptor has been allocated with 1713 * kmalloc() it is guaranteed that this buffer is a physically contiguous 1714 * memory buffer. 1715 */ 1716 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req, 1717 void **next_mr, void **end_mr, u32 idb_len, 1718 __be32 *idb_rkey) 1719 { 1720 struct srp_target_port *target = ch->target; 1721 struct srp_device *dev = target->srp_host->srp_dev; 1722 struct srp_map_state state; 1723 struct srp_direct_buf idb_desc; 1724 u64 idb_pages[1]; 1725 struct scatterlist idb_sg[1]; 1726 int ret; 1727 1728 memset(&state, 0, sizeof(state)); 1729 memset(&idb_desc, 0, sizeof(idb_desc)); 1730 state.gen.next = next_mr; 1731 state.gen.end = end_mr; 1732 state.desc = &idb_desc; 1733 state.base_dma_addr = req->indirect_dma_addr; 1734 state.dma_len = idb_len; 1735 1736 if (dev->use_fast_reg) { 1737 state.sg = idb_sg; 1738 sg_init_one(idb_sg, req->indirect_desc, idb_len); 1739 idb_sg->dma_address = req->indirect_dma_addr; /* hack! 
*/ 1740 #ifdef CONFIG_NEED_SG_DMA_LENGTH 1741 idb_sg->dma_length = idb_sg->length; /* hack^2 */ 1742 #endif 1743 ret = srp_map_finish_fr(&state, req, ch, 1, NULL); 1744 if (ret < 0) 1745 return ret; 1746 WARN_ON_ONCE(ret < 1); 1747 } else if (dev->use_fmr) { 1748 state.pages = idb_pages; 1749 state.pages[0] = (req->indirect_dma_addr & 1750 dev->mr_page_mask); 1751 state.npages = 1; 1752 ret = srp_map_finish_fmr(&state, ch); 1753 if (ret < 0) 1754 return ret; 1755 } else { 1756 return -EINVAL; 1757 } 1758 1759 *idb_rkey = idb_desc.key; 1760 1761 return 0; 1762 } 1763 1764 static void srp_check_mapping(struct srp_map_state *state, 1765 struct srp_rdma_ch *ch, struct srp_request *req, 1766 struct scatterlist *scat, int count) 1767 { 1768 struct srp_device *dev = ch->target->srp_host->srp_dev; 1769 struct srp_fr_desc **pfr; 1770 u64 desc_len = 0, mr_len = 0; 1771 int i; 1772 1773 for (i = 0; i < state->ndesc; i++) 1774 desc_len += be32_to_cpu(req->indirect_desc[i].len); 1775 if (dev->use_fast_reg) 1776 for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++) 1777 mr_len += (*pfr)->mr->length; 1778 else if (dev->use_fmr) 1779 for (i = 0; i < state->nmdesc; i++) 1780 mr_len += be32_to_cpu(req->indirect_desc[i].len); 1781 if (desc_len != scsi_bufflen(req->scmnd) || 1782 mr_len > scsi_bufflen(req->scmnd)) 1783 pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n", 1784 scsi_bufflen(req->scmnd), desc_len, mr_len, 1785 state->ndesc, state->nmdesc); 1786 } 1787 1788 /** 1789 * srp_map_data() - map SCSI data buffer onto an SRP request 1790 * @scmnd: SCSI command to map 1791 * @ch: SRP RDMA channel 1792 * @req: SRP request 1793 * 1794 * Returns the length in bytes of the SRP_CMD IU or a negative value if 1795 * mapping failed. The size of any immediate data is not included in the 1796 * return value. 
1797 */ 1798 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, 1799 struct srp_request *req) 1800 { 1801 struct srp_target_port *target = ch->target; 1802 struct scatterlist *scat, *sg; 1803 struct srp_cmd *cmd = req->cmd->buf; 1804 int i, len, nents, count, ret; 1805 struct srp_device *dev; 1806 struct ib_device *ibdev; 1807 struct srp_map_state state; 1808 struct srp_indirect_buf *indirect_hdr; 1809 u64 data_len; 1810 u32 idb_len, table_len; 1811 __be32 idb_rkey; 1812 u8 fmt; 1813 1814 req->cmd->num_sge = 1; 1815 1816 if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE) 1817 return sizeof(struct srp_cmd) + cmd->add_cdb_len; 1818 1819 if (scmnd->sc_data_direction != DMA_FROM_DEVICE && 1820 scmnd->sc_data_direction != DMA_TO_DEVICE) { 1821 shost_printk(KERN_WARNING, target->scsi_host, 1822 PFX "Unhandled data direction %d\n", 1823 scmnd->sc_data_direction); 1824 return -EINVAL; 1825 } 1826 1827 nents = scsi_sg_count(scmnd); 1828 scat = scsi_sglist(scmnd); 1829 data_len = scsi_bufflen(scmnd); 1830 1831 dev = target->srp_host->srp_dev; 1832 ibdev = dev->dev; 1833 1834 count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction); 1835 if (unlikely(count == 0)) 1836 return -EIO; 1837 1838 if (ch->use_imm_data && 1839 count <= SRP_MAX_IMM_SGE && 1840 SRP_IMM_DATA_OFFSET + data_len <= ch->max_it_iu_len && 1841 scmnd->sc_data_direction == DMA_TO_DEVICE) { 1842 struct srp_imm_buf *buf; 1843 struct ib_sge *sge = &req->cmd->sge[1]; 1844 1845 fmt = SRP_DATA_DESC_IMM; 1846 len = SRP_IMM_DATA_OFFSET; 1847 req->nmdesc = 0; 1848 buf = (void *)cmd->add_data + cmd->add_cdb_len; 1849 buf->len = cpu_to_be32(data_len); 1850 WARN_ON_ONCE((void *)(buf + 1) > (void *)cmd + len); 1851 for_each_sg(scat, sg, count, i) { 1852 sge[i].addr = sg_dma_address(sg); 1853 sge[i].length = sg_dma_len(sg); 1854 sge[i].lkey = target->lkey; 1855 } 1856 req->cmd->num_sge += count; 1857 goto map_complete; 1858 } 1859 1860 fmt = SRP_DATA_DESC_DIRECT; 1861 len = sizeof(struct srp_cmd) + cmd->add_cdb_len + 1862 sizeof(struct srp_direct_buf); 1863 1864 if (count == 1 && target->global_rkey) { 1865 /* 1866 * The midlayer only generated a single gather/scatter 1867 * entry, or DMA mapping coalesced everything to a 1868 * single entry. So a direct descriptor along with 1869 * the DMA MR suffices. 1870 */ 1871 struct srp_direct_buf *buf; 1872 1873 buf = (void *)cmd->add_data + cmd->add_cdb_len; 1874 buf->va = cpu_to_be64(sg_dma_address(scat)); 1875 buf->key = cpu_to_be32(target->global_rkey); 1876 buf->len = cpu_to_be32(sg_dma_len(scat)); 1877 1878 req->nmdesc = 0; 1879 goto map_complete; 1880 } 1881 1882 /* 1883 * We have more than one scatter/gather entry, so build our indirect 1884 * descriptor table, trying to merge as many entries as we can. 
1885 */ 1886 indirect_hdr = (void *)cmd->add_data + cmd->add_cdb_len; 1887 1888 ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr, 1889 target->indirect_size, DMA_TO_DEVICE); 1890 1891 memset(&state, 0, sizeof(state)); 1892 state.desc = req->indirect_desc; 1893 if (dev->use_fast_reg) 1894 ret = srp_map_sg_fr(&state, ch, req, scat, count); 1895 else if (dev->use_fmr) 1896 ret = srp_map_sg_fmr(&state, ch, req, scat, count); 1897 else 1898 ret = srp_map_sg_dma(&state, ch, req, scat, count); 1899 req->nmdesc = state.nmdesc; 1900 if (ret < 0) 1901 goto unmap; 1902 1903 { 1904 DEFINE_DYNAMIC_DEBUG_METADATA(ddm, 1905 "Memory mapping consistency check"); 1906 if (DYNAMIC_DEBUG_BRANCH(ddm)) 1907 srp_check_mapping(&state, ch, req, scat, count); 1908 } 1909 1910 /* We've mapped the request, now pull as much of the indirect 1911 * descriptor table as we can into the command buffer. If this 1912 * target is not using an external indirect table, we are 1913 * guaranteed to fit into the command, as the SCSI layer won't 1914 * give us more S/G entries than we allow. 1915 */ 1916 if (state.ndesc == 1) { 1917 /* 1918 * Memory registration collapsed the sg-list into one entry, 1919 * so use a direct descriptor. 1920 */ 1921 struct srp_direct_buf *buf; 1922 1923 buf = (void *)cmd->add_data + cmd->add_cdb_len; 1924 *buf = req->indirect_desc[0]; 1925 goto map_complete; 1926 } 1927 1928 if (unlikely(target->cmd_sg_cnt < state.ndesc && 1929 !target->allow_ext_sg)) { 1930 shost_printk(KERN_ERR, target->scsi_host, 1931 "Could not fit S/G list into SRP_CMD\n"); 1932 ret = -EIO; 1933 goto unmap; 1934 } 1935 1936 count = min(state.ndesc, target->cmd_sg_cnt); 1937 table_len = state.ndesc * sizeof (struct srp_direct_buf); 1938 idb_len = sizeof(struct srp_indirect_buf) + table_len; 1939 1940 fmt = SRP_DATA_DESC_INDIRECT; 1941 len = sizeof(struct srp_cmd) + cmd->add_cdb_len + 1942 sizeof(struct srp_indirect_buf); 1943 len += count * sizeof (struct srp_direct_buf); 1944 1945 memcpy(indirect_hdr->desc_list, req->indirect_desc, 1946 count * sizeof (struct srp_direct_buf)); 1947 1948 if (!target->global_rkey) { 1949 ret = srp_map_idb(ch, req, state.gen.next, state.gen.end, 1950 idb_len, &idb_rkey); 1951 if (ret < 0) 1952 goto unmap; 1953 req->nmdesc++; 1954 } else { 1955 idb_rkey = cpu_to_be32(target->global_rkey); 1956 } 1957 1958 indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr); 1959 indirect_hdr->table_desc.key = idb_rkey; 1960 indirect_hdr->table_desc.len = cpu_to_be32(table_len); 1961 indirect_hdr->len = cpu_to_be32(state.total_len); 1962 1963 if (scmnd->sc_data_direction == DMA_TO_DEVICE) 1964 cmd->data_out_desc_cnt = count; 1965 else 1966 cmd->data_in_desc_cnt = count; 1967 1968 ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len, 1969 DMA_TO_DEVICE); 1970 1971 map_complete: 1972 if (scmnd->sc_data_direction == DMA_TO_DEVICE) 1973 cmd->buf_fmt = fmt << 4; 1974 else 1975 cmd->buf_fmt = fmt; 1976 1977 return len; 1978 1979 unmap: 1980 srp_unmap_data(scmnd, ch, req); 1981 if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size) 1982 ret = -E2BIG; 1983 return ret; 1984 } 1985 1986 /* 1987 * Return an IU and possible credit to the free pool 1988 */ 1989 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu, 1990 enum srp_iu_type iu_type) 1991 { 1992 unsigned long flags; 1993 1994 spin_lock_irqsave(&ch->lock, flags); 1995 list_add(&iu->list, &ch->free_tx); 1996 if (iu_type != SRP_IU_RSP) 1997 ++ch->req_lim; 1998 spin_unlock_irqrestore(&ch->lock, flags); 1999 } 2000 
2001 /* 2002 * Must be called with ch->lock held to protect req_lim and free_tx. 2003 * If IU is not sent, it must be returned using srp_put_tx_iu(). 2004 * 2005 * Note: 2006 * An upper limit for the number of allocated information units for each 2007 * request type is: 2008 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues 2009 * more than Scsi_Host.can_queue requests. 2010 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE. 2011 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than 2012 * one unanswered SRP request to an initiator. 2013 */ 2014 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch, 2015 enum srp_iu_type iu_type) 2016 { 2017 struct srp_target_port *target = ch->target; 2018 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE; 2019 struct srp_iu *iu; 2020 2021 lockdep_assert_held(&ch->lock); 2022 2023 ib_process_cq_direct(ch->send_cq, -1); 2024 2025 if (list_empty(&ch->free_tx)) 2026 return NULL; 2027 2028 /* Initiator responses to target requests do not consume credits */ 2029 if (iu_type != SRP_IU_RSP) { 2030 if (ch->req_lim <= rsv) { 2031 ++target->zero_req_lim; 2032 return NULL; 2033 } 2034 2035 --ch->req_lim; 2036 } 2037 2038 iu = list_first_entry(&ch->free_tx, struct srp_iu, list); 2039 list_del(&iu->list); 2040 return iu; 2041 } 2042 2043 /* 2044 * Note: if this function is called from inside ib_drain_sq() then it will 2045 * be called without ch->lock being held. If ib_drain_sq() dequeues a WQE 2046 * with status IB_WC_SUCCESS then that's a bug. 2047 */ 2048 static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc) 2049 { 2050 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe); 2051 struct srp_rdma_ch *ch = cq->cq_context; 2052 2053 if (unlikely(wc->status != IB_WC_SUCCESS)) { 2054 srp_handle_qp_err(cq, wc, "SEND"); 2055 return; 2056 } 2057 2058 lockdep_assert_held(&ch->lock); 2059 2060 list_add(&iu->list, &ch->free_tx); 2061 } 2062 2063 /** 2064 * srp_post_send() - send an SRP information unit 2065 * @ch: RDMA channel over which to send the information unit. 2066 * @iu: Information unit to send. 2067 * @len: Length of the information unit excluding immediate data. 
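 *
 * Return: 0 on success, -EINVAL if iu->num_sge exceeds SRP_MAX_SGE, or the
 * error code returned by ib_post_send().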
2068 */ 2069 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len) 2070 { 2071 struct srp_target_port *target = ch->target; 2072 struct ib_send_wr wr; 2073 2074 if (WARN_ON_ONCE(iu->num_sge > SRP_MAX_SGE)) 2075 return -EINVAL; 2076 2077 iu->sge[0].addr = iu->dma; 2078 iu->sge[0].length = len; 2079 iu->sge[0].lkey = target->lkey; 2080 2081 iu->cqe.done = srp_send_done; 2082 2083 wr.next = NULL; 2084 wr.wr_cqe = &iu->cqe; 2085 wr.sg_list = &iu->sge[0]; 2086 wr.num_sge = iu->num_sge; 2087 wr.opcode = IB_WR_SEND; 2088 wr.send_flags = IB_SEND_SIGNALED; 2089 2090 return ib_post_send(ch->qp, &wr, NULL); 2091 } 2092 2093 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu) 2094 { 2095 struct srp_target_port *target = ch->target; 2096 struct ib_recv_wr wr; 2097 struct ib_sge list; 2098 2099 list.addr = iu->dma; 2100 list.length = iu->size; 2101 list.lkey = target->lkey; 2102 2103 iu->cqe.done = srp_recv_done; 2104 2105 wr.next = NULL; 2106 wr.wr_cqe = &iu->cqe; 2107 wr.sg_list = &list; 2108 wr.num_sge = 1; 2109 2110 return ib_post_recv(ch->qp, &wr, NULL); 2111 } 2112 2113 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp) 2114 { 2115 struct srp_target_port *target = ch->target; 2116 struct srp_request *req; 2117 struct scsi_cmnd *scmnd; 2118 unsigned long flags; 2119 2120 if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) { 2121 spin_lock_irqsave(&ch->lock, flags); 2122 ch->req_lim += be32_to_cpu(rsp->req_lim_delta); 2123 if (rsp->tag == ch->tsk_mgmt_tag) { 2124 ch->tsk_mgmt_status = -1; 2125 if (be32_to_cpu(rsp->resp_data_len) >= 4) 2126 ch->tsk_mgmt_status = rsp->data[3]; 2127 complete(&ch->tsk_mgmt_done); 2128 } else { 2129 shost_printk(KERN_ERR, target->scsi_host, 2130 "Received tsk mgmt response too late for tag %#llx\n", 2131 rsp->tag); 2132 } 2133 spin_unlock_irqrestore(&ch->lock, flags); 2134 } else { 2135 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag); 2136 if (scmnd && scmnd->host_scribble) { 2137 req = (void *)scmnd->host_scribble; 2138 scmnd = srp_claim_req(ch, req, NULL, scmnd); 2139 } else { 2140 scmnd = NULL; 2141 } 2142 if (!scmnd) { 2143 shost_printk(KERN_ERR, target->scsi_host, 2144 "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n", 2145 rsp->tag, ch - target->ch, ch->qp->qp_num); 2146 2147 spin_lock_irqsave(&ch->lock, flags); 2148 ch->req_lim += be32_to_cpu(rsp->req_lim_delta); 2149 spin_unlock_irqrestore(&ch->lock, flags); 2150 2151 return; 2152 } 2153 scmnd->result = rsp->status; 2154 2155 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) { 2156 memcpy(scmnd->sense_buffer, rsp->data + 2157 be32_to_cpu(rsp->resp_data_len), 2158 min_t(int, be32_to_cpu(rsp->sense_data_len), 2159 SCSI_SENSE_BUFFERSIZE)); 2160 } 2161 2162 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER)) 2163 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt)); 2164 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER)) 2165 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt)); 2166 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER)) 2167 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt)); 2168 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER)) 2169 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt)); 2170 2171 srp_free_req(ch, req, scmnd, 2172 be32_to_cpu(rsp->req_lim_delta)); 2173 2174 scmnd->host_scribble = NULL; 2175 scmnd->scsi_done(scmnd); 2176 } 2177 } 2178 2179 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta, 2180 void *rsp, int len) 2181 { 2182 struct srp_target_port *target = ch->target; 2183 
struct ib_device *dev = target->srp_host->srp_dev->dev; 2184 unsigned long flags; 2185 struct srp_iu *iu; 2186 int err; 2187 2188 spin_lock_irqsave(&ch->lock, flags); 2189 ch->req_lim += req_delta; 2190 iu = __srp_get_tx_iu(ch, SRP_IU_RSP); 2191 spin_unlock_irqrestore(&ch->lock, flags); 2192 2193 if (!iu) { 2194 shost_printk(KERN_ERR, target->scsi_host, PFX 2195 "no IU available to send response\n"); 2196 return 1; 2197 } 2198 2199 iu->num_sge = 1; 2200 ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE); 2201 memcpy(iu->buf, rsp, len); 2202 ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE); 2203 2204 err = srp_post_send(ch, iu, len); 2205 if (err) { 2206 shost_printk(KERN_ERR, target->scsi_host, PFX 2207 "unable to post response: %d\n", err); 2208 srp_put_tx_iu(ch, iu, SRP_IU_RSP); 2209 } 2210 2211 return err; 2212 } 2213 2214 static void srp_process_cred_req(struct srp_rdma_ch *ch, 2215 struct srp_cred_req *req) 2216 { 2217 struct srp_cred_rsp rsp = { 2218 .opcode = SRP_CRED_RSP, 2219 .tag = req->tag, 2220 }; 2221 s32 delta = be32_to_cpu(req->req_lim_delta); 2222 2223 if (srp_response_common(ch, delta, &rsp, sizeof(rsp))) 2224 shost_printk(KERN_ERR, ch->target->scsi_host, PFX 2225 "problems processing SRP_CRED_REQ\n"); 2226 } 2227 2228 static void srp_process_aer_req(struct srp_rdma_ch *ch, 2229 struct srp_aer_req *req) 2230 { 2231 struct srp_target_port *target = ch->target; 2232 struct srp_aer_rsp rsp = { 2233 .opcode = SRP_AER_RSP, 2234 .tag = req->tag, 2235 }; 2236 s32 delta = be32_to_cpu(req->req_lim_delta); 2237 2238 shost_printk(KERN_ERR, target->scsi_host, PFX 2239 "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun)); 2240 2241 if (srp_response_common(ch, delta, &rsp, sizeof(rsp))) 2242 shost_printk(KERN_ERR, target->scsi_host, PFX 2243 "problems processing SRP_AER_REQ\n"); 2244 } 2245 2246 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc) 2247 { 2248 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe); 2249 struct srp_rdma_ch *ch = cq->cq_context; 2250 struct srp_target_port *target = ch->target; 2251 struct ib_device *dev = target->srp_host->srp_dev->dev; 2252 int res; 2253 u8 opcode; 2254 2255 if (unlikely(wc->status != IB_WC_SUCCESS)) { 2256 srp_handle_qp_err(cq, wc, "RECV"); 2257 return; 2258 } 2259 2260 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len, 2261 DMA_FROM_DEVICE); 2262 2263 opcode = *(u8 *) iu->buf; 2264 2265 if (0) { 2266 shost_printk(KERN_ERR, target->scsi_host, 2267 PFX "recv completion, opcode 0x%02x\n", opcode); 2268 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1, 2269 iu->buf, wc->byte_len, true); 2270 } 2271 2272 switch (opcode) { 2273 case SRP_RSP: 2274 srp_process_rsp(ch, iu->buf); 2275 break; 2276 2277 case SRP_CRED_REQ: 2278 srp_process_cred_req(ch, iu->buf); 2279 break; 2280 2281 case SRP_AER_REQ: 2282 srp_process_aer_req(ch, iu->buf); 2283 break; 2284 2285 case SRP_T_LOGOUT: 2286 /* XXX Handle target logout */ 2287 shost_printk(KERN_WARNING, target->scsi_host, 2288 PFX "Got target logout request\n"); 2289 break; 2290 2291 default: 2292 shost_printk(KERN_WARNING, target->scsi_host, 2293 PFX "Unhandled SRP opcode 0x%02x\n", opcode); 2294 break; 2295 } 2296 2297 ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len, 2298 DMA_FROM_DEVICE); 2299 2300 res = srp_post_recv(ch, iu); 2301 if (res != 0) 2302 shost_printk(KERN_ERR, target->scsi_host, 2303 PFX "Recv failed with error code %d\n", res); 2304 } 2305 2306 /** 2307 * srp_tl_err_work() - handle a transport layer error 2308 * 
@work: Work structure embedded in an SRP target port. 2309 * 2310 * Note: This function may get invoked before the rport has been created, 2311 * hence the target->rport test. 2312 */ 2313 static void srp_tl_err_work(struct work_struct *work) 2314 { 2315 struct srp_target_port *target; 2316 2317 target = container_of(work, struct srp_target_port, tl_err_work); 2318 if (target->rport) 2319 srp_start_tl_fail_timers(target->rport); 2320 } 2321 2322 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc, 2323 const char *opname) 2324 { 2325 struct srp_rdma_ch *ch = cq->cq_context; 2326 struct srp_target_port *target = ch->target; 2327 2328 if (ch->connected && !target->qp_in_error) { 2329 shost_printk(KERN_ERR, target->scsi_host, 2330 PFX "failed %s status %s (%d) for CQE %p\n", 2331 opname, ib_wc_status_msg(wc->status), wc->status, 2332 wc->wr_cqe); 2333 queue_work(system_long_wq, &target->tl_err_work); 2334 } 2335 target->qp_in_error = true; 2336 } 2337 2338 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) 2339 { 2340 struct srp_target_port *target = host_to_target(shost); 2341 struct srp_rport *rport = target->rport; 2342 struct srp_rdma_ch *ch; 2343 struct srp_request *req; 2344 struct srp_iu *iu; 2345 struct srp_cmd *cmd; 2346 struct ib_device *dev; 2347 unsigned long flags; 2348 u32 tag; 2349 u16 idx; 2350 int len, ret; 2351 const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler; 2352 2353 /* 2354 * The SCSI EH thread is the only context from which srp_queuecommand() 2355 * can get invoked for blocked devices (SDEV_BLOCK / 2356 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by 2357 * locking the rport mutex if invoked from inside the SCSI EH. 2358 */ 2359 if (in_scsi_eh) 2360 mutex_lock(&rport->mutex); 2361 2362 scmnd->result = srp_chkready(target->rport); 2363 if (unlikely(scmnd->result)) 2364 goto err; 2365 2366 WARN_ON_ONCE(scmnd->request->tag < 0); 2367 tag = blk_mq_unique_tag(scmnd->request); 2368 ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)]; 2369 idx = blk_mq_unique_tag_to_tag(tag); 2370 WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n", 2371 dev_name(&shost->shost_gendev), tag, idx, 2372 target->req_ring_size); 2373 2374 spin_lock_irqsave(&ch->lock, flags); 2375 iu = __srp_get_tx_iu(ch, SRP_IU_CMD); 2376 spin_unlock_irqrestore(&ch->lock, flags); 2377 2378 if (!iu) 2379 goto err; 2380 2381 req = &ch->req_ring[idx]; 2382 dev = target->srp_host->srp_dev->dev; 2383 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_it_iu_len, 2384 DMA_TO_DEVICE); 2385 2386 scmnd->host_scribble = (void *) req; 2387 2388 cmd = iu->buf; 2389 memset(cmd, 0, sizeof *cmd); 2390 2391 cmd->opcode = SRP_CMD; 2392 int_to_scsilun(scmnd->device->lun, &cmd->lun); 2393 cmd->tag = tag; 2394 memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len); 2395 if (unlikely(scmnd->cmd_len > sizeof(cmd->cdb))) { 2396 cmd->add_cdb_len = round_up(scmnd->cmd_len - sizeof(cmd->cdb), 2397 4); 2398 if (WARN_ON_ONCE(cmd->add_cdb_len > SRP_MAX_ADD_CDB_LEN)) 2399 goto err_iu; 2400 } 2401 2402 req->scmnd = scmnd; 2403 req->cmd = iu; 2404 2405 len = srp_map_data(scmnd, ch, req); 2406 if (len < 0) { 2407 shost_printk(KERN_ERR, target->scsi_host, 2408 PFX "Failed to map data (%d)\n", len); 2409 /* 2410 * If we ran out of memory descriptors (-ENOMEM) because an 2411 * application is queuing many requests with more than 2412 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer 2413 * to reduce queue depth temporarily. 
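 *
 * Returning QUEUE_FULL (SAM TASK SET FULL) makes the SCSI core requeue
 * the command and, since this host template sets track_queue_depth,
 * lower the device queue depth instead of failing the I/O.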
2414 */ 2415 scmnd->result = len == -ENOMEM ? 2416 DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16; 2417 goto err_iu; 2418 } 2419 2420 ib_dma_sync_single_for_device(dev, iu->dma, ch->max_it_iu_len, 2421 DMA_TO_DEVICE); 2422 2423 if (srp_post_send(ch, iu, len)) { 2424 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n"); 2425 scmnd->result = DID_ERROR << 16; 2426 goto err_unmap; 2427 } 2428 2429 ret = 0; 2430 2431 unlock_rport: 2432 if (in_scsi_eh) 2433 mutex_unlock(&rport->mutex); 2434 2435 return ret; 2436 2437 err_unmap: 2438 srp_unmap_data(scmnd, ch, req); 2439 2440 err_iu: 2441 srp_put_tx_iu(ch, iu, SRP_IU_CMD); 2442 2443 /* 2444 * Avoid that the loops that iterate over the request ring can 2445 * encounter a dangling SCSI command pointer. 2446 */ 2447 req->scmnd = NULL; 2448 2449 err: 2450 if (scmnd->result) { 2451 scmnd->scsi_done(scmnd); 2452 ret = 0; 2453 } else { 2454 ret = SCSI_MLQUEUE_HOST_BUSY; 2455 } 2456 2457 goto unlock_rport; 2458 } 2459 2460 /* 2461 * Note: the resources allocated in this function are freed in 2462 * srp_free_ch_ib(). 2463 */ 2464 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch) 2465 { 2466 struct srp_target_port *target = ch->target; 2467 int i; 2468 2469 ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring), 2470 GFP_KERNEL); 2471 if (!ch->rx_ring) 2472 goto err_no_ring; 2473 ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring), 2474 GFP_KERNEL); 2475 if (!ch->tx_ring) 2476 goto err_no_ring; 2477 2478 for (i = 0; i < target->queue_size; ++i) { 2479 ch->rx_ring[i] = srp_alloc_iu(target->srp_host, 2480 ch->max_ti_iu_len, 2481 GFP_KERNEL, DMA_FROM_DEVICE); 2482 if (!ch->rx_ring[i]) 2483 goto err; 2484 } 2485 2486 for (i = 0; i < target->queue_size; ++i) { 2487 ch->tx_ring[i] = srp_alloc_iu(target->srp_host, 2488 ch->max_it_iu_len, 2489 GFP_KERNEL, DMA_TO_DEVICE); 2490 if (!ch->tx_ring[i]) 2491 goto err; 2492 2493 list_add(&ch->tx_ring[i]->list, &ch->free_tx); 2494 } 2495 2496 return 0; 2497 2498 err: 2499 for (i = 0; i < target->queue_size; ++i) { 2500 srp_free_iu(target->srp_host, ch->rx_ring[i]); 2501 srp_free_iu(target->srp_host, ch->tx_ring[i]); 2502 } 2503 2504 2505 err_no_ring: 2506 kfree(ch->tx_ring); 2507 ch->tx_ring = NULL; 2508 kfree(ch->rx_ring); 2509 ch->rx_ring = NULL; 2510 2511 return -ENOMEM; 2512 } 2513 2514 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask) 2515 { 2516 uint64_t T_tr_ns, max_compl_time_ms; 2517 uint32_t rq_tmo_jiffies; 2518 2519 /* 2520 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair, 2521 * table 91), both the QP timeout and the retry count have to be set 2522 * for RC QP's during the RTR to RTS transition. 2523 */ 2524 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) != 2525 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)); 2526 2527 /* 2528 * Set target->rq_tmo_jiffies to one second more than the largest time 2529 * it can take before an error completion is generated. See also 2530 * C9-140..142 in the IBTA spec for more information about how to 2531 * convert the QP Local ACK Timeout value to nanoseconds. 
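 *
 * Worked example (illustrative values only): with qp_attr->timeout == 14
 * and qp_attr->retry_cnt == 7, T_tr = 4096 ns * 2^14 ~= 67 ms, the
 * worst-case completion time is 7 * 4 * 67 ms ~= 1.9 s, and the resulting
 * rq_tmo_jiffies corresponds to roughly 2.9 seconds.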
2532 */ 2533 T_tr_ns = 4096 * (1ULL << qp_attr->timeout); 2534 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns; 2535 do_div(max_compl_time_ms, NSEC_PER_MSEC); 2536 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000); 2537 2538 return rq_tmo_jiffies; 2539 } 2540 2541 static void srp_cm_rep_handler(struct ib_cm_id *cm_id, 2542 const struct srp_login_rsp *lrsp, 2543 struct srp_rdma_ch *ch) 2544 { 2545 struct srp_target_port *target = ch->target; 2546 struct ib_qp_attr *qp_attr = NULL; 2547 int attr_mask = 0; 2548 int ret = 0; 2549 int i; 2550 2551 if (lrsp->opcode == SRP_LOGIN_RSP) { 2552 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len); 2553 ch->req_lim = be32_to_cpu(lrsp->req_lim_delta); 2554 ch->use_imm_data = lrsp->rsp_flags & SRP_LOGIN_RSP_IMMED_SUPP; 2555 ch->max_it_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, 2556 ch->use_imm_data); 2557 WARN_ON_ONCE(ch->max_it_iu_len > 2558 be32_to_cpu(lrsp->max_it_iu_len)); 2559 2560 if (ch->use_imm_data) 2561 shost_printk(KERN_DEBUG, target->scsi_host, 2562 PFX "using immediate data\n"); 2563 2564 /* 2565 * Reserve credits for task management so we don't 2566 * bounce requests back to the SCSI mid-layer. 2567 */ 2568 target->scsi_host->can_queue 2569 = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE, 2570 target->scsi_host->can_queue); 2571 target->scsi_host->cmd_per_lun 2572 = min_t(int, target->scsi_host->can_queue, 2573 target->scsi_host->cmd_per_lun); 2574 } else { 2575 shost_printk(KERN_WARNING, target->scsi_host, 2576 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode); 2577 ret = -ECONNRESET; 2578 goto error; 2579 } 2580 2581 if (!ch->rx_ring) { 2582 ret = srp_alloc_iu_bufs(ch); 2583 if (ret) 2584 goto error; 2585 } 2586 2587 for (i = 0; i < target->queue_size; i++) { 2588 struct srp_iu *iu = ch->rx_ring[i]; 2589 2590 ret = srp_post_recv(ch, iu); 2591 if (ret) 2592 goto error; 2593 } 2594 2595 if (!target->using_rdma_cm) { 2596 ret = -ENOMEM; 2597 qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL); 2598 if (!qp_attr) 2599 goto error; 2600 2601 qp_attr->qp_state = IB_QPS_RTR; 2602 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); 2603 if (ret) 2604 goto error_free; 2605 2606 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask); 2607 if (ret) 2608 goto error_free; 2609 2610 qp_attr->qp_state = IB_QPS_RTS; 2611 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); 2612 if (ret) 2613 goto error_free; 2614 2615 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask); 2616 2617 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask); 2618 if (ret) 2619 goto error_free; 2620 2621 ret = ib_send_cm_rtu(cm_id, NULL, 0); 2622 } 2623 2624 error_free: 2625 kfree(qp_attr); 2626 2627 error: 2628 ch->status = ret; 2629 } 2630 2631 static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id, 2632 const struct ib_cm_event *event, 2633 struct srp_rdma_ch *ch) 2634 { 2635 struct srp_target_port *target = ch->target; 2636 struct Scsi_Host *shost = target->scsi_host; 2637 struct ib_class_port_info *cpi; 2638 int opcode; 2639 u16 dlid; 2640 2641 switch (event->param.rej_rcvd.reason) { 2642 case IB_CM_REJ_PORT_CM_REDIRECT: 2643 cpi = event->param.rej_rcvd.ari; 2644 dlid = be16_to_cpu(cpi->redirect_lid); 2645 sa_path_set_dlid(&ch->ib_cm.path, dlid); 2646 ch->ib_cm.path.pkey = cpi->redirect_pkey; 2647 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff; 2648 memcpy(ch->ib_cm.path.dgid.raw, cpi->redirect_gid, 16); 2649 2650 ch->status = dlid ? 
SRP_DLID_REDIRECT : SRP_PORT_REDIRECT; 2651 break; 2652 2653 case IB_CM_REJ_PORT_REDIRECT: 2654 if (srp_target_is_topspin(target)) { 2655 union ib_gid *dgid = &ch->ib_cm.path.dgid; 2656 2657 /* 2658 * Topspin/Cisco SRP gateways incorrectly send 2659 * reject reason code 25 when they mean 24 2660 * (port redirect). 2661 */ 2662 memcpy(dgid->raw, event->param.rej_rcvd.ari, 16); 2663 2664 shost_printk(KERN_DEBUG, shost, 2665 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n", 2666 be64_to_cpu(dgid->global.subnet_prefix), 2667 be64_to_cpu(dgid->global.interface_id)); 2668 2669 ch->status = SRP_PORT_REDIRECT; 2670 } else { 2671 shost_printk(KERN_WARNING, shost, 2672 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n"); 2673 ch->status = -ECONNRESET; 2674 } 2675 break; 2676 2677 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID: 2678 shost_printk(KERN_WARNING, shost, 2679 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n"); 2680 ch->status = -ECONNRESET; 2681 break; 2682 2683 case IB_CM_REJ_CONSUMER_DEFINED: 2684 opcode = *(u8 *) event->private_data; 2685 if (opcode == SRP_LOGIN_REJ) { 2686 struct srp_login_rej *rej = event->private_data; 2687 u32 reason = be32_to_cpu(rej->reason); 2688 2689 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE) 2690 shost_printk(KERN_WARNING, shost, 2691 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n"); 2692 else 2693 shost_printk(KERN_WARNING, shost, PFX 2694 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n", 2695 target->sgid.raw, 2696 target->ib_cm.orig_dgid.raw, 2697 reason); 2698 } else 2699 shost_printk(KERN_WARNING, shost, 2700 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED," 2701 " opcode 0x%02x\n", opcode); 2702 ch->status = -ECONNRESET; 2703 break; 2704 2705 case IB_CM_REJ_STALE_CONN: 2706 shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n"); 2707 ch->status = SRP_STALE_CONN; 2708 break; 2709 2710 default: 2711 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n", 2712 event->param.rej_rcvd.reason); 2713 ch->status = -ECONNRESET; 2714 } 2715 } 2716 2717 static int srp_ib_cm_handler(struct ib_cm_id *cm_id, 2718 const struct ib_cm_event *event) 2719 { 2720 struct srp_rdma_ch *ch = cm_id->context; 2721 struct srp_target_port *target = ch->target; 2722 int comp = 0; 2723 2724 switch (event->event) { 2725 case IB_CM_REQ_ERROR: 2726 shost_printk(KERN_DEBUG, target->scsi_host, 2727 PFX "Sending CM REQ failed\n"); 2728 comp = 1; 2729 ch->status = -ECONNRESET; 2730 break; 2731 2732 case IB_CM_REP_RECEIVED: 2733 comp = 1; 2734 srp_cm_rep_handler(cm_id, event->private_data, ch); 2735 break; 2736 2737 case IB_CM_REJ_RECEIVED: 2738 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n"); 2739 comp = 1; 2740 2741 srp_ib_cm_rej_handler(cm_id, event, ch); 2742 break; 2743 2744 case IB_CM_DREQ_RECEIVED: 2745 shost_printk(KERN_WARNING, target->scsi_host, 2746 PFX "DREQ received - connection closed\n"); 2747 ch->connected = false; 2748 if (ib_send_cm_drep(cm_id, NULL, 0)) 2749 shost_printk(KERN_ERR, target->scsi_host, 2750 PFX "Sending CM DREP failed\n"); 2751 queue_work(system_long_wq, &target->tl_err_work); 2752 break; 2753 2754 case IB_CM_TIMEWAIT_EXIT: 2755 shost_printk(KERN_ERR, target->scsi_host, 2756 PFX "connection closed\n"); 2757 comp = 1; 2758 2759 ch->status = 0; 2760 break; 2761 2762 case IB_CM_MRA_RECEIVED: 2763 case IB_CM_DREQ_ERROR: 2764 case IB_CM_DREP_RECEIVED: 2765 break; 2766 2767 default: 2768 shost_printk(KERN_WARNING, target->scsi_host, 2769 PFX "Unhandled CM event %d\n", event->event); 2770 break; 2771 } 2772 2773 if 
(comp) 2774 complete(&ch->done); 2775 2776 return 0; 2777 } 2778 2779 static void srp_rdma_cm_rej_handler(struct srp_rdma_ch *ch, 2780 struct rdma_cm_event *event) 2781 { 2782 struct srp_target_port *target = ch->target; 2783 struct Scsi_Host *shost = target->scsi_host; 2784 int opcode; 2785 2786 switch (event->status) { 2787 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID: 2788 shost_printk(KERN_WARNING, shost, 2789 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n"); 2790 ch->status = -ECONNRESET; 2791 break; 2792 2793 case IB_CM_REJ_CONSUMER_DEFINED: 2794 opcode = *(u8 *) event->param.conn.private_data; 2795 if (opcode == SRP_LOGIN_REJ) { 2796 struct srp_login_rej *rej = 2797 (struct srp_login_rej *) 2798 event->param.conn.private_data; 2799 u32 reason = be32_to_cpu(rej->reason); 2800 2801 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE) 2802 shost_printk(KERN_WARNING, shost, 2803 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n"); 2804 else 2805 shost_printk(KERN_WARNING, shost, 2806 PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason); 2807 } else { 2808 shost_printk(KERN_WARNING, shost, 2809 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n", 2810 opcode); 2811 } 2812 ch->status = -ECONNRESET; 2813 break; 2814 2815 case IB_CM_REJ_STALE_CONN: 2816 shost_printk(KERN_WARNING, shost, 2817 " REJ reason: stale connection\n"); 2818 ch->status = SRP_STALE_CONN; 2819 break; 2820 2821 default: 2822 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n", 2823 event->status); 2824 ch->status = -ECONNRESET; 2825 break; 2826 } 2827 } 2828 2829 static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id, 2830 struct rdma_cm_event *event) 2831 { 2832 struct srp_rdma_ch *ch = cm_id->context; 2833 struct srp_target_port *target = ch->target; 2834 int comp = 0; 2835 2836 switch (event->event) { 2837 case RDMA_CM_EVENT_ADDR_RESOLVED: 2838 ch->status = 0; 2839 comp = 1; 2840 break; 2841 2842 case RDMA_CM_EVENT_ADDR_ERROR: 2843 ch->status = -ENXIO; 2844 comp = 1; 2845 break; 2846 2847 case RDMA_CM_EVENT_ROUTE_RESOLVED: 2848 ch->status = 0; 2849 comp = 1; 2850 break; 2851 2852 case RDMA_CM_EVENT_ROUTE_ERROR: 2853 case RDMA_CM_EVENT_UNREACHABLE: 2854 ch->status = -EHOSTUNREACH; 2855 comp = 1; 2856 break; 2857 2858 case RDMA_CM_EVENT_CONNECT_ERROR: 2859 shost_printk(KERN_DEBUG, target->scsi_host, 2860 PFX "Sending CM REQ failed\n"); 2861 comp = 1; 2862 ch->status = -ECONNRESET; 2863 break; 2864 2865 case RDMA_CM_EVENT_ESTABLISHED: 2866 comp = 1; 2867 srp_cm_rep_handler(NULL, event->param.conn.private_data, ch); 2868 break; 2869 2870 case RDMA_CM_EVENT_REJECTED: 2871 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n"); 2872 comp = 1; 2873 2874 srp_rdma_cm_rej_handler(ch, event); 2875 break; 2876 2877 case RDMA_CM_EVENT_DISCONNECTED: 2878 if (ch->connected) { 2879 shost_printk(KERN_WARNING, target->scsi_host, 2880 PFX "received DREQ\n"); 2881 rdma_disconnect(ch->rdma_cm.cm_id); 2882 comp = 1; 2883 ch->status = 0; 2884 queue_work(system_long_wq, &target->tl_err_work); 2885 } 2886 break; 2887 2888 case RDMA_CM_EVENT_TIMEWAIT_EXIT: 2889 shost_printk(KERN_ERR, target->scsi_host, 2890 PFX "connection closed\n"); 2891 2892 comp = 1; 2893 ch->status = 0; 2894 break; 2895 2896 default: 2897 shost_printk(KERN_WARNING, target->scsi_host, 2898 PFX "Unhandled CM event %d\n", event->event); 2899 break; 2900 } 2901 2902 if (comp) 2903 complete(&ch->done); 2904 2905 return 0; 2906 } 2907 2908 /** 2909 * srp_change_queue_depth - setting device queue depth 2910 * @sdev: scsi device struct 2911 * @qdepth: 
requested queue depth 2912 * 2913 * Returns queue depth. 2914 */ 2915 static int 2916 srp_change_queue_depth(struct scsi_device *sdev, int qdepth) 2917 { 2918 if (!sdev->tagged_supported) 2919 qdepth = 1; 2920 return scsi_change_queue_depth(sdev, qdepth); 2921 } 2922 2923 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun, 2924 u8 func, u8 *status) 2925 { 2926 struct srp_target_port *target = ch->target; 2927 struct srp_rport *rport = target->rport; 2928 struct ib_device *dev = target->srp_host->srp_dev->dev; 2929 struct srp_iu *iu; 2930 struct srp_tsk_mgmt *tsk_mgmt; 2931 int res; 2932 2933 if (!ch->connected || target->qp_in_error) 2934 return -1; 2935 2936 /* 2937 * Lock the rport mutex to avoid that srp_create_ch_ib() is 2938 * invoked while a task management function is being sent. 2939 */ 2940 mutex_lock(&rport->mutex); 2941 spin_lock_irq(&ch->lock); 2942 iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT); 2943 spin_unlock_irq(&ch->lock); 2944 2945 if (!iu) { 2946 mutex_unlock(&rport->mutex); 2947 2948 return -1; 2949 } 2950 2951 iu->num_sge = 1; 2952 2953 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt, 2954 DMA_TO_DEVICE); 2955 tsk_mgmt = iu->buf; 2956 memset(tsk_mgmt, 0, sizeof *tsk_mgmt); 2957 2958 tsk_mgmt->opcode = SRP_TSK_MGMT; 2959 int_to_scsilun(lun, &tsk_mgmt->lun); 2960 tsk_mgmt->tsk_mgmt_func = func; 2961 tsk_mgmt->task_tag = req_tag; 2962 2963 spin_lock_irq(&ch->lock); 2964 ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT; 2965 tsk_mgmt->tag = ch->tsk_mgmt_tag; 2966 spin_unlock_irq(&ch->lock); 2967 2968 init_completion(&ch->tsk_mgmt_done); 2969 2970 ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt, 2971 DMA_TO_DEVICE); 2972 if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) { 2973 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT); 2974 mutex_unlock(&rport->mutex); 2975 2976 return -1; 2977 } 2978 res = wait_for_completion_timeout(&ch->tsk_mgmt_done, 2979 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)); 2980 if (res > 0 && status) 2981 *status = ch->tsk_mgmt_status; 2982 mutex_unlock(&rport->mutex); 2983 2984 WARN_ON_ONCE(res < 0); 2985 2986 return res > 0 ? 
0 : -1; 2987 } 2988 2989 static int srp_abort(struct scsi_cmnd *scmnd) 2990 { 2991 struct srp_target_port *target = host_to_target(scmnd->device->host); 2992 struct srp_request *req = (struct srp_request *) scmnd->host_scribble; 2993 u32 tag; 2994 u16 ch_idx; 2995 struct srp_rdma_ch *ch; 2996 int ret; 2997 2998 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); 2999 3000 if (!req) 3001 return SUCCESS; 3002 tag = blk_mq_unique_tag(scmnd->request); 3003 ch_idx = blk_mq_unique_tag_to_hwq(tag); 3004 if (WARN_ON_ONCE(ch_idx >= target->ch_count)) 3005 return SUCCESS; 3006 ch = &target->ch[ch_idx]; 3007 if (!srp_claim_req(ch, req, NULL, scmnd)) 3008 return SUCCESS; 3009 shost_printk(KERN_ERR, target->scsi_host, 3010 "Sending SRP abort for tag %#x\n", tag); 3011 if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun, 3012 SRP_TSK_ABORT_TASK, NULL) == 0) 3013 ret = SUCCESS; 3014 else if (target->rport->state == SRP_RPORT_LOST) 3015 ret = FAST_IO_FAIL; 3016 else 3017 ret = FAILED; 3018 if (ret == SUCCESS) { 3019 srp_free_req(ch, req, scmnd, 0); 3020 scmnd->result = DID_ABORT << 16; 3021 scmnd->scsi_done(scmnd); 3022 } 3023 3024 return ret; 3025 } 3026 3027 static int srp_reset_device(struct scsi_cmnd *scmnd) 3028 { 3029 struct srp_target_port *target = host_to_target(scmnd->device->host); 3030 struct srp_rdma_ch *ch; 3031 u8 status; 3032 3033 shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n"); 3034 3035 ch = &target->ch[0]; 3036 if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun, 3037 SRP_TSK_LUN_RESET, &status)) 3038 return FAILED; 3039 if (status) 3040 return FAILED; 3041 3042 return SUCCESS; 3043 } 3044 3045 static int srp_reset_host(struct scsi_cmnd *scmnd) 3046 { 3047 struct srp_target_port *target = host_to_target(scmnd->device->host); 3048 3049 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n"); 3050 3051 return srp_reconnect_rport(target->rport) == 0 ? 
SUCCESS : FAILED; 3052 } 3053 3054 static int srp_target_alloc(struct scsi_target *starget) 3055 { 3056 struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); 3057 struct srp_target_port *target = host_to_target(shost); 3058 3059 if (target->target_can_queue) 3060 starget->can_queue = target->target_can_queue; 3061 return 0; 3062 } 3063 3064 static int srp_slave_alloc(struct scsi_device *sdev) 3065 { 3066 struct Scsi_Host *shost = sdev->host; 3067 struct srp_target_port *target = host_to_target(shost); 3068 struct srp_device *srp_dev = target->srp_host->srp_dev; 3069 struct ib_device *ibdev = srp_dev->dev; 3070 3071 if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) 3072 blk_queue_virt_boundary(sdev->request_queue, 3073 ~srp_dev->mr_page_mask); 3074 3075 return 0; 3076 } 3077 3078 static int srp_slave_configure(struct scsi_device *sdev) 3079 { 3080 struct Scsi_Host *shost = sdev->host; 3081 struct srp_target_port *target = host_to_target(shost); 3082 struct request_queue *q = sdev->request_queue; 3083 unsigned long timeout; 3084 3085 if (sdev->type == TYPE_DISK) { 3086 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies); 3087 blk_queue_rq_timeout(q, timeout); 3088 } 3089 3090 return 0; 3091 } 3092 3093 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr, 3094 char *buf) 3095 { 3096 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3097 3098 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext)); 3099 } 3100 3101 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr, 3102 char *buf) 3103 { 3104 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3105 3106 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid)); 3107 } 3108 3109 static ssize_t show_service_id(struct device *dev, 3110 struct device_attribute *attr, char *buf) 3111 { 3112 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3113 3114 if (target->using_rdma_cm) 3115 return -ENOENT; 3116 return sprintf(buf, "0x%016llx\n", 3117 be64_to_cpu(target->ib_cm.service_id)); 3118 } 3119 3120 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, 3121 char *buf) 3122 { 3123 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3124 3125 if (target->using_rdma_cm) 3126 return -ENOENT; 3127 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey)); 3128 } 3129 3130 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr, 3131 char *buf) 3132 { 3133 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3134 3135 return sprintf(buf, "%pI6\n", target->sgid.raw); 3136 } 3137 3138 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr, 3139 char *buf) 3140 { 3141 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3142 struct srp_rdma_ch *ch = &target->ch[0]; 3143 3144 if (target->using_rdma_cm) 3145 return -ENOENT; 3146 return sprintf(buf, "%pI6\n", ch->ib_cm.path.dgid.raw); 3147 } 3148 3149 static ssize_t show_orig_dgid(struct device *dev, 3150 struct device_attribute *attr, char *buf) 3151 { 3152 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3153 3154 if (target->using_rdma_cm) 3155 return -ENOENT; 3156 return sprintf(buf, "%pI6\n", target->ib_cm.orig_dgid.raw); 3157 } 3158 3159 static ssize_t show_req_lim(struct device *dev, 3160 struct device_attribute *attr, char *buf) 3161 { 3162 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3163 
struct srp_rdma_ch *ch; 3164 int i, req_lim = INT_MAX; 3165 3166 for (i = 0; i < target->ch_count; i++) { 3167 ch = &target->ch[i]; 3168 req_lim = min(req_lim, ch->req_lim); 3169 } 3170 return sprintf(buf, "%d\n", req_lim); 3171 } 3172 3173 static ssize_t show_zero_req_lim(struct device *dev, 3174 struct device_attribute *attr, char *buf) 3175 { 3176 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3177 3178 return sprintf(buf, "%d\n", target->zero_req_lim); 3179 } 3180 3181 static ssize_t show_local_ib_port(struct device *dev, 3182 struct device_attribute *attr, char *buf) 3183 { 3184 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3185 3186 return sprintf(buf, "%d\n", target->srp_host->port); 3187 } 3188 3189 static ssize_t show_local_ib_device(struct device *dev, 3190 struct device_attribute *attr, char *buf) 3191 { 3192 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3193 3194 return sprintf(buf, "%s\n", 3195 dev_name(&target->srp_host->srp_dev->dev->dev)); 3196 } 3197 3198 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr, 3199 char *buf) 3200 { 3201 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3202 3203 return sprintf(buf, "%d\n", target->ch_count); 3204 } 3205 3206 static ssize_t show_comp_vector(struct device *dev, 3207 struct device_attribute *attr, char *buf) 3208 { 3209 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3210 3211 return sprintf(buf, "%d\n", target->comp_vector); 3212 } 3213 3214 static ssize_t show_tl_retry_count(struct device *dev, 3215 struct device_attribute *attr, char *buf) 3216 { 3217 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3218 3219 return sprintf(buf, "%d\n", target->tl_retry_count); 3220 } 3221 3222 static ssize_t show_cmd_sg_entries(struct device *dev, 3223 struct device_attribute *attr, char *buf) 3224 { 3225 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3226 3227 return sprintf(buf, "%u\n", target->cmd_sg_cnt); 3228 } 3229 3230 static ssize_t show_allow_ext_sg(struct device *dev, 3231 struct device_attribute *attr, char *buf) 3232 { 3233 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3234 3235 return sprintf(buf, "%s\n", target->allow_ext_sg ? 
"true" : "false"); 3236 } 3237 3238 static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL); 3239 static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL); 3240 static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL); 3241 static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); 3242 static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL); 3243 static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL); 3244 static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL); 3245 static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL); 3246 static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL); 3247 static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL); 3248 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL); 3249 static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL); 3250 static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL); 3251 static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL); 3252 static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL); 3253 static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL); 3254 3255 static struct device_attribute *srp_host_attrs[] = { 3256 &dev_attr_id_ext, 3257 &dev_attr_ioc_guid, 3258 &dev_attr_service_id, 3259 &dev_attr_pkey, 3260 &dev_attr_sgid, 3261 &dev_attr_dgid, 3262 &dev_attr_orig_dgid, 3263 &dev_attr_req_lim, 3264 &dev_attr_zero_req_lim, 3265 &dev_attr_local_ib_port, 3266 &dev_attr_local_ib_device, 3267 &dev_attr_ch_count, 3268 &dev_attr_comp_vector, 3269 &dev_attr_tl_retry_count, 3270 &dev_attr_cmd_sg_entries, 3271 &dev_attr_allow_ext_sg, 3272 NULL 3273 }; 3274 3275 static struct scsi_host_template srp_template = { 3276 .module = THIS_MODULE, 3277 .name = "InfiniBand SRP initiator", 3278 .proc_name = DRV_NAME, 3279 .target_alloc = srp_target_alloc, 3280 .slave_alloc = srp_slave_alloc, 3281 .slave_configure = srp_slave_configure, 3282 .info = srp_target_info, 3283 .queuecommand = srp_queuecommand, 3284 .change_queue_depth = srp_change_queue_depth, 3285 .eh_timed_out = srp_timed_out, 3286 .eh_abort_handler = srp_abort, 3287 .eh_device_reset_handler = srp_reset_device, 3288 .eh_host_reset_handler = srp_reset_host, 3289 .skip_settle_delay = true, 3290 .sg_tablesize = SRP_DEF_SG_TABLESIZE, 3291 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE, 3292 .this_id = -1, 3293 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE, 3294 .shost_attrs = srp_host_attrs, 3295 .track_queue_depth = 1, 3296 }; 3297 3298 static int srp_sdev_count(struct Scsi_Host *host) 3299 { 3300 struct scsi_device *sdev; 3301 int c = 0; 3302 3303 shost_for_each_device(sdev, host) 3304 c++; 3305 3306 return c; 3307 } 3308 3309 /* 3310 * Return values: 3311 * < 0 upon failure. Caller is responsible for SRP target port cleanup. 3312 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port 3313 * removal has been scheduled. 3314 * 0 and target->state != SRP_TARGET_REMOVED upon success. 
3315 */ 3316 static int srp_add_target(struct srp_host *host, struct srp_target_port *target) 3317 { 3318 struct srp_rport_identifiers ids; 3319 struct srp_rport *rport; 3320 3321 target->state = SRP_TARGET_SCANNING; 3322 sprintf(target->target_name, "SRP.T10:%016llX", 3323 be64_to_cpu(target->id_ext)); 3324 3325 if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dev.parent)) 3326 return -ENODEV; 3327 3328 memcpy(ids.port_id, &target->id_ext, 8); 3329 memcpy(ids.port_id + 8, &target->ioc_guid, 8); 3330 ids.roles = SRP_RPORT_ROLE_TARGET; 3331 rport = srp_rport_add(target->scsi_host, &ids); 3332 if (IS_ERR(rport)) { 3333 scsi_remove_host(target->scsi_host); 3334 return PTR_ERR(rport); 3335 } 3336 3337 rport->lld_data = target; 3338 target->rport = rport; 3339 3340 spin_lock(&host->target_lock); 3341 list_add_tail(&target->list, &host->target_list); 3342 spin_unlock(&host->target_lock); 3343 3344 scsi_scan_target(&target->scsi_host->shost_gendev, 3345 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL); 3346 3347 if (srp_connected_ch(target) < target->ch_count || 3348 target->qp_in_error) { 3349 shost_printk(KERN_INFO, target->scsi_host, 3350 PFX "SCSI scan failed - removing SCSI host\n"); 3351 srp_queue_remove_work(target); 3352 goto out; 3353 } 3354 3355 pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n", 3356 dev_name(&target->scsi_host->shost_gendev), 3357 srp_sdev_count(target->scsi_host)); 3358 3359 spin_lock_irq(&target->lock); 3360 if (target->state == SRP_TARGET_SCANNING) 3361 target->state = SRP_TARGET_LIVE; 3362 spin_unlock_irq(&target->lock); 3363 3364 out: 3365 return 0; 3366 } 3367 3368 static void srp_release_dev(struct device *dev) 3369 { 3370 struct srp_host *host = 3371 container_of(dev, struct srp_host, dev); 3372 3373 complete(&host->released); 3374 } 3375 3376 static struct class srp_class = { 3377 .name = "infiniband_srp", 3378 .dev_release = srp_release_dev 3379 }; 3380 3381 /** 3382 * srp_conn_unique() - check whether the connection to a target is unique 3383 * @host: SRP host. 3384 * @target: SRP target port. 3385 */ 3386 static bool srp_conn_unique(struct srp_host *host, 3387 struct srp_target_port *target) 3388 { 3389 struct srp_target_port *t; 3390 bool ret = false; 3391 3392 if (target->state == SRP_TARGET_REMOVED) 3393 goto out; 3394 3395 ret = true; 3396 3397 spin_lock(&host->target_lock); 3398 list_for_each_entry(t, &host->target_list, list) { 3399 if (t != target && 3400 target->id_ext == t->id_ext && 3401 target->ioc_guid == t->ioc_guid && 3402 target->initiator_ext == t->initiator_ext) { 3403 ret = false; 3404 break; 3405 } 3406 } 3407 spin_unlock(&host->target_lock); 3408 3409 out: 3410 return ret; 3411 } 3412 3413 /* 3414 * Target ports are added by writing 3415 * 3416 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>, 3417 * pkey=<P_Key>,service_id=<service ID> 3418 * or 3419 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>, 3420 * [src=<IPv4 address>,]dest=<IPv4 address>:<port number> 3421 * 3422 * to the add_target sysfs attribute. 
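 *
 * Example (all identifiers below are made up for illustration; the sysfs
 * directory name follows the srp-<ibdev>-<port> pattern set up in
 * srp_add_port()):
 *
 *   echo id_ext=200100e08b000000,ioc_guid=0002c90300a0b0c0,dgid=fe800000000000000002c90300a0b0c1,pkey=ffff,service_id=0002c90300a0b0c0 > /sys/class/infiniband_srp/srp-mlx5_0-1/add_target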
3423 */ 3424 enum { 3425 SRP_OPT_ERR = 0, 3426 SRP_OPT_ID_EXT = 1 << 0, 3427 SRP_OPT_IOC_GUID = 1 << 1, 3428 SRP_OPT_DGID = 1 << 2, 3429 SRP_OPT_PKEY = 1 << 3, 3430 SRP_OPT_SERVICE_ID = 1 << 4, 3431 SRP_OPT_MAX_SECT = 1 << 5, 3432 SRP_OPT_MAX_CMD_PER_LUN = 1 << 6, 3433 SRP_OPT_IO_CLASS = 1 << 7, 3434 SRP_OPT_INITIATOR_EXT = 1 << 8, 3435 SRP_OPT_CMD_SG_ENTRIES = 1 << 9, 3436 SRP_OPT_ALLOW_EXT_SG = 1 << 10, 3437 SRP_OPT_SG_TABLESIZE = 1 << 11, 3438 SRP_OPT_COMP_VECTOR = 1 << 12, 3439 SRP_OPT_TL_RETRY_COUNT = 1 << 13, 3440 SRP_OPT_QUEUE_SIZE = 1 << 14, 3441 SRP_OPT_IP_SRC = 1 << 15, 3442 SRP_OPT_IP_DEST = 1 << 16, 3443 SRP_OPT_TARGET_CAN_QUEUE= 1 << 17, 3444 }; 3445 3446 static unsigned int srp_opt_mandatory[] = { 3447 SRP_OPT_ID_EXT | 3448 SRP_OPT_IOC_GUID | 3449 SRP_OPT_DGID | 3450 SRP_OPT_PKEY | 3451 SRP_OPT_SERVICE_ID, 3452 SRP_OPT_ID_EXT | 3453 SRP_OPT_IOC_GUID | 3454 SRP_OPT_IP_DEST, 3455 }; 3456 3457 static const match_table_t srp_opt_tokens = { 3458 { SRP_OPT_ID_EXT, "id_ext=%s" }, 3459 { SRP_OPT_IOC_GUID, "ioc_guid=%s" }, 3460 { SRP_OPT_DGID, "dgid=%s" }, 3461 { SRP_OPT_PKEY, "pkey=%x" }, 3462 { SRP_OPT_SERVICE_ID, "service_id=%s" }, 3463 { SRP_OPT_MAX_SECT, "max_sect=%d" }, 3464 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" }, 3465 { SRP_OPT_TARGET_CAN_QUEUE, "target_can_queue=%d" }, 3466 { SRP_OPT_IO_CLASS, "io_class=%x" }, 3467 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" }, 3468 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" }, 3469 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" }, 3470 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" }, 3471 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" }, 3472 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" }, 3473 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" }, 3474 { SRP_OPT_IP_SRC, "src=%s" }, 3475 { SRP_OPT_IP_DEST, "dest=%s" }, 3476 { SRP_OPT_ERR, NULL } 3477 }; 3478 3479 /** 3480 * srp_parse_in - parse an IP address and port number combination 3481 * @net: [in] Network namespace. 3482 * @sa: [out] Address family, IP address and port number. 3483 * @addr_port_str: [in] IP address and port number. 3484 * 3485 * Parse the following address formats: 3486 * - IPv4: <ip_address>:<port>, e.g. 1.2.3.4:5. 3487 * - IPv6: \[<ipv6_address>\]:<port>, e.g. [1::2:3%4]:5. 
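 *
 * Return: 0 upon success or a negative error code upon failure.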
3488 */ 3489 static int srp_parse_in(struct net *net, struct sockaddr_storage *sa, 3490 const char *addr_port_str) 3491 { 3492 char *addr_end, *addr = kstrdup(addr_port_str, GFP_KERNEL); 3493 char *port_str; 3494 int ret; 3495 3496 if (!addr) 3497 return -ENOMEM; 3498 port_str = strrchr(addr, ':'); 3499 if (!port_str) 3500 return -EINVAL; 3501 *port_str++ = '\0'; 3502 ret = inet_pton_with_scope(net, AF_INET, addr, port_str, sa); 3503 if (ret && addr[0]) { 3504 addr_end = addr + strlen(addr) - 1; 3505 if (addr[0] == '[' && *addr_end == ']') { 3506 *addr_end = '\0'; 3507 ret = inet_pton_with_scope(net, AF_INET6, addr + 1, 3508 port_str, sa); 3509 } 3510 } 3511 kfree(addr); 3512 pr_debug("%s -> %pISpfsc\n", addr_port_str, sa); 3513 return ret; 3514 } 3515 3516 static int srp_parse_options(struct net *net, const char *buf, 3517 struct srp_target_port *target) 3518 { 3519 char *options, *sep_opt; 3520 char *p; 3521 substring_t args[MAX_OPT_ARGS]; 3522 unsigned long long ull; 3523 int opt_mask = 0; 3524 int token; 3525 int ret = -EINVAL; 3526 int i; 3527 3528 options = kstrdup(buf, GFP_KERNEL); 3529 if (!options) 3530 return -ENOMEM; 3531 3532 sep_opt = options; 3533 while ((p = strsep(&sep_opt, ",\n")) != NULL) { 3534 if (!*p) 3535 continue; 3536 3537 token = match_token(p, srp_opt_tokens, args); 3538 opt_mask |= token; 3539 3540 switch (token) { 3541 case SRP_OPT_ID_EXT: 3542 p = match_strdup(args); 3543 if (!p) { 3544 ret = -ENOMEM; 3545 goto out; 3546 } 3547 ret = kstrtoull(p, 16, &ull); 3548 if (ret) { 3549 pr_warn("invalid id_ext parameter '%s'\n", p); 3550 kfree(p); 3551 goto out; 3552 } 3553 target->id_ext = cpu_to_be64(ull); 3554 kfree(p); 3555 break; 3556 3557 case SRP_OPT_IOC_GUID: 3558 p = match_strdup(args); 3559 if (!p) { 3560 ret = -ENOMEM; 3561 goto out; 3562 } 3563 ret = kstrtoull(p, 16, &ull); 3564 if (ret) { 3565 pr_warn("invalid ioc_guid parameter '%s'\n", p); 3566 kfree(p); 3567 goto out; 3568 } 3569 target->ioc_guid = cpu_to_be64(ull); 3570 kfree(p); 3571 break; 3572 3573 case SRP_OPT_DGID: 3574 p = match_strdup(args); 3575 if (!p) { 3576 ret = -ENOMEM; 3577 goto out; 3578 } 3579 if (strlen(p) != 32) { 3580 pr_warn("bad dest GID parameter '%s'\n", p); 3581 kfree(p); 3582 goto out; 3583 } 3584 3585 ret = hex2bin(target->ib_cm.orig_dgid.raw, p, 16); 3586 kfree(p); 3587 if (ret < 0) 3588 goto out; 3589 break; 3590 3591 case SRP_OPT_PKEY: 3592 if (match_hex(args, &token)) { 3593 pr_warn("bad P_Key parameter '%s'\n", p); 3594 goto out; 3595 } 3596 target->ib_cm.pkey = cpu_to_be16(token); 3597 break; 3598 3599 case SRP_OPT_SERVICE_ID: 3600 p = match_strdup(args); 3601 if (!p) { 3602 ret = -ENOMEM; 3603 goto out; 3604 } 3605 ret = kstrtoull(p, 16, &ull); 3606 if (ret) { 3607 pr_warn("bad service_id parameter '%s'\n", p); 3608 kfree(p); 3609 goto out; 3610 } 3611 target->ib_cm.service_id = cpu_to_be64(ull); 3612 kfree(p); 3613 break; 3614 3615 case SRP_OPT_IP_SRC: 3616 p = match_strdup(args); 3617 if (!p) { 3618 ret = -ENOMEM; 3619 goto out; 3620 } 3621 ret = srp_parse_in(net, &target->rdma_cm.src.ss, p); 3622 if (ret < 0) { 3623 pr_warn("bad source parameter '%s'\n", p); 3624 kfree(p); 3625 goto out; 3626 } 3627 target->rdma_cm.src_specified = true; 3628 kfree(p); 3629 break; 3630 3631 case SRP_OPT_IP_DEST: 3632 p = match_strdup(args); 3633 if (!p) { 3634 ret = -ENOMEM; 3635 goto out; 3636 } 3637 ret = srp_parse_in(net, &target->rdma_cm.dst.ss, p); 3638 if (ret < 0) { 3639 pr_warn("bad dest parameter '%s'\n", p); 3640 kfree(p); 3641 goto out; 3642 } 3643 target->using_rdma_cm = 
true; 3644 kfree(p); 3645 break; 3646 3647 case SRP_OPT_MAX_SECT: 3648 if (match_int(args, &token)) { 3649 pr_warn("bad max sect parameter '%s'\n", p); 3650 goto out; 3651 } 3652 target->scsi_host->max_sectors = token; 3653 break; 3654 3655 case SRP_OPT_QUEUE_SIZE: 3656 if (match_int(args, &token) || token < 1) { 3657 pr_warn("bad queue_size parameter '%s'\n", p); 3658 goto out; 3659 } 3660 target->scsi_host->can_queue = token; 3661 target->queue_size = token + SRP_RSP_SQ_SIZE + 3662 SRP_TSK_MGMT_SQ_SIZE; 3663 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN)) 3664 target->scsi_host->cmd_per_lun = token; 3665 break; 3666 3667 case SRP_OPT_MAX_CMD_PER_LUN: 3668 if (match_int(args, &token) || token < 1) { 3669 pr_warn("bad max cmd_per_lun parameter '%s'\n", 3670 p); 3671 goto out; 3672 } 3673 target->scsi_host->cmd_per_lun = token; 3674 break; 3675 3676 case SRP_OPT_TARGET_CAN_QUEUE: 3677 if (match_int(args, &token) || token < 1) { 3678 pr_warn("bad max target_can_queue parameter '%s'\n", 3679 p); 3680 goto out; 3681 } 3682 target->target_can_queue = token; 3683 break; 3684 3685 case SRP_OPT_IO_CLASS: 3686 if (match_hex(args, &token)) { 3687 pr_warn("bad IO class parameter '%s'\n", p); 3688 goto out; 3689 } 3690 if (token != SRP_REV10_IB_IO_CLASS && 3691 token != SRP_REV16A_IB_IO_CLASS) { 3692 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n", 3693 token, SRP_REV10_IB_IO_CLASS, 3694 SRP_REV16A_IB_IO_CLASS); 3695 goto out; 3696 } 3697 target->io_class = token; 3698 break; 3699 3700 case SRP_OPT_INITIATOR_EXT: 3701 p = match_strdup(args); 3702 if (!p) { 3703 ret = -ENOMEM; 3704 goto out; 3705 } 3706 ret = kstrtoull(p, 16, &ull); 3707 if (ret) { 3708 pr_warn("bad initiator_ext value '%s'\n", p); 3709 kfree(p); 3710 goto out; 3711 } 3712 target->initiator_ext = cpu_to_be64(ull); 3713 kfree(p); 3714 break; 3715 3716 case SRP_OPT_CMD_SG_ENTRIES: 3717 if (match_int(args, &token) || token < 1 || token > 255) { 3718 pr_warn("bad max cmd_sg_entries parameter '%s'\n", 3719 p); 3720 goto out; 3721 } 3722 target->cmd_sg_cnt = token; 3723 break; 3724 3725 case SRP_OPT_ALLOW_EXT_SG: 3726 if (match_int(args, &token)) { 3727 pr_warn("bad allow_ext_sg parameter '%s'\n", p); 3728 goto out; 3729 } 3730 target->allow_ext_sg = !!token; 3731 break; 3732 3733 case SRP_OPT_SG_TABLESIZE: 3734 if (match_int(args, &token) || token < 1 || 3735 token > SG_MAX_SEGMENTS) { 3736 pr_warn("bad max sg_tablesize parameter '%s'\n", 3737 p); 3738 goto out; 3739 } 3740 target->sg_tablesize = token; 3741 break; 3742 3743 case SRP_OPT_COMP_VECTOR: 3744 if (match_int(args, &token) || token < 0) { 3745 pr_warn("bad comp_vector parameter '%s'\n", p); 3746 goto out; 3747 } 3748 target->comp_vector = token; 3749 break; 3750 3751 case SRP_OPT_TL_RETRY_COUNT: 3752 if (match_int(args, &token) || token < 2 || token > 7) { 3753 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n", 3754 p); 3755 goto out; 3756 } 3757 target->tl_retry_count = token; 3758 break; 3759 3760 default: 3761 pr_warn("unknown parameter or missing value '%s' in target creation request\n", 3762 p); 3763 goto out; 3764 } 3765 } 3766 3767 for (i = 0; i < ARRAY_SIZE(srp_opt_mandatory); i++) { 3768 if ((opt_mask & srp_opt_mandatory[i]) == srp_opt_mandatory[i]) { 3769 ret = 0; 3770 break; 3771 } 3772 } 3773 if (ret) 3774 pr_warn("target creation request is missing one or more parameters\n"); 3775 3776 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue 3777 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN)) 3778 
pr_warn("cmd_per_lun = %d > queue_size = %d\n", 3779 target->scsi_host->cmd_per_lun, 3780 target->scsi_host->can_queue); 3781 3782 out: 3783 kfree(options); 3784 return ret; 3785 } 3786 3787 static ssize_t srp_create_target(struct device *dev, 3788 struct device_attribute *attr, 3789 const char *buf, size_t count) 3790 { 3791 struct srp_host *host = 3792 container_of(dev, struct srp_host, dev); 3793 struct Scsi_Host *target_host; 3794 struct srp_target_port *target; 3795 struct srp_rdma_ch *ch; 3796 struct srp_device *srp_dev = host->srp_dev; 3797 struct ib_device *ibdev = srp_dev->dev; 3798 int ret, node_idx, node, cpu, i; 3799 unsigned int max_sectors_per_mr, mr_per_cmd = 0; 3800 bool multich = false; 3801 uint32_t max_iu_len; 3802 3803 target_host = scsi_host_alloc(&srp_template, 3804 sizeof (struct srp_target_port)); 3805 if (!target_host) 3806 return -ENOMEM; 3807 3808 target_host->transportt = ib_srp_transport_template; 3809 target_host->max_channel = 0; 3810 target_host->max_id = 1; 3811 target_host->max_lun = -1LL; 3812 target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb; 3813 target_host->max_segment_size = ib_dma_max_seg_size(ibdev); 3814 3815 target = host_to_target(target_host); 3816 3817 target->net = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); 3818 target->io_class = SRP_REV16A_IB_IO_CLASS; 3819 target->scsi_host = target_host; 3820 target->srp_host = host; 3821 target->lkey = host->srp_dev->pd->local_dma_lkey; 3822 target->global_rkey = host->srp_dev->global_rkey; 3823 target->cmd_sg_cnt = cmd_sg_entries; 3824 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries; 3825 target->allow_ext_sg = allow_ext_sg; 3826 target->tl_retry_count = 7; 3827 target->queue_size = SRP_DEFAULT_QUEUE_SIZE; 3828 3829 /* 3830 * Avoid that the SCSI host can be removed by srp_remove_target() 3831 * before this function returns. 3832 */ 3833 scsi_host_get(target->scsi_host); 3834 3835 ret = mutex_lock_interruptible(&host->add_target_mutex); 3836 if (ret < 0) 3837 goto put; 3838 3839 ret = srp_parse_options(target->net, buf, target); 3840 if (ret) 3841 goto out; 3842 3843 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE; 3844 3845 if (!srp_conn_unique(target->srp_host, target)) { 3846 if (target->using_rdma_cm) { 3847 shost_printk(KERN_INFO, target->scsi_host, 3848 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;dest=%pIS\n", 3849 be64_to_cpu(target->id_ext), 3850 be64_to_cpu(target->ioc_guid), 3851 &target->rdma_cm.dst); 3852 } else { 3853 shost_printk(KERN_INFO, target->scsi_host, 3854 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n", 3855 be64_to_cpu(target->id_ext), 3856 be64_to_cpu(target->ioc_guid), 3857 be64_to_cpu(target->initiator_ext)); 3858 } 3859 ret = -EEXIST; 3860 goto out; 3861 } 3862 3863 if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg && 3864 target->cmd_sg_cnt < target->sg_tablesize) { 3865 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n"); 3866 target->sg_tablesize = target->cmd_sg_cnt; 3867 } 3868 3869 if (srp_dev->use_fast_reg || srp_dev->use_fmr) { 3870 bool gaps_reg = (ibdev->attrs.device_cap_flags & 3871 IB_DEVICE_SG_GAPS_REG); 3872 3873 max_sectors_per_mr = srp_dev->max_pages_per_mr << 3874 (ilog2(srp_dev->mr_page_size) - 9); 3875 if (!gaps_reg) { 3876 /* 3877 * FR and FMR can only map one HCA page per entry. 
If 3878 * the start address is not aligned on a HCA page 3879 * boundary two entries will be used for the head and 3880 * the tail although these two entries combined 3881 * contain at most one HCA page of data. Hence the "+ 3882 * 1" in the calculation below. 3883 * 3884 * The indirect data buffer descriptor is contiguous 3885 * so the memory for that buffer will only be 3886 * registered if register_always is true. Hence add 3887 * one to mr_per_cmd if register_always has been set. 3888 */ 3889 mr_per_cmd = register_always + 3890 (target->scsi_host->max_sectors + 1 + 3891 max_sectors_per_mr - 1) / max_sectors_per_mr; 3892 } else { 3893 mr_per_cmd = register_always + 3894 (target->sg_tablesize + 3895 srp_dev->max_pages_per_mr - 1) / 3896 srp_dev->max_pages_per_mr; 3897 } 3898 pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n", 3899 target->scsi_host->max_sectors, srp_dev->max_pages_per_mr, srp_dev->mr_page_size, 3900 max_sectors_per_mr, mr_per_cmd); 3901 } 3902 3903 target_host->sg_tablesize = target->sg_tablesize; 3904 target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd; 3905 target->mr_per_cmd = mr_per_cmd; 3906 target->indirect_size = target->sg_tablesize * 3907 sizeof (struct srp_direct_buf); 3908 max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, srp_use_imm_data); 3909 3910 INIT_WORK(&target->tl_err_work, srp_tl_err_work); 3911 INIT_WORK(&target->remove_work, srp_remove_work); 3912 spin_lock_init(&target->lock); 3913 ret = rdma_query_gid(ibdev, host->port, 0, &target->sgid); 3914 if (ret) 3915 goto out; 3916 3917 ret = -ENOMEM; 3918 target->ch_count = max_t(unsigned, num_online_nodes(), 3919 min(ch_count ? : 3920 min(4 * num_online_nodes(), 3921 ibdev->num_comp_vectors), 3922 num_online_cpus())); 3923 target->ch = kcalloc(target->ch_count, sizeof(*target->ch), 3924 GFP_KERNEL); 3925 if (!target->ch) 3926 goto out; 3927 3928 node_idx = 0; 3929 for_each_online_node(node) { 3930 const int ch_start = (node_idx * target->ch_count / 3931 num_online_nodes()); 3932 const int ch_end = ((node_idx + 1) * target->ch_count / 3933 num_online_nodes()); 3934 const int cv_start = node_idx * ibdev->num_comp_vectors / 3935 num_online_nodes(); 3936 const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors / 3937 num_online_nodes(); 3938 int cpu_idx = 0; 3939 3940 for_each_online_cpu(cpu) { 3941 if (cpu_to_node(cpu) != node) 3942 continue; 3943 if (ch_start + cpu_idx >= ch_end) 3944 continue; 3945 ch = &target->ch[ch_start + cpu_idx]; 3946 ch->target = target; 3947 ch->comp_vector = cv_start == cv_end ? 
			spin_lock_init(&ch->lock);
			INIT_LIST_HEAD(&ch->free_tx);
			ret = srp_new_cm_id(ch);
			if (ret)
				goto err_disconnect;

			ret = srp_create_ch_ib(ch);
			if (ret)
				goto err_disconnect;

			ret = srp_alloc_req_data(ch);
			if (ret)
				goto err_disconnect;

			ret = srp_connect_ch(ch, max_iu_len, multich);
			if (ret) {
				char dst[64];

				if (target->using_rdma_cm)
					snprintf(dst, sizeof(dst), "%pIS",
						 &target->rdma_cm.dst);
				else
					snprintf(dst, sizeof(dst), "%pI6",
						 target->ib_cm.orig_dgid.raw);
				shost_printk(KERN_ERR, target->scsi_host,
					     PFX "Connection %d/%d to %s failed\n",
					     ch_start + cpu_idx,
					     target->ch_count, dst);
				if (node_idx == 0 && cpu_idx == 0) {
					goto free_ch;
				} else {
					srp_free_ch_ib(target, ch);
					srp_free_req_data(target, ch);
					target->ch_count = ch - target->ch;
					goto connected;
				}
			}

			multich = true;
			cpu_idx++;
		}
		node_idx++;
	}

connected:
	target->scsi_host->nr_hw_queues = target->ch_count;

	ret = srp_add_target(host, target);
	if (ret)
		goto err_disconnect;

	if (target->state != SRP_TARGET_REMOVED) {
		if (target->using_rdma_cm) {
			shost_printk(KERN_DEBUG, target->scsi_host, PFX
				     "new target: id_ext %016llx ioc_guid %016llx sgid %pI6 dest %pIS\n",
				     be64_to_cpu(target->id_ext),
				     be64_to_cpu(target->ioc_guid),
				     target->sgid.raw, &target->rdma_cm.dst);
		} else {
			shost_printk(KERN_DEBUG, target->scsi_host, PFX
				     "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
				     be64_to_cpu(target->id_ext),
				     be64_to_cpu(target->ioc_guid),
				     be16_to_cpu(target->ib_cm.pkey),
				     be64_to_cpu(target->ib_cm.service_id),
				     target->sgid.raw,
				     target->ib_cm.orig_dgid.raw);
		}
	}

	ret = count;

out:
	mutex_unlock(&host->add_target_mutex);

put:
	scsi_host_put(target->scsi_host);
	if (ret < 0) {
		/*
		 * If a call to srp_remove_target() has not been scheduled,
		 * drop the network namespace reference that was obtained
		 * earlier in this function.
		 */
		if (target->state != SRP_TARGET_REMOVED)
			kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net);
		scsi_host_put(target->scsi_host);
	}

	return ret;

err_disconnect:
	srp_disconnect_target(target);

free_ch:
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		srp_free_ch_ib(target, ch);
		srp_free_req_data(target, ch);
	}

	kfree(target->ch);
	goto out;
}

static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);

static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
			  char *buf)
{
	struct srp_host *host = container_of(dev, struct srp_host, dev);

	return sprintf(buf, "%s\n", dev_name(&host->srp_dev->dev->dev));
}

static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);

static ssize_t show_port(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	struct srp_host *host = container_of(dev, struct srp_host, dev);

	return sprintf(buf, "%d\n", host->port);
}

static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);

static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
{
	struct srp_host *host;

	host = kzalloc(sizeof *host, GFP_KERNEL);
	if (!host)
		return NULL;

	INIT_LIST_HEAD(&host->target_list);
	spin_lock_init(&host->target_lock);
	init_completion(&host->released);
	mutex_init(&host->add_target_mutex);
	host->srp_dev = device;
	host->port = port;

	host->dev.class = &srp_class;
	host->dev.parent = device->dev->dev.parent;
	dev_set_name(&host->dev, "srp-%s-%d", dev_name(&device->dev->dev),
		     port);

	if (device_register(&host->dev))
		goto free_host;
	if (device_create_file(&host->dev, &dev_attr_add_target))
		goto err_class;
	if (device_create_file(&host->dev, &dev_attr_ibdev))
		goto err_class;
	if (device_create_file(&host->dev, &dev_attr_port))
		goto err_class;

	return host;

err_class:
	device_unregister(&host->dev);

free_host:
	kfree(host);

	return NULL;
}

static void srp_add_one(struct ib_device *device)
{
	struct srp_device *srp_dev;
	struct ib_device_attr *attr = &device->attrs;
	struct srp_host *host;
	int mr_page_shift;
	unsigned int p;
	u64 max_pages_per_mr;
	unsigned int flags = 0;

	srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
	if (!srp_dev)
		return;

	/*
	 * Use the smallest page size supported by the HCA, down to a
	 * minimum of 4096 bytes. We're unlikely to build large sglists
	 * out of smaller entries.
	 */
	mr_page_shift		= max(12, ffs(attr->page_size_cap) - 1);
	srp_dev->mr_page_size	= 1 << mr_page_shift;
	srp_dev->mr_page_mask	= ~((u64) srp_dev->mr_page_size - 1);
	max_pages_per_mr	= attr->max_mr_size;
	do_div(max_pages_per_mr, srp_dev->mr_page_size);
	pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
		 attr->max_mr_size, srp_dev->mr_page_size,
		 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
	srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
					  max_pages_per_mr);

	srp_dev->has_fmr = (device->ops.alloc_fmr &&
			    device->ops.dealloc_fmr &&
			    device->ops.map_phys_fmr &&
			    device->ops.unmap_fmr);
	srp_dev->has_fr = (attr->device_cap_flags &
			   IB_DEVICE_MEM_MGT_EXTENSIONS);
	if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
		dev_warn(&device->dev, "neither FMR nor FR is supported\n");
	} else if (!never_register &&
		   attr->max_mr_size >= 2 * srp_dev->mr_page_size) {
		srp_dev->use_fast_reg = (srp_dev->has_fr &&
					 (!srp_dev->has_fmr || prefer_fr));
		srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
	}

	if (never_register || !register_always ||
	    (!srp_dev->has_fmr && !srp_dev->has_fr))
		flags |= IB_PD_UNSAFE_GLOBAL_RKEY;

	if (srp_dev->use_fast_reg) {
		srp_dev->max_pages_per_mr =
			min_t(u32, srp_dev->max_pages_per_mr,
			      attr->max_fast_reg_page_list_len);
	}
	srp_dev->mr_max_size = srp_dev->mr_page_size *
			       srp_dev->max_pages_per_mr;
	pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
		 dev_name(&device->dev), mr_page_shift, attr->max_mr_size,
		 attr->max_fast_reg_page_list_len,
		 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);

	INIT_LIST_HEAD(&srp_dev->dev_list);

	srp_dev->dev = device;
	srp_dev->pd  = ib_alloc_pd(device, flags);
	if (IS_ERR(srp_dev->pd))
		goto free_dev;

	if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
		srp_dev->global_rkey = srp_dev->pd->unsafe_global_rkey;
		WARN_ON_ONCE(srp_dev->global_rkey == 0);
	}

	rdma_for_each_port (device, p) {
		host = srp_add_port(srp_dev, p);
		if (host)
			list_add_tail(&host->list, &srp_dev->dev_list);
	}

	ib_set_client_data(device, &srp_client, srp_dev);
	return;

free_dev:
	kfree(srp_dev);
}

static void srp_remove_one(struct ib_device *device, void *client_data)
{
	struct srp_device *srp_dev;
	struct srp_host *host, *tmp_host;
	struct srp_target_port *target;

	srp_dev = client_data;
	if (!srp_dev)
		return;

	list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
		device_unregister(&host->dev);
		/*
		 * Wait for the sysfs entry to go away, so that no new
		 * target ports can be created.
		 */
		wait_for_completion(&host->released);

		/*
		 * Remove all target ports.
		 */
		spin_lock(&host->target_lock);
		list_for_each_entry(target, &host->target_list, list)
			srp_queue_remove_work(target);
		spin_unlock(&host->target_lock);

		/*
		 * Wait for tl_err and target port removal tasks.
		 */
		flush_workqueue(system_long_wq);
		flush_workqueue(srp_remove_wq);

		kfree(host);
	}

	ib_dealloc_pd(srp_dev->pd);

	kfree(srp_dev);
}

static struct srp_function_template ib_srp_transport_functions = {
	.has_rport_state	 = true,
	.reset_timer_if_blocked	 = true,
	.reconnect_delay	 = &srp_reconnect_delay,
	.fast_io_fail_tmo	 = &srp_fast_io_fail_tmo,
	.dev_loss_tmo		 = &srp_dev_loss_tmo,
	.reconnect		 = srp_rport_reconnect,
	.rport_delete		 = srp_rport_delete,
	.terminate_rport_io	 = srp_terminate_io,
};

static int __init srp_init_module(void)
{
	int ret;

	BUILD_BUG_ON(sizeof(struct srp_imm_buf) != 4);
	BUILD_BUG_ON(sizeof(struct srp_login_req) != 64);
	BUILD_BUG_ON(sizeof(struct srp_login_req_rdma) != 56);
	BUILD_BUG_ON(sizeof(struct srp_cmd) != 48);

	if (srp_sg_tablesize) {
		pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
		if (!cmd_sg_entries)
			cmd_sg_entries = srp_sg_tablesize;
	}

	if (!cmd_sg_entries)
		cmd_sg_entries = SRP_DEF_SG_TABLESIZE;

	if (cmd_sg_entries > 255) {
		pr_warn("Clamping cmd_sg_entries to 255\n");
		cmd_sg_entries = 255;
	}

	if (!indirect_sg_entries)
		indirect_sg_entries = cmd_sg_entries;
	else if (indirect_sg_entries < cmd_sg_entries) {
		pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
			cmd_sg_entries);
		indirect_sg_entries = cmd_sg_entries;
	}

	if (indirect_sg_entries > SG_MAX_SEGMENTS) {
		pr_warn("Clamping indirect_sg_entries to %u\n",
			SG_MAX_SEGMENTS);
		indirect_sg_entries = SG_MAX_SEGMENTS;
	}

	srp_remove_wq = create_workqueue("srp_remove");
	if (!srp_remove_wq) {
		ret = -ENOMEM;
		goto out;
	}

	ret = -ENOMEM;
	ib_srp_transport_template =
		srp_attach_transport(&ib_srp_transport_functions);
	if (!ib_srp_transport_template)
		goto destroy_wq;

	ret = class_register(&srp_class);
	if (ret) {
		pr_err("couldn't register class infiniband_srp\n");
		goto release_tr;
	}

	ib_sa_register_client(&srp_sa_client);

	ret = ib_register_client(&srp_client);
	if (ret) {
		pr_err("couldn't register IB client\n");
		goto unreg_sa;
	}

out:
	return ret;

unreg_sa:
	ib_sa_unregister_client(&srp_sa_client);
	class_unregister(&srp_class);

release_tr:
	srp_release_transport(ib_srp_transport_template);

destroy_wq:
	destroy_workqueue(srp_remove_wq);
	goto out;
}

static void __exit srp_cleanup_module(void)
{
	ib_unregister_client(&srp_client);
	ib_sa_unregister_client(&srp_sa_client);
	class_unregister(&srp_class);
	srp_release_transport(ib_srp_transport_template);
	destroy_workqueue(srp_remove_wq);
}

module_init(srp_init_module);
module_exit(srp_cleanup_module);
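
/*
 * Usage sketch (illustrative only, kept as a comment): a target port is
 * added by writing a parameter string to the add_target attribute created
 * in srp_add_port() and parsed by srp_parse_options().  The HCA name, port
 * number and identifier values below are placeholders, and the parameter
 * list is abbreviated:
 *
 *   echo id_ext=<id_ext>,ioc_guid=<ioc_guid>,dgid=<dgid>,pkey=ffff,service_id=<service_id> \
 *	> /sys/class/infiniband_srp/srp-<hca>-<port>/add_target
 */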