/*
 * Copyright (c) 2005 Cisco Systems. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials
 *   provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/parser.h>
#include <linux/random.h>
#include <linux/jiffies.h>
#include <linux/lockdep.h>
#include <linux/inet.h>
#include <rdma/ib_cache.h>

#include <linux/atomic.h>

#include <scsi/scsi.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_dbg.h>
#include <scsi/scsi_tcq.h>
#include <scsi/srp.h>
#include <scsi/scsi_transport_srp.h>

#include "ib_srp.h"

#define DRV_NAME	"ib_srp"
#define PFX		DRV_NAME ": "

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
MODULE_LICENSE("Dual BSD/GPL");

#if !defined(CONFIG_DYNAMIC_DEBUG)
#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
#define DYNAMIC_DEBUG_BRANCH(descriptor) false
#endif

static unsigned int srp_sg_tablesize;
static unsigned int cmd_sg_entries;
static unsigned int indirect_sg_entries;
static bool allow_ext_sg;
static bool prefer_fr = true;
static bool register_always = true;
static bool never_register;
static int topspin_workarounds = 1;

module_param(srp_sg_tablesize, uint, 0444);
MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");

module_param(cmd_sg_entries, uint, 0444);
MODULE_PARM_DESC(cmd_sg_entries,
		 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");

module_param(indirect_sg_entries, uint, 0444);
MODULE_PARM_DESC(indirect_sg_entries,
		 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");

module_param(allow_ext_sg, bool, 0444);
MODULE_PARM_DESC(allow_ext_sg,
		 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");

module_param(topspin_workarounds, int, 0444);
MODULE_PARM_DESC(topspin_workarounds,
		 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");

module_param(prefer_fr, bool, 0444);
MODULE_PARM_DESC(prefer_fr,
		 "Whether to use fast registration if both FMR and fast registration are supported");

module_param(register_always, bool, 0444);
MODULE_PARM_DESC(register_always,
		 "Use memory registration even for contiguous memory regions");

module_param(never_register, bool, 0444);
MODULE_PARM_DESC(never_register, "Never register memory");

static const struct kernel_param_ops srp_tmo_ops;

static int srp_reconnect_delay = 10;
module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");

static int srp_fast_io_fail_tmo = 15;
module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(fast_io_fail_tmo,
		 "Number of seconds between the observation of a transport"
		 " layer error and failing all I/O. \"off\" means that this"
		 " functionality is disabled.");

static int srp_dev_loss_tmo = 600;
module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dev_loss_tmo,
		 "Maximum number of seconds that the SRP transport should"
		 " insulate transport layer errors. After this time has been"
		 " exceeded the SCSI host is removed. Should be"
		 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
		 " if fast_io_fail_tmo has not been set. \"off\" means that"
		 " this functionality is disabled.");

static bool srp_use_imm_data = true;
module_param_named(use_imm_data, srp_use_imm_data, bool, 0644);
MODULE_PARM_DESC(use_imm_data,
		 "Whether or not to request permission to use immediate data during SRP login.");

static unsigned int srp_max_imm_data = 8 * 1024;
module_param_named(max_imm_data, srp_max_imm_data, uint, 0644);
MODULE_PARM_DESC(max_imm_data, "Maximum immediate data size.");

static unsigned ch_count;
module_param(ch_count, uint, 0444);
MODULE_PARM_DESC(ch_count,
		 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors.
The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA."); 148 149 static void srp_add_one(struct ib_device *device); 150 static void srp_remove_one(struct ib_device *device, void *client_data); 151 static void srp_rename_dev(struct ib_device *device, void *client_data); 152 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc); 153 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc, 154 const char *opname); 155 static int srp_ib_cm_handler(struct ib_cm_id *cm_id, 156 const struct ib_cm_event *event); 157 static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id, 158 struct rdma_cm_event *event); 159 160 static struct scsi_transport_template *ib_srp_transport_template; 161 static struct workqueue_struct *srp_remove_wq; 162 163 static struct ib_client srp_client = { 164 .name = "srp", 165 .add = srp_add_one, 166 .remove = srp_remove_one, 167 .rename = srp_rename_dev 168 }; 169 170 static struct ib_sa_client srp_sa_client; 171 172 static int srp_tmo_get(char *buffer, const struct kernel_param *kp) 173 { 174 int tmo = *(int *)kp->arg; 175 176 if (tmo >= 0) 177 return sprintf(buffer, "%d", tmo); 178 else 179 return sprintf(buffer, "off"); 180 } 181 182 static int srp_tmo_set(const char *val, const struct kernel_param *kp) 183 { 184 int tmo, res; 185 186 res = srp_parse_tmo(&tmo, val); 187 if (res) 188 goto out; 189 190 if (kp->arg == &srp_reconnect_delay) 191 res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo, 192 srp_dev_loss_tmo); 193 else if (kp->arg == &srp_fast_io_fail_tmo) 194 res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo); 195 else 196 res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo, 197 tmo); 198 if (res) 199 goto out; 200 *(int *)kp->arg = tmo; 201 202 out: 203 return res; 204 } 205 206 static const struct kernel_param_ops srp_tmo_ops = { 207 .get = srp_tmo_get, 208 .set = srp_tmo_set, 209 }; 210 211 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host) 212 { 213 return (struct srp_target_port *) host->hostdata; 214 } 215 216 static const char *srp_target_info(struct Scsi_Host *host) 217 { 218 return host_to_target(host)->target_name; 219 } 220 221 static int srp_target_is_topspin(struct srp_target_port *target) 222 { 223 static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad }; 224 static const u8 cisco_oui[3] = { 0x00, 0x1b, 0x0d }; 225 226 return topspin_workarounds && 227 (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) || 228 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui)); 229 } 230 231 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size, 232 gfp_t gfp_mask, 233 enum dma_data_direction direction) 234 { 235 struct srp_iu *iu; 236 237 iu = kmalloc(sizeof *iu, gfp_mask); 238 if (!iu) 239 goto out; 240 241 iu->buf = kzalloc(size, gfp_mask); 242 if (!iu->buf) 243 goto out_free_iu; 244 245 iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size, 246 direction); 247 if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma)) 248 goto out_free_buf; 249 250 iu->size = size; 251 iu->direction = direction; 252 253 return iu; 254 255 out_free_buf: 256 kfree(iu->buf); 257 out_free_iu: 258 kfree(iu); 259 out: 260 return NULL; 261 } 262 263 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu) 264 { 265 if (!iu) 266 return; 267 268 ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size, 269 iu->direction); 270 kfree(iu->buf); 271 kfree(iu); 272 } 273 274 static void srp_qp_event(struct 
ib_event *event, void *context) 275 { 276 pr_debug("QP event %s (%d)\n", 277 ib_event_msg(event->event), event->event); 278 } 279 280 static int srp_init_ib_qp(struct srp_target_port *target, 281 struct ib_qp *qp) 282 { 283 struct ib_qp_attr *attr; 284 int ret; 285 286 attr = kmalloc(sizeof *attr, GFP_KERNEL); 287 if (!attr) 288 return -ENOMEM; 289 290 ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev, 291 target->srp_host->port, 292 be16_to_cpu(target->ib_cm.pkey), 293 &attr->pkey_index); 294 if (ret) 295 goto out; 296 297 attr->qp_state = IB_QPS_INIT; 298 attr->qp_access_flags = (IB_ACCESS_REMOTE_READ | 299 IB_ACCESS_REMOTE_WRITE); 300 attr->port_num = target->srp_host->port; 301 302 ret = ib_modify_qp(qp, attr, 303 IB_QP_STATE | 304 IB_QP_PKEY_INDEX | 305 IB_QP_ACCESS_FLAGS | 306 IB_QP_PORT); 307 308 out: 309 kfree(attr); 310 return ret; 311 } 312 313 static int srp_new_ib_cm_id(struct srp_rdma_ch *ch) 314 { 315 struct srp_target_port *target = ch->target; 316 struct ib_cm_id *new_cm_id; 317 318 new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev, 319 srp_ib_cm_handler, ch); 320 if (IS_ERR(new_cm_id)) 321 return PTR_ERR(new_cm_id); 322 323 if (ch->ib_cm.cm_id) 324 ib_destroy_cm_id(ch->ib_cm.cm_id); 325 ch->ib_cm.cm_id = new_cm_id; 326 if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev, 327 target->srp_host->port)) 328 ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_OPA; 329 else 330 ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_IB; 331 ch->ib_cm.path.sgid = target->sgid; 332 ch->ib_cm.path.dgid = target->ib_cm.orig_dgid; 333 ch->ib_cm.path.pkey = target->ib_cm.pkey; 334 ch->ib_cm.path.service_id = target->ib_cm.service_id; 335 336 return 0; 337 } 338 339 static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch) 340 { 341 struct srp_target_port *target = ch->target; 342 struct rdma_cm_id *new_cm_id; 343 int ret; 344 345 new_cm_id = rdma_create_id(target->net, srp_rdma_cm_handler, ch, 346 RDMA_PS_TCP, IB_QPT_RC); 347 if (IS_ERR(new_cm_id)) { 348 ret = PTR_ERR(new_cm_id); 349 new_cm_id = NULL; 350 goto out; 351 } 352 353 init_completion(&ch->done); 354 ret = rdma_resolve_addr(new_cm_id, target->rdma_cm.src_specified ? 355 (struct sockaddr *)&target->rdma_cm.src : NULL, 356 (struct sockaddr *)&target->rdma_cm.dst, 357 SRP_PATH_REC_TIMEOUT_MS); 358 if (ret) { 359 pr_err("No route available from %pIS to %pIS (%d)\n", 360 &target->rdma_cm.src, &target->rdma_cm.dst, ret); 361 goto out; 362 } 363 ret = wait_for_completion_interruptible(&ch->done); 364 if (ret < 0) 365 goto out; 366 367 ret = ch->status; 368 if (ret) { 369 pr_err("Resolving address %pIS failed (%d)\n", 370 &target->rdma_cm.dst, ret); 371 goto out; 372 } 373 374 swap(ch->rdma_cm.cm_id, new_cm_id); 375 376 out: 377 if (new_cm_id) 378 rdma_destroy_id(new_cm_id); 379 380 return ret; 381 } 382 383 static int srp_new_cm_id(struct srp_rdma_ch *ch) 384 { 385 struct srp_target_port *target = ch->target; 386 387 return target->using_rdma_cm ? 
srp_new_rdma_cm_id(ch) : 388 srp_new_ib_cm_id(ch); 389 } 390 391 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target) 392 { 393 struct srp_device *dev = target->srp_host->srp_dev; 394 struct ib_fmr_pool_param fmr_param; 395 396 memset(&fmr_param, 0, sizeof(fmr_param)); 397 fmr_param.pool_size = target->mr_pool_size; 398 fmr_param.dirty_watermark = fmr_param.pool_size / 4; 399 fmr_param.cache = 1; 400 fmr_param.max_pages_per_fmr = dev->max_pages_per_mr; 401 fmr_param.page_shift = ilog2(dev->mr_page_size); 402 fmr_param.access = (IB_ACCESS_LOCAL_WRITE | 403 IB_ACCESS_REMOTE_WRITE | 404 IB_ACCESS_REMOTE_READ); 405 406 return ib_create_fmr_pool(dev->pd, &fmr_param); 407 } 408 409 /** 410 * srp_destroy_fr_pool() - free the resources owned by a pool 411 * @pool: Fast registration pool to be destroyed. 412 */ 413 static void srp_destroy_fr_pool(struct srp_fr_pool *pool) 414 { 415 int i; 416 struct srp_fr_desc *d; 417 418 if (!pool) 419 return; 420 421 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { 422 if (d->mr) 423 ib_dereg_mr(d->mr); 424 } 425 kfree(pool); 426 } 427 428 /** 429 * srp_create_fr_pool() - allocate and initialize a pool for fast registration 430 * @device: IB device to allocate fast registration descriptors for. 431 * @pd: Protection domain associated with the FR descriptors. 432 * @pool_size: Number of descriptors to allocate. 433 * @max_page_list_len: Maximum fast registration work request page list length. 434 */ 435 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device, 436 struct ib_pd *pd, int pool_size, 437 int max_page_list_len) 438 { 439 struct srp_fr_pool *pool; 440 struct srp_fr_desc *d; 441 struct ib_mr *mr; 442 int i, ret = -EINVAL; 443 enum ib_mr_type mr_type; 444 445 if (pool_size <= 0) 446 goto err; 447 ret = -ENOMEM; 448 pool = kzalloc(struct_size(pool, desc, pool_size), GFP_KERNEL); 449 if (!pool) 450 goto err; 451 pool->size = pool_size; 452 pool->max_page_list_len = max_page_list_len; 453 spin_lock_init(&pool->lock); 454 INIT_LIST_HEAD(&pool->free_list); 455 456 if (device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG) 457 mr_type = IB_MR_TYPE_SG_GAPS; 458 else 459 mr_type = IB_MR_TYPE_MEM_REG; 460 461 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) { 462 mr = ib_alloc_mr(pd, mr_type, max_page_list_len); 463 if (IS_ERR(mr)) { 464 ret = PTR_ERR(mr); 465 if (ret == -ENOMEM) 466 pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n", 467 dev_name(&device->dev)); 468 goto destroy_pool; 469 } 470 d->mr = mr; 471 list_add_tail(&d->entry, &pool->free_list); 472 } 473 474 out: 475 return pool; 476 477 destroy_pool: 478 srp_destroy_fr_pool(pool); 479 480 err: 481 pool = ERR_PTR(ret); 482 goto out; 483 } 484 485 /** 486 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration 487 * @pool: Pool to obtain descriptor from. 488 */ 489 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool) 490 { 491 struct srp_fr_desc *d = NULL; 492 unsigned long flags; 493 494 spin_lock_irqsave(&pool->lock, flags); 495 if (!list_empty(&pool->free_list)) { 496 d = list_first_entry(&pool->free_list, typeof(*d), entry); 497 list_del(&d->entry); 498 } 499 spin_unlock_irqrestore(&pool->lock, flags); 500 501 return d; 502 } 503 504 /** 505 * srp_fr_pool_put() - put an FR descriptor back in the free list 506 * @pool: Pool the descriptor was allocated from. 507 * @desc: Pointer to an array of fast registration descriptor pointers. 
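 *        (In this driver the descriptor array is typically req->fr_list, as
 *        filled in by srp_map_finish_fr(); see srp_unmap_data() for the
 *        corresponding put path.)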
508 * @n: Number of descriptors to put back. 509 * 510 * Note: The caller must already have queued an invalidation request for 511 * desc->mr->rkey before calling this function. 512 */ 513 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc, 514 int n) 515 { 516 unsigned long flags; 517 int i; 518 519 spin_lock_irqsave(&pool->lock, flags); 520 for (i = 0; i < n; i++) 521 list_add(&desc[i]->entry, &pool->free_list); 522 spin_unlock_irqrestore(&pool->lock, flags); 523 } 524 525 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target) 526 { 527 struct srp_device *dev = target->srp_host->srp_dev; 528 529 return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size, 530 dev->max_pages_per_mr); 531 } 532 533 /** 534 * srp_destroy_qp() - destroy an RDMA queue pair 535 * @ch: SRP RDMA channel. 536 * 537 * Drain the qp before destroying it. This avoids that the receive 538 * completion handler can access the queue pair while it is 539 * being destroyed. 540 */ 541 static void srp_destroy_qp(struct srp_rdma_ch *ch) 542 { 543 spin_lock_irq(&ch->lock); 544 ib_process_cq_direct(ch->send_cq, -1); 545 spin_unlock_irq(&ch->lock); 546 547 ib_drain_qp(ch->qp); 548 ib_destroy_qp(ch->qp); 549 } 550 551 static int srp_create_ch_ib(struct srp_rdma_ch *ch) 552 { 553 struct srp_target_port *target = ch->target; 554 struct srp_device *dev = target->srp_host->srp_dev; 555 struct ib_qp_init_attr *init_attr; 556 struct ib_cq *recv_cq, *send_cq; 557 struct ib_qp *qp; 558 struct ib_fmr_pool *fmr_pool = NULL; 559 struct srp_fr_pool *fr_pool = NULL; 560 const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2; 561 int ret; 562 563 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL); 564 if (!init_attr) 565 return -ENOMEM; 566 567 /* queue_size + 1 for ib_drain_rq() */ 568 recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1, 569 ch->comp_vector, IB_POLL_SOFTIRQ); 570 if (IS_ERR(recv_cq)) { 571 ret = PTR_ERR(recv_cq); 572 goto err; 573 } 574 575 send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size, 576 ch->comp_vector, IB_POLL_DIRECT); 577 if (IS_ERR(send_cq)) { 578 ret = PTR_ERR(send_cq); 579 goto err_recv_cq; 580 } 581 582 init_attr->event_handler = srp_qp_event; 583 init_attr->cap.max_send_wr = m * target->queue_size; 584 init_attr->cap.max_recv_wr = target->queue_size + 1; 585 init_attr->cap.max_recv_sge = 1; 586 init_attr->cap.max_send_sge = SRP_MAX_SGE; 587 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; 588 init_attr->qp_type = IB_QPT_RC; 589 init_attr->send_cq = send_cq; 590 init_attr->recv_cq = recv_cq; 591 592 if (target->using_rdma_cm) { 593 ret = rdma_create_qp(ch->rdma_cm.cm_id, dev->pd, init_attr); 594 qp = ch->rdma_cm.cm_id->qp; 595 } else { 596 qp = ib_create_qp(dev->pd, init_attr); 597 if (!IS_ERR(qp)) { 598 ret = srp_init_ib_qp(target, qp); 599 if (ret) 600 ib_destroy_qp(qp); 601 } else { 602 ret = PTR_ERR(qp); 603 } 604 } 605 if (ret) { 606 pr_err("QP creation failed for dev %s: %d\n", 607 dev_name(&dev->dev->dev), ret); 608 goto err_send_cq; 609 } 610 611 if (dev->use_fast_reg) { 612 fr_pool = srp_alloc_fr_pool(target); 613 if (IS_ERR(fr_pool)) { 614 ret = PTR_ERR(fr_pool); 615 shost_printk(KERN_WARNING, target->scsi_host, PFX 616 "FR pool allocation failed (%d)\n", ret); 617 goto err_qp; 618 } 619 } else if (dev->use_fmr) { 620 fmr_pool = srp_alloc_fmr_pool(target); 621 if (IS_ERR(fmr_pool)) { 622 ret = PTR_ERR(fmr_pool); 623 shost_printk(KERN_WARNING, target->scsi_host, PFX 624 "FMR pool allocation failed (%d)\n", ret); 625 goto err_qp; 
626 } 627 } 628 629 if (ch->qp) 630 srp_destroy_qp(ch); 631 if (ch->recv_cq) 632 ib_free_cq(ch->recv_cq); 633 if (ch->send_cq) 634 ib_free_cq(ch->send_cq); 635 636 ch->qp = qp; 637 ch->recv_cq = recv_cq; 638 ch->send_cq = send_cq; 639 640 if (dev->use_fast_reg) { 641 if (ch->fr_pool) 642 srp_destroy_fr_pool(ch->fr_pool); 643 ch->fr_pool = fr_pool; 644 } else if (dev->use_fmr) { 645 if (ch->fmr_pool) 646 ib_destroy_fmr_pool(ch->fmr_pool); 647 ch->fmr_pool = fmr_pool; 648 } 649 650 kfree(init_attr); 651 return 0; 652 653 err_qp: 654 if (target->using_rdma_cm) 655 rdma_destroy_qp(ch->rdma_cm.cm_id); 656 else 657 ib_destroy_qp(qp); 658 659 err_send_cq: 660 ib_free_cq(send_cq); 661 662 err_recv_cq: 663 ib_free_cq(recv_cq); 664 665 err: 666 kfree(init_attr); 667 return ret; 668 } 669 670 /* 671 * Note: this function may be called without srp_alloc_iu_bufs() having been 672 * invoked. Hence the ch->[rt]x_ring checks. 673 */ 674 static void srp_free_ch_ib(struct srp_target_port *target, 675 struct srp_rdma_ch *ch) 676 { 677 struct srp_device *dev = target->srp_host->srp_dev; 678 int i; 679 680 if (!ch->target) 681 return; 682 683 if (target->using_rdma_cm) { 684 if (ch->rdma_cm.cm_id) { 685 rdma_destroy_id(ch->rdma_cm.cm_id); 686 ch->rdma_cm.cm_id = NULL; 687 } 688 } else { 689 if (ch->ib_cm.cm_id) { 690 ib_destroy_cm_id(ch->ib_cm.cm_id); 691 ch->ib_cm.cm_id = NULL; 692 } 693 } 694 695 /* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */ 696 if (!ch->qp) 697 return; 698 699 if (dev->use_fast_reg) { 700 if (ch->fr_pool) 701 srp_destroy_fr_pool(ch->fr_pool); 702 } else if (dev->use_fmr) { 703 if (ch->fmr_pool) 704 ib_destroy_fmr_pool(ch->fmr_pool); 705 } 706 707 srp_destroy_qp(ch); 708 ib_free_cq(ch->send_cq); 709 ib_free_cq(ch->recv_cq); 710 711 /* 712 * Avoid that the SCSI error handler tries to use this channel after 713 * it has been freed. The SCSI error handler can namely continue 714 * trying to perform recovery actions after scsi_remove_host() 715 * returned. 
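 * Note that srp_free_ch_ib() itself also depends on this: the early return
 * at the top of this function turns a second call for an already freed
 * channel into a no-op.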
716 */ 717 ch->target = NULL; 718 719 ch->qp = NULL; 720 ch->send_cq = ch->recv_cq = NULL; 721 722 if (ch->rx_ring) { 723 for (i = 0; i < target->queue_size; ++i) 724 srp_free_iu(target->srp_host, ch->rx_ring[i]); 725 kfree(ch->rx_ring); 726 ch->rx_ring = NULL; 727 } 728 if (ch->tx_ring) { 729 for (i = 0; i < target->queue_size; ++i) 730 srp_free_iu(target->srp_host, ch->tx_ring[i]); 731 kfree(ch->tx_ring); 732 ch->tx_ring = NULL; 733 } 734 } 735 736 static void srp_path_rec_completion(int status, 737 struct sa_path_rec *pathrec, 738 void *ch_ptr) 739 { 740 struct srp_rdma_ch *ch = ch_ptr; 741 struct srp_target_port *target = ch->target; 742 743 ch->status = status; 744 if (status) 745 shost_printk(KERN_ERR, target->scsi_host, 746 PFX "Got failed path rec status %d\n", status); 747 else 748 ch->ib_cm.path = *pathrec; 749 complete(&ch->done); 750 } 751 752 static int srp_ib_lookup_path(struct srp_rdma_ch *ch) 753 { 754 struct srp_target_port *target = ch->target; 755 int ret; 756 757 ch->ib_cm.path.numb_path = 1; 758 759 init_completion(&ch->done); 760 761 ch->ib_cm.path_query_id = ib_sa_path_rec_get(&srp_sa_client, 762 target->srp_host->srp_dev->dev, 763 target->srp_host->port, 764 &ch->ib_cm.path, 765 IB_SA_PATH_REC_SERVICE_ID | 766 IB_SA_PATH_REC_DGID | 767 IB_SA_PATH_REC_SGID | 768 IB_SA_PATH_REC_NUMB_PATH | 769 IB_SA_PATH_REC_PKEY, 770 SRP_PATH_REC_TIMEOUT_MS, 771 GFP_KERNEL, 772 srp_path_rec_completion, 773 ch, &ch->ib_cm.path_query); 774 if (ch->ib_cm.path_query_id < 0) 775 return ch->ib_cm.path_query_id; 776 777 ret = wait_for_completion_interruptible(&ch->done); 778 if (ret < 0) 779 return ret; 780 781 if (ch->status < 0) 782 shost_printk(KERN_WARNING, target->scsi_host, 783 PFX "Path record query failed: sgid %pI6, dgid %pI6, pkey %#04x, service_id %#16llx\n", 784 ch->ib_cm.path.sgid.raw, ch->ib_cm.path.dgid.raw, 785 be16_to_cpu(target->ib_cm.pkey), 786 be64_to_cpu(target->ib_cm.service_id)); 787 788 return ch->status; 789 } 790 791 static int srp_rdma_lookup_path(struct srp_rdma_ch *ch) 792 { 793 struct srp_target_port *target = ch->target; 794 int ret; 795 796 init_completion(&ch->done); 797 798 ret = rdma_resolve_route(ch->rdma_cm.cm_id, SRP_PATH_REC_TIMEOUT_MS); 799 if (ret) 800 return ret; 801 802 wait_for_completion_interruptible(&ch->done); 803 804 if (ch->status != 0) 805 shost_printk(KERN_WARNING, target->scsi_host, 806 PFX "Path resolution failed\n"); 807 808 return ch->status; 809 } 810 811 static int srp_lookup_path(struct srp_rdma_ch *ch) 812 { 813 struct srp_target_port *target = ch->target; 814 815 return target->using_rdma_cm ? 
srp_rdma_lookup_path(ch) : 816 srp_ib_lookup_path(ch); 817 } 818 819 static u8 srp_get_subnet_timeout(struct srp_host *host) 820 { 821 struct ib_port_attr attr; 822 int ret; 823 u8 subnet_timeout = 18; 824 825 ret = ib_query_port(host->srp_dev->dev, host->port, &attr); 826 if (ret == 0) 827 subnet_timeout = attr.subnet_timeout; 828 829 if (unlikely(subnet_timeout < 15)) 830 pr_warn("%s: subnet timeout %d may cause SRP login to fail.\n", 831 dev_name(&host->srp_dev->dev->dev), subnet_timeout); 832 833 return subnet_timeout; 834 } 835 836 static int srp_send_req(struct srp_rdma_ch *ch, uint32_t max_iu_len, 837 bool multich) 838 { 839 struct srp_target_port *target = ch->target; 840 struct { 841 struct rdma_conn_param rdma_param; 842 struct srp_login_req_rdma rdma_req; 843 struct ib_cm_req_param ib_param; 844 struct srp_login_req ib_req; 845 } *req = NULL; 846 char *ipi, *tpi; 847 int status; 848 849 req = kzalloc(sizeof *req, GFP_KERNEL); 850 if (!req) 851 return -ENOMEM; 852 853 req->ib_param.flow_control = 1; 854 req->ib_param.retry_count = target->tl_retry_count; 855 856 /* 857 * Pick some arbitrary defaults here; we could make these 858 * module parameters if anyone cared about setting them. 859 */ 860 req->ib_param.responder_resources = 4; 861 req->ib_param.rnr_retry_count = 7; 862 req->ib_param.max_cm_retries = 15; 863 864 req->ib_req.opcode = SRP_LOGIN_REQ; 865 req->ib_req.tag = 0; 866 req->ib_req.req_it_iu_len = cpu_to_be32(max_iu_len); 867 req->ib_req.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT | 868 SRP_BUF_FORMAT_INDIRECT); 869 req->ib_req.req_flags = (multich ? SRP_MULTICHAN_MULTI : 870 SRP_MULTICHAN_SINGLE); 871 if (srp_use_imm_data) { 872 req->ib_req.req_flags |= SRP_IMMED_REQUESTED; 873 req->ib_req.imm_data_offset = cpu_to_be16(SRP_IMM_DATA_OFFSET); 874 } 875 876 if (target->using_rdma_cm) { 877 req->rdma_param.flow_control = req->ib_param.flow_control; 878 req->rdma_param.responder_resources = 879 req->ib_param.responder_resources; 880 req->rdma_param.initiator_depth = req->ib_param.initiator_depth; 881 req->rdma_param.retry_count = req->ib_param.retry_count; 882 req->rdma_param.rnr_retry_count = req->ib_param.rnr_retry_count; 883 req->rdma_param.private_data = &req->rdma_req; 884 req->rdma_param.private_data_len = sizeof(req->rdma_req); 885 886 req->rdma_req.opcode = req->ib_req.opcode; 887 req->rdma_req.tag = req->ib_req.tag; 888 req->rdma_req.req_it_iu_len = req->ib_req.req_it_iu_len; 889 req->rdma_req.req_buf_fmt = req->ib_req.req_buf_fmt; 890 req->rdma_req.req_flags = req->ib_req.req_flags; 891 req->rdma_req.imm_data_offset = req->ib_req.imm_data_offset; 892 893 ipi = req->rdma_req.initiator_port_id; 894 tpi = req->rdma_req.target_port_id; 895 } else { 896 u8 subnet_timeout; 897 898 subnet_timeout = srp_get_subnet_timeout(target->srp_host); 899 900 req->ib_param.primary_path = &ch->ib_cm.path; 901 req->ib_param.alternate_path = NULL; 902 req->ib_param.service_id = target->ib_cm.service_id; 903 get_random_bytes(&req->ib_param.starting_psn, 4); 904 req->ib_param.starting_psn &= 0xffffff; 905 req->ib_param.qp_num = ch->qp->qp_num; 906 req->ib_param.qp_type = ch->qp->qp_type; 907 req->ib_param.local_cm_response_timeout = subnet_timeout + 2; 908 req->ib_param.remote_cm_response_timeout = subnet_timeout + 2; 909 req->ib_param.private_data = &req->ib_req; 910 req->ib_param.private_data_len = sizeof(req->ib_req); 911 912 ipi = req->ib_req.initiator_port_id; 913 tpi = req->ib_req.target_port_id; 914 } 915 916 /* 917 * In the published SRP specification (draft rev. 
16a), the 918 * port identifier format is 8 bytes of ID extension followed 919 * by 8 bytes of GUID. Older drafts put the two halves in the 920 * opposite order, so that the GUID comes first. 921 * 922 * Targets conforming to these obsolete drafts can be 923 * recognized by the I/O Class they report. 924 */ 925 if (target->io_class == SRP_REV10_IB_IO_CLASS) { 926 memcpy(ipi, &target->sgid.global.interface_id, 8); 927 memcpy(ipi + 8, &target->initiator_ext, 8); 928 memcpy(tpi, &target->ioc_guid, 8); 929 memcpy(tpi + 8, &target->id_ext, 8); 930 } else { 931 memcpy(ipi, &target->initiator_ext, 8); 932 memcpy(ipi + 8, &target->sgid.global.interface_id, 8); 933 memcpy(tpi, &target->id_ext, 8); 934 memcpy(tpi + 8, &target->ioc_guid, 8); 935 } 936 937 /* 938 * Topspin/Cisco SRP targets will reject our login unless we 939 * zero out the first 8 bytes of our initiator port ID and set 940 * the second 8 bytes to the local node GUID. 941 */ 942 if (srp_target_is_topspin(target)) { 943 shost_printk(KERN_DEBUG, target->scsi_host, 944 PFX "Topspin/Cisco initiator port ID workaround " 945 "activated for target GUID %016llx\n", 946 be64_to_cpu(target->ioc_guid)); 947 memset(ipi, 0, 8); 948 memcpy(ipi + 8, &target->srp_host->srp_dev->dev->node_guid, 8); 949 } 950 951 if (target->using_rdma_cm) 952 status = rdma_connect(ch->rdma_cm.cm_id, &req->rdma_param); 953 else 954 status = ib_send_cm_req(ch->ib_cm.cm_id, &req->ib_param); 955 956 kfree(req); 957 958 return status; 959 } 960 961 static bool srp_queue_remove_work(struct srp_target_port *target) 962 { 963 bool changed = false; 964 965 spin_lock_irq(&target->lock); 966 if (target->state != SRP_TARGET_REMOVED) { 967 target->state = SRP_TARGET_REMOVED; 968 changed = true; 969 } 970 spin_unlock_irq(&target->lock); 971 972 if (changed) 973 queue_work(srp_remove_wq, &target->remove_work); 974 975 return changed; 976 } 977 978 static void srp_disconnect_target(struct srp_target_port *target) 979 { 980 struct srp_rdma_ch *ch; 981 int i, ret; 982 983 /* XXX should send SRP_I_LOGOUT request */ 984 985 for (i = 0; i < target->ch_count; i++) { 986 ch = &target->ch[i]; 987 ch->connected = false; 988 ret = 0; 989 if (target->using_rdma_cm) { 990 if (ch->rdma_cm.cm_id) 991 rdma_disconnect(ch->rdma_cm.cm_id); 992 } else { 993 if (ch->ib_cm.cm_id) 994 ret = ib_send_cm_dreq(ch->ib_cm.cm_id, 995 NULL, 0); 996 } 997 if (ret < 0) { 998 shost_printk(KERN_DEBUG, target->scsi_host, 999 PFX "Sending CM DREQ failed\n"); 1000 } 1001 } 1002 } 1003 1004 static void srp_free_req_data(struct srp_target_port *target, 1005 struct srp_rdma_ch *ch) 1006 { 1007 struct srp_device *dev = target->srp_host->srp_dev; 1008 struct ib_device *ibdev = dev->dev; 1009 struct srp_request *req; 1010 int i; 1011 1012 if (!ch->req_ring) 1013 return; 1014 1015 for (i = 0; i < target->req_ring_size; ++i) { 1016 req = &ch->req_ring[i]; 1017 if (dev->use_fast_reg) { 1018 kfree(req->fr_list); 1019 } else { 1020 kfree(req->fmr_list); 1021 kfree(req->map_page); 1022 } 1023 if (req->indirect_dma_addr) { 1024 ib_dma_unmap_single(ibdev, req->indirect_dma_addr, 1025 target->indirect_size, 1026 DMA_TO_DEVICE); 1027 } 1028 kfree(req->indirect_desc); 1029 } 1030 1031 kfree(ch->req_ring); 1032 ch->req_ring = NULL; 1033 } 1034 1035 static int srp_alloc_req_data(struct srp_rdma_ch *ch) 1036 { 1037 struct srp_target_port *target = ch->target; 1038 struct srp_device *srp_dev = target->srp_host->srp_dev; 1039 struct ib_device *ibdev = srp_dev->dev; 1040 struct srp_request *req; 1041 void *mr_list; 1042 dma_addr_t dma_addr; 
1043 int i, ret = -ENOMEM; 1044 1045 ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring), 1046 GFP_KERNEL); 1047 if (!ch->req_ring) 1048 goto out; 1049 1050 for (i = 0; i < target->req_ring_size; ++i) { 1051 req = &ch->req_ring[i]; 1052 mr_list = kmalloc_array(target->mr_per_cmd, sizeof(void *), 1053 GFP_KERNEL); 1054 if (!mr_list) 1055 goto out; 1056 if (srp_dev->use_fast_reg) { 1057 req->fr_list = mr_list; 1058 } else { 1059 req->fmr_list = mr_list; 1060 req->map_page = kmalloc_array(srp_dev->max_pages_per_mr, 1061 sizeof(void *), 1062 GFP_KERNEL); 1063 if (!req->map_page) 1064 goto out; 1065 } 1066 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL); 1067 if (!req->indirect_desc) 1068 goto out; 1069 1070 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc, 1071 target->indirect_size, 1072 DMA_TO_DEVICE); 1073 if (ib_dma_mapping_error(ibdev, dma_addr)) 1074 goto out; 1075 1076 req->indirect_dma_addr = dma_addr; 1077 } 1078 ret = 0; 1079 1080 out: 1081 return ret; 1082 } 1083 1084 /** 1085 * srp_del_scsi_host_attr() - Remove attributes defined in the host template. 1086 * @shost: SCSI host whose attributes to remove from sysfs. 1087 * 1088 * Note: Any attributes defined in the host template and that did not exist 1089 * before invocation of this function will be ignored. 1090 */ 1091 static void srp_del_scsi_host_attr(struct Scsi_Host *shost) 1092 { 1093 struct device_attribute **attr; 1094 1095 for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr) 1096 device_remove_file(&shost->shost_dev, *attr); 1097 } 1098 1099 static void srp_remove_target(struct srp_target_port *target) 1100 { 1101 struct srp_rdma_ch *ch; 1102 int i; 1103 1104 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); 1105 1106 srp_del_scsi_host_attr(target->scsi_host); 1107 srp_rport_get(target->rport); 1108 srp_remove_host(target->scsi_host); 1109 scsi_remove_host(target->scsi_host); 1110 srp_stop_rport_timers(target->rport); 1111 srp_disconnect_target(target); 1112 kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net); 1113 for (i = 0; i < target->ch_count; i++) { 1114 ch = &target->ch[i]; 1115 srp_free_ch_ib(target, ch); 1116 } 1117 cancel_work_sync(&target->tl_err_work); 1118 srp_rport_put(target->rport); 1119 for (i = 0; i < target->ch_count; i++) { 1120 ch = &target->ch[i]; 1121 srp_free_req_data(target, ch); 1122 } 1123 kfree(target->ch); 1124 target->ch = NULL; 1125 1126 spin_lock(&target->srp_host->target_lock); 1127 list_del(&target->list); 1128 spin_unlock(&target->srp_host->target_lock); 1129 1130 scsi_host_put(target->scsi_host); 1131 } 1132 1133 static void srp_remove_work(struct work_struct *work) 1134 { 1135 struct srp_target_port *target = 1136 container_of(work, struct srp_target_port, remove_work); 1137 1138 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); 1139 1140 srp_remove_target(target); 1141 } 1142 1143 static void srp_rport_delete(struct srp_rport *rport) 1144 { 1145 struct srp_target_port *target = rport->lld_data; 1146 1147 srp_queue_remove_work(target); 1148 } 1149 1150 /** 1151 * srp_connected_ch() - number of connected channels 1152 * @target: SRP target port. 
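 *
 * Return: the number of channels for which ch->connected is true.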
1153 */ 1154 static int srp_connected_ch(struct srp_target_port *target) 1155 { 1156 int i, c = 0; 1157 1158 for (i = 0; i < target->ch_count; i++) 1159 c += target->ch[i].connected; 1160 1161 return c; 1162 } 1163 1164 static int srp_connect_ch(struct srp_rdma_ch *ch, uint32_t max_iu_len, 1165 bool multich) 1166 { 1167 struct srp_target_port *target = ch->target; 1168 int ret; 1169 1170 WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0); 1171 1172 ret = srp_lookup_path(ch); 1173 if (ret) 1174 goto out; 1175 1176 while (1) { 1177 init_completion(&ch->done); 1178 ret = srp_send_req(ch, max_iu_len, multich); 1179 if (ret) 1180 goto out; 1181 ret = wait_for_completion_interruptible(&ch->done); 1182 if (ret < 0) 1183 goto out; 1184 1185 /* 1186 * The CM event handling code will set status to 1187 * SRP_PORT_REDIRECT if we get a port redirect REJ 1188 * back, or SRP_DLID_REDIRECT if we get a lid/qp 1189 * redirect REJ back. 1190 */ 1191 ret = ch->status; 1192 switch (ret) { 1193 case 0: 1194 ch->connected = true; 1195 goto out; 1196 1197 case SRP_PORT_REDIRECT: 1198 ret = srp_lookup_path(ch); 1199 if (ret) 1200 goto out; 1201 break; 1202 1203 case SRP_DLID_REDIRECT: 1204 break; 1205 1206 case SRP_STALE_CONN: 1207 shost_printk(KERN_ERR, target->scsi_host, PFX 1208 "giving up on stale connection\n"); 1209 ret = -ECONNRESET; 1210 goto out; 1211 1212 default: 1213 goto out; 1214 } 1215 } 1216 1217 out: 1218 return ret <= 0 ? ret : -ENODEV; 1219 } 1220 1221 static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc) 1222 { 1223 srp_handle_qp_err(cq, wc, "INV RKEY"); 1224 } 1225 1226 static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch, 1227 u32 rkey) 1228 { 1229 struct ib_send_wr wr = { 1230 .opcode = IB_WR_LOCAL_INV, 1231 .next = NULL, 1232 .num_sge = 0, 1233 .send_flags = 0, 1234 .ex.invalidate_rkey = rkey, 1235 }; 1236 1237 wr.wr_cqe = &req->reg_cqe; 1238 req->reg_cqe.done = srp_inv_rkey_err_done; 1239 return ib_post_send(ch->qp, &wr, NULL); 1240 } 1241 1242 static void srp_unmap_data(struct scsi_cmnd *scmnd, 1243 struct srp_rdma_ch *ch, 1244 struct srp_request *req) 1245 { 1246 struct srp_target_port *target = ch->target; 1247 struct srp_device *dev = target->srp_host->srp_dev; 1248 struct ib_device *ibdev = dev->dev; 1249 int i, res; 1250 1251 if (!scsi_sglist(scmnd) || 1252 (scmnd->sc_data_direction != DMA_TO_DEVICE && 1253 scmnd->sc_data_direction != DMA_FROM_DEVICE)) 1254 return; 1255 1256 if (dev->use_fast_reg) { 1257 struct srp_fr_desc **pfr; 1258 1259 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) { 1260 res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey); 1261 if (res < 0) { 1262 shost_printk(KERN_ERR, target->scsi_host, PFX 1263 "Queueing INV WR for rkey %#x failed (%d)\n", 1264 (*pfr)->mr->rkey, res); 1265 queue_work(system_long_wq, 1266 &target->tl_err_work); 1267 } 1268 } 1269 if (req->nmdesc) 1270 srp_fr_pool_put(ch->fr_pool, req->fr_list, 1271 req->nmdesc); 1272 } else if (dev->use_fmr) { 1273 struct ib_pool_fmr **pfmr; 1274 1275 for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++) 1276 ib_fmr_pool_unmap(*pfmr); 1277 } 1278 1279 ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd), 1280 scmnd->sc_data_direction); 1281 } 1282 1283 /** 1284 * srp_claim_req - Take ownership of the scmnd associated with a request. 1285 * @ch: SRP RDMA channel. 1286 * @req: SRP request. 1287 * @sdev: If not NULL, only take ownership for this SCSI device. 1288 * @scmnd: If NULL, take ownership of @req->scmnd. 
 *         If not NULL, only take ownership of @req->scmnd if it equals
 *         @scmnd.
 *
 * Return value:
 * Either NULL or a pointer to the SCSI command the caller became owner of.
 */
static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
				       struct srp_request *req,
				       struct scsi_device *sdev,
				       struct scsi_cmnd *scmnd)
{
	unsigned long flags;

	spin_lock_irqsave(&ch->lock, flags);
	if (req->scmnd &&
	    (!sdev || req->scmnd->device == sdev) &&
	    (!scmnd || req->scmnd == scmnd)) {
		scmnd = req->scmnd;
		req->scmnd = NULL;
	} else {
		scmnd = NULL;
	}
	spin_unlock_irqrestore(&ch->lock, flags);

	return scmnd;
}

/**
 * srp_free_req() - Unmap data and adjust ch->req_lim.
 * @ch: SRP RDMA channel.
 * @req: Request to be freed.
 * @scmnd: SCSI command associated with @req.
 * @req_lim_delta: Amount to be added to @ch->req_lim.
 */
static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
			 struct scsi_cmnd *scmnd, s32 req_lim_delta)
{
	unsigned long flags;

	srp_unmap_data(scmnd, ch, req);

	spin_lock_irqsave(&ch->lock, flags);
	ch->req_lim += req_lim_delta;
	spin_unlock_irqrestore(&ch->lock, flags);
}

static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
			   struct scsi_device *sdev, int result)
{
	struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);

	if (scmnd) {
		srp_free_req(ch, req, scmnd, 0);
		scmnd->result = result;
		scmnd->scsi_done(scmnd);
	}
}

static void srp_terminate_io(struct srp_rport *rport)
{
	struct srp_target_port *target = rport->lld_data;
	struct srp_rdma_ch *ch;
	int i, j;

	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];

		for (j = 0; j < target->req_ring_size; ++j) {
			struct srp_request *req = &ch->req_ring[j];

			srp_finish_req(ch, req, NULL,
				       DID_TRANSPORT_FAILFAST << 16);
		}
	}
}

/* Calculate the maximum initiator to target information unit length. */
static uint32_t srp_max_it_iu_len(int cmd_sg_cnt, bool use_imm_data)
{
	uint32_t max_iu_len = sizeof(struct srp_cmd) + SRP_MAX_ADD_CDB_LEN +
		sizeof(struct srp_indirect_buf) +
		cmd_sg_cnt * sizeof(struct srp_direct_buf);

	if (use_imm_data)
		max_iu_len = max(max_iu_len, SRP_IMM_DATA_OFFSET +
				 srp_max_imm_data);

	return max_iu_len;
}

/*
 * It is up to the caller to ensure that srp_rport_reconnect() calls are
 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
 * srp_reset_device() or srp_reset_host() calls will occur while this function
 * is in progress. One way to achieve this is not to call this function
 * directly, but to call srp_reconnect_rport() instead, since that function
 * serializes calls to this function via rport->mutex and also blocks
 * srp_queuecommand() calls before invoking this function.
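 *
 * Illustrative sequence (a sketch of the description above, not an
 * additional code path):
 *
 *	srp_reconnect_rport(rport)
 *		mutex_lock(&rport->mutex)
 *		... block srp_queuecommand() calls ...
 *		srp_rport_reconnect(rport)	<-- this function
 *		mutex_unlock(&rport->mutex)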
1386 */ 1387 static int srp_rport_reconnect(struct srp_rport *rport) 1388 { 1389 struct srp_target_port *target = rport->lld_data; 1390 struct srp_rdma_ch *ch; 1391 uint32_t max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, 1392 srp_use_imm_data); 1393 int i, j, ret = 0; 1394 bool multich = false; 1395 1396 srp_disconnect_target(target); 1397 1398 if (target->state == SRP_TARGET_SCANNING) 1399 return -ENODEV; 1400 1401 /* 1402 * Now get a new local CM ID so that we avoid confusing the target in 1403 * case things are really fouled up. Doing so also ensures that all CM 1404 * callbacks will have finished before a new QP is allocated. 1405 */ 1406 for (i = 0; i < target->ch_count; i++) { 1407 ch = &target->ch[i]; 1408 ret += srp_new_cm_id(ch); 1409 } 1410 for (i = 0; i < target->ch_count; i++) { 1411 ch = &target->ch[i]; 1412 for (j = 0; j < target->req_ring_size; ++j) { 1413 struct srp_request *req = &ch->req_ring[j]; 1414 1415 srp_finish_req(ch, req, NULL, DID_RESET << 16); 1416 } 1417 } 1418 for (i = 0; i < target->ch_count; i++) { 1419 ch = &target->ch[i]; 1420 /* 1421 * Whether or not creating a new CM ID succeeded, create a new 1422 * QP. This guarantees that all completion callback function 1423 * invocations have finished before request resetting starts. 1424 */ 1425 ret += srp_create_ch_ib(ch); 1426 1427 INIT_LIST_HEAD(&ch->free_tx); 1428 for (j = 0; j < target->queue_size; ++j) 1429 list_add(&ch->tx_ring[j]->list, &ch->free_tx); 1430 } 1431 1432 target->qp_in_error = false; 1433 1434 for (i = 0; i < target->ch_count; i++) { 1435 ch = &target->ch[i]; 1436 if (ret) 1437 break; 1438 ret = srp_connect_ch(ch, max_iu_len, multich); 1439 multich = true; 1440 } 1441 1442 if (ret == 0) 1443 shost_printk(KERN_INFO, target->scsi_host, 1444 PFX "reconnect succeeded\n"); 1445 1446 return ret; 1447 } 1448 1449 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr, 1450 unsigned int dma_len, u32 rkey) 1451 { 1452 struct srp_direct_buf *desc = state->desc; 1453 1454 WARN_ON_ONCE(!dma_len); 1455 1456 desc->va = cpu_to_be64(dma_addr); 1457 desc->key = cpu_to_be32(rkey); 1458 desc->len = cpu_to_be32(dma_len); 1459 1460 state->total_len += dma_len; 1461 state->desc++; 1462 state->ndesc++; 1463 } 1464 1465 static int srp_map_finish_fmr(struct srp_map_state *state, 1466 struct srp_rdma_ch *ch) 1467 { 1468 struct srp_target_port *target = ch->target; 1469 struct srp_device *dev = target->srp_host->srp_dev; 1470 struct ib_pool_fmr *fmr; 1471 u64 io_addr = 0; 1472 1473 if (state->fmr.next >= state->fmr.end) { 1474 shost_printk(KERN_ERR, ch->target->scsi_host, 1475 PFX "Out of MRs (mr_per_cmd = %d)\n", 1476 ch->target->mr_per_cmd); 1477 return -ENOMEM; 1478 } 1479 1480 WARN_ON_ONCE(!dev->use_fmr); 1481 1482 if (state->npages == 0) 1483 return 0; 1484 1485 if (state->npages == 1 && target->global_rkey) { 1486 srp_map_desc(state, state->base_dma_addr, state->dma_len, 1487 target->global_rkey); 1488 goto reset_state; 1489 } 1490 1491 fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages, 1492 state->npages, io_addr); 1493 if (IS_ERR(fmr)) 1494 return PTR_ERR(fmr); 1495 1496 *state->fmr.next++ = fmr; 1497 state->nmdesc++; 1498 1499 srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask, 1500 state->dma_len, fmr->fmr->rkey); 1501 1502 reset_state: 1503 state->npages = 0; 1504 state->dma_len = 0; 1505 1506 return 0; 1507 } 1508 1509 static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc) 1510 { 1511 srp_handle_qp_err(cq, wc, "FAST REG"); 1512 } 1513 1514 /* 1515 * Map up to 
sg_nents elements of state->sg where *sg_offset_p is the offset 1516 * where to start in the first element. If sg_offset_p != NULL then 1517 * *sg_offset_p is updated to the offset in state->sg[retval] of the first 1518 * byte that has not yet been mapped. 1519 */ 1520 static int srp_map_finish_fr(struct srp_map_state *state, 1521 struct srp_request *req, 1522 struct srp_rdma_ch *ch, int sg_nents, 1523 unsigned int *sg_offset_p) 1524 { 1525 struct srp_target_port *target = ch->target; 1526 struct srp_device *dev = target->srp_host->srp_dev; 1527 struct ib_reg_wr wr; 1528 struct srp_fr_desc *desc; 1529 u32 rkey; 1530 int n, err; 1531 1532 if (state->fr.next >= state->fr.end) { 1533 shost_printk(KERN_ERR, ch->target->scsi_host, 1534 PFX "Out of MRs (mr_per_cmd = %d)\n", 1535 ch->target->mr_per_cmd); 1536 return -ENOMEM; 1537 } 1538 1539 WARN_ON_ONCE(!dev->use_fast_reg); 1540 1541 if (sg_nents == 1 && target->global_rkey) { 1542 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; 1543 1544 srp_map_desc(state, sg_dma_address(state->sg) + sg_offset, 1545 sg_dma_len(state->sg) - sg_offset, 1546 target->global_rkey); 1547 if (sg_offset_p) 1548 *sg_offset_p = 0; 1549 return 1; 1550 } 1551 1552 desc = srp_fr_pool_get(ch->fr_pool); 1553 if (!desc) 1554 return -ENOMEM; 1555 1556 rkey = ib_inc_rkey(desc->mr->rkey); 1557 ib_update_fast_reg_key(desc->mr, rkey); 1558 1559 n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p, 1560 dev->mr_page_size); 1561 if (unlikely(n < 0)) { 1562 srp_fr_pool_put(ch->fr_pool, &desc, 1); 1563 pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n", 1564 dev_name(&req->scmnd->device->sdev_gendev), sg_nents, 1565 sg_offset_p ? *sg_offset_p : -1, n); 1566 return n; 1567 } 1568 1569 WARN_ON_ONCE(desc->mr->length == 0); 1570 1571 req->reg_cqe.done = srp_reg_mr_err_done; 1572 1573 wr.wr.next = NULL; 1574 wr.wr.opcode = IB_WR_REG_MR; 1575 wr.wr.wr_cqe = &req->reg_cqe; 1576 wr.wr.num_sge = 0; 1577 wr.wr.send_flags = 0; 1578 wr.mr = desc->mr; 1579 wr.key = desc->mr->rkey; 1580 wr.access = (IB_ACCESS_LOCAL_WRITE | 1581 IB_ACCESS_REMOTE_READ | 1582 IB_ACCESS_REMOTE_WRITE); 1583 1584 *state->fr.next++ = desc; 1585 state->nmdesc++; 1586 1587 srp_map_desc(state, desc->mr->iova, 1588 desc->mr->length, desc->mr->rkey); 1589 1590 err = ib_post_send(ch->qp, &wr.wr, NULL); 1591 if (unlikely(err)) { 1592 WARN_ON_ONCE(err == -ENOMEM); 1593 return err; 1594 } 1595 1596 return n; 1597 } 1598 1599 static int srp_map_sg_entry(struct srp_map_state *state, 1600 struct srp_rdma_ch *ch, 1601 struct scatterlist *sg) 1602 { 1603 struct srp_target_port *target = ch->target; 1604 struct srp_device *dev = target->srp_host->srp_dev; 1605 dma_addr_t dma_addr = sg_dma_address(sg); 1606 unsigned int dma_len = sg_dma_len(sg); 1607 unsigned int len = 0; 1608 int ret; 1609 1610 WARN_ON_ONCE(!dma_len); 1611 1612 while (dma_len) { 1613 unsigned offset = dma_addr & ~dev->mr_page_mask; 1614 1615 if (state->npages == dev->max_pages_per_mr || 1616 (state->npages > 0 && offset != 0)) { 1617 ret = srp_map_finish_fmr(state, ch); 1618 if (ret) 1619 return ret; 1620 } 1621 1622 len = min_t(unsigned int, dma_len, dev->mr_page_size - offset); 1623 1624 if (!state->npages) 1625 state->base_dma_addr = dma_addr; 1626 state->pages[state->npages++] = dma_addr & dev->mr_page_mask; 1627 state->dma_len += len; 1628 dma_addr += len; 1629 dma_len -= len; 1630 } 1631 1632 /* 1633 * If the end of the MR is not on a page boundary then we need to 1634 * close it out and start a new one -- we can only merge at page 1635 * 
boundaries. 1636 */ 1637 ret = 0; 1638 if ((dma_addr & ~dev->mr_page_mask) != 0) 1639 ret = srp_map_finish_fmr(state, ch); 1640 return ret; 1641 } 1642 1643 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch, 1644 struct srp_request *req, struct scatterlist *scat, 1645 int count) 1646 { 1647 struct scatterlist *sg; 1648 int i, ret; 1649 1650 state->pages = req->map_page; 1651 state->fmr.next = req->fmr_list; 1652 state->fmr.end = req->fmr_list + ch->target->mr_per_cmd; 1653 1654 for_each_sg(scat, sg, count, i) { 1655 ret = srp_map_sg_entry(state, ch, sg); 1656 if (ret) 1657 return ret; 1658 } 1659 1660 ret = srp_map_finish_fmr(state, ch); 1661 if (ret) 1662 return ret; 1663 1664 return 0; 1665 } 1666 1667 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch, 1668 struct srp_request *req, struct scatterlist *scat, 1669 int count) 1670 { 1671 unsigned int sg_offset = 0; 1672 1673 state->fr.next = req->fr_list; 1674 state->fr.end = req->fr_list + ch->target->mr_per_cmd; 1675 state->sg = scat; 1676 1677 if (count == 0) 1678 return 0; 1679 1680 while (count) { 1681 int i, n; 1682 1683 n = srp_map_finish_fr(state, req, ch, count, &sg_offset); 1684 if (unlikely(n < 0)) 1685 return n; 1686 1687 count -= n; 1688 for (i = 0; i < n; i++) 1689 state->sg = sg_next(state->sg); 1690 } 1691 1692 return 0; 1693 } 1694 1695 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch, 1696 struct srp_request *req, struct scatterlist *scat, 1697 int count) 1698 { 1699 struct srp_target_port *target = ch->target; 1700 struct scatterlist *sg; 1701 int i; 1702 1703 for_each_sg(scat, sg, count, i) { 1704 srp_map_desc(state, sg_dma_address(sg), sg_dma_len(sg), 1705 target->global_rkey); 1706 } 1707 1708 return 0; 1709 } 1710 1711 /* 1712 * Register the indirect data buffer descriptor with the HCA. 1713 * 1714 * Note: since the indirect data buffer descriptor has been allocated with 1715 * kmalloc() it is guaranteed that this buffer is a physically contiguous 1716 * memory buffer. 1717 */ 1718 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req, 1719 void **next_mr, void **end_mr, u32 idb_len, 1720 __be32 *idb_rkey) 1721 { 1722 struct srp_target_port *target = ch->target; 1723 struct srp_device *dev = target->srp_host->srp_dev; 1724 struct srp_map_state state; 1725 struct srp_direct_buf idb_desc; 1726 u64 idb_pages[1]; 1727 struct scatterlist idb_sg[1]; 1728 int ret; 1729 1730 memset(&state, 0, sizeof(state)); 1731 memset(&idb_desc, 0, sizeof(idb_desc)); 1732 state.gen.next = next_mr; 1733 state.gen.end = end_mr; 1734 state.desc = &idb_desc; 1735 state.base_dma_addr = req->indirect_dma_addr; 1736 state.dma_len = idb_len; 1737 1738 if (dev->use_fast_reg) { 1739 state.sg = idb_sg; 1740 sg_init_one(idb_sg, req->indirect_desc, idb_len); 1741 idb_sg->dma_address = req->indirect_dma_addr; /* hack! 
 */
#ifdef CONFIG_NEED_SG_DMA_LENGTH
		idb_sg->dma_length = idb_sg->length;	      /* hack^2 */
#endif
		ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
		if (ret < 0)
			return ret;
		WARN_ON_ONCE(ret < 1);
	} else if (dev->use_fmr) {
		state.pages = idb_pages;
		state.pages[0] = (req->indirect_dma_addr &
				  dev->mr_page_mask);
		state.npages = 1;
		ret = srp_map_finish_fmr(&state, ch);
		if (ret < 0)
			return ret;
	} else {
		return -EINVAL;
	}

	*idb_rkey = idb_desc.key;

	return 0;
}

static void srp_check_mapping(struct srp_map_state *state,
			      struct srp_rdma_ch *ch, struct srp_request *req,
			      struct scatterlist *scat, int count)
{
	struct srp_device *dev = ch->target->srp_host->srp_dev;
	struct srp_fr_desc **pfr;
	u64 desc_len = 0, mr_len = 0;
	int i;

	for (i = 0; i < state->ndesc; i++)
		desc_len += be32_to_cpu(req->indirect_desc[i].len);
	if (dev->use_fast_reg)
		for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
			mr_len += (*pfr)->mr->length;
	else if (dev->use_fmr)
		for (i = 0; i < state->nmdesc; i++)
			mr_len += be32_to_cpu(req->indirect_desc[i].len);
	if (desc_len != scsi_bufflen(req->scmnd) ||
	    mr_len > scsi_bufflen(req->scmnd))
		pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
		       scsi_bufflen(req->scmnd), desc_len, mr_len,
		       state->ndesc, state->nmdesc);
}

/**
 * srp_map_data() - map SCSI data buffer onto an SRP request
 * @scmnd: SCSI command to map
 * @ch: SRP RDMA channel
 * @req: SRP request
 *
 * Returns the length in bytes of the SRP_CMD IU or a negative value if
 * mapping failed. The size of any immediate data is not included in the
 * return value.
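 *
 * Illustrative example (derived from the code below, not an additional code
 * path): with no immediate data and a single S/G entry that is covered by
 * the global rkey, the SRP_DATA_DESC_DIRECT branch is taken and the
 * returned length is sizeof(struct srp_cmd) + cmd->add_cdb_len +
 * sizeof(struct srp_direct_buf).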
1799 */ 1800 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, 1801 struct srp_request *req) 1802 { 1803 struct srp_target_port *target = ch->target; 1804 struct scatterlist *scat, *sg; 1805 struct srp_cmd *cmd = req->cmd->buf; 1806 int i, len, nents, count, ret; 1807 struct srp_device *dev; 1808 struct ib_device *ibdev; 1809 struct srp_map_state state; 1810 struct srp_indirect_buf *indirect_hdr; 1811 u64 data_len; 1812 u32 idb_len, table_len; 1813 __be32 idb_rkey; 1814 u8 fmt; 1815 1816 req->cmd->num_sge = 1; 1817 1818 if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE) 1819 return sizeof(struct srp_cmd) + cmd->add_cdb_len; 1820 1821 if (scmnd->sc_data_direction != DMA_FROM_DEVICE && 1822 scmnd->sc_data_direction != DMA_TO_DEVICE) { 1823 shost_printk(KERN_WARNING, target->scsi_host, 1824 PFX "Unhandled data direction %d\n", 1825 scmnd->sc_data_direction); 1826 return -EINVAL; 1827 } 1828 1829 nents = scsi_sg_count(scmnd); 1830 scat = scsi_sglist(scmnd); 1831 data_len = scsi_bufflen(scmnd); 1832 1833 dev = target->srp_host->srp_dev; 1834 ibdev = dev->dev; 1835 1836 count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction); 1837 if (unlikely(count == 0)) 1838 return -EIO; 1839 1840 if (ch->use_imm_data && 1841 count <= SRP_MAX_IMM_SGE && 1842 SRP_IMM_DATA_OFFSET + data_len <= ch->max_it_iu_len && 1843 scmnd->sc_data_direction == DMA_TO_DEVICE) { 1844 struct srp_imm_buf *buf; 1845 struct ib_sge *sge = &req->cmd->sge[1]; 1846 1847 fmt = SRP_DATA_DESC_IMM; 1848 len = SRP_IMM_DATA_OFFSET; 1849 req->nmdesc = 0; 1850 buf = (void *)cmd->add_data + cmd->add_cdb_len; 1851 buf->len = cpu_to_be32(data_len); 1852 WARN_ON_ONCE((void *)(buf + 1) > (void *)cmd + len); 1853 for_each_sg(scat, sg, count, i) { 1854 sge[i].addr = sg_dma_address(sg); 1855 sge[i].length = sg_dma_len(sg); 1856 sge[i].lkey = target->lkey; 1857 } 1858 req->cmd->num_sge += count; 1859 goto map_complete; 1860 } 1861 1862 fmt = SRP_DATA_DESC_DIRECT; 1863 len = sizeof(struct srp_cmd) + cmd->add_cdb_len + 1864 sizeof(struct srp_direct_buf); 1865 1866 if (count == 1 && target->global_rkey) { 1867 /* 1868 * The midlayer only generated a single gather/scatter 1869 * entry, or DMA mapping coalesced everything to a 1870 * single entry. So a direct descriptor along with 1871 * the DMA MR suffices. 1872 */ 1873 struct srp_direct_buf *buf; 1874 1875 buf = (void *)cmd->add_data + cmd->add_cdb_len; 1876 buf->va = cpu_to_be64(sg_dma_address(scat)); 1877 buf->key = cpu_to_be32(target->global_rkey); 1878 buf->len = cpu_to_be32(sg_dma_len(scat)); 1879 1880 req->nmdesc = 0; 1881 goto map_complete; 1882 } 1883 1884 /* 1885 * We have more than one scatter/gather entry, so build our indirect 1886 * descriptor table, trying to merge as many entries as we can. 
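 * (The merging itself is performed by srp_map_sg_fr(), srp_map_sg_fmr() or
 * srp_map_sg_dma() below; the resulting descriptors are staged in
 * req->indirect_desc via state.desc.)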
1887 */ 1888 indirect_hdr = (void *)cmd->add_data + cmd->add_cdb_len; 1889 1890 ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr, 1891 target->indirect_size, DMA_TO_DEVICE); 1892 1893 memset(&state, 0, sizeof(state)); 1894 state.desc = req->indirect_desc; 1895 if (dev->use_fast_reg) 1896 ret = srp_map_sg_fr(&state, ch, req, scat, count); 1897 else if (dev->use_fmr) 1898 ret = srp_map_sg_fmr(&state, ch, req, scat, count); 1899 else 1900 ret = srp_map_sg_dma(&state, ch, req, scat, count); 1901 req->nmdesc = state.nmdesc; 1902 if (ret < 0) 1903 goto unmap; 1904 1905 { 1906 DEFINE_DYNAMIC_DEBUG_METADATA(ddm, 1907 "Memory mapping consistency check"); 1908 if (DYNAMIC_DEBUG_BRANCH(ddm)) 1909 srp_check_mapping(&state, ch, req, scat, count); 1910 } 1911 1912 /* We've mapped the request, now pull as much of the indirect 1913 * descriptor table as we can into the command buffer. If this 1914 * target is not using an external indirect table, we are 1915 * guaranteed to fit into the command, as the SCSI layer won't 1916 * give us more S/G entries than we allow. 1917 */ 1918 if (state.ndesc == 1) { 1919 /* 1920 * Memory registration collapsed the sg-list into one entry, 1921 * so use a direct descriptor. 1922 */ 1923 struct srp_direct_buf *buf; 1924 1925 buf = (void *)cmd->add_data + cmd->add_cdb_len; 1926 *buf = req->indirect_desc[0]; 1927 goto map_complete; 1928 } 1929 1930 if (unlikely(target->cmd_sg_cnt < state.ndesc && 1931 !target->allow_ext_sg)) { 1932 shost_printk(KERN_ERR, target->scsi_host, 1933 "Could not fit S/G list into SRP_CMD\n"); 1934 ret = -EIO; 1935 goto unmap; 1936 } 1937 1938 count = min(state.ndesc, target->cmd_sg_cnt); 1939 table_len = state.ndesc * sizeof (struct srp_direct_buf); 1940 idb_len = sizeof(struct srp_indirect_buf) + table_len; 1941 1942 fmt = SRP_DATA_DESC_INDIRECT; 1943 len = sizeof(struct srp_cmd) + cmd->add_cdb_len + 1944 sizeof(struct srp_indirect_buf); 1945 len += count * sizeof (struct srp_direct_buf); 1946 1947 memcpy(indirect_hdr->desc_list, req->indirect_desc, 1948 count * sizeof (struct srp_direct_buf)); 1949 1950 if (!target->global_rkey) { 1951 ret = srp_map_idb(ch, req, state.gen.next, state.gen.end, 1952 idb_len, &idb_rkey); 1953 if (ret < 0) 1954 goto unmap; 1955 req->nmdesc++; 1956 } else { 1957 idb_rkey = cpu_to_be32(target->global_rkey); 1958 } 1959 1960 indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr); 1961 indirect_hdr->table_desc.key = idb_rkey; 1962 indirect_hdr->table_desc.len = cpu_to_be32(table_len); 1963 indirect_hdr->len = cpu_to_be32(state.total_len); 1964 1965 if (scmnd->sc_data_direction == DMA_TO_DEVICE) 1966 cmd->data_out_desc_cnt = count; 1967 else 1968 cmd->data_in_desc_cnt = count; 1969 1970 ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len, 1971 DMA_TO_DEVICE); 1972 1973 map_complete: 1974 if (scmnd->sc_data_direction == DMA_TO_DEVICE) 1975 cmd->buf_fmt = fmt << 4; 1976 else 1977 cmd->buf_fmt = fmt; 1978 1979 return len; 1980 1981 unmap: 1982 srp_unmap_data(scmnd, ch, req); 1983 if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size) 1984 ret = -E2BIG; 1985 return ret; 1986 } 1987 1988 /* 1989 * Return an IU and possible credit to the free pool 1990 */ 1991 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu, 1992 enum srp_iu_type iu_type) 1993 { 1994 unsigned long flags; 1995 1996 spin_lock_irqsave(&ch->lock, flags); 1997 list_add(&iu->list, &ch->free_tx); 1998 if (iu_type != SRP_IU_RSP) 1999 ++ch->req_lim; 2000 spin_unlock_irqrestore(&ch->lock, flags); 2001 } 2002 
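/*
 * Illustrative sketch of how the helpers above and below are meant to be
 * paired by callers in this driver (a sketch only, with hypothetical
 * variable names; it is not an additional code path): an IU is taken from
 * ch->free_tx under ch->lock and, if posting it fails, handed back together
 * with its credit via srp_put_tx_iu().
 *
 *	spin_lock_irqsave(&ch->lock, flags);
 *	iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
 *	spin_unlock_irqrestore(&ch->lock, flags);
 *	if (!iu)
 *		... no free IU or no credit: back off and retry later ...
 *	... fill in the IU and compute len ...
 *	if (srp_post_send(ch, iu, len))
 *		srp_put_tx_iu(ch, iu, SRP_IU_CMD);
 */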
2003 /* 2004 * Must be called with ch->lock held to protect req_lim and free_tx. 2005 * If IU is not sent, it must be returned using srp_put_tx_iu(). 2006 * 2007 * Note: 2008 * An upper limit for the number of allocated information units for each 2009 * request type is: 2010 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues 2011 * more than Scsi_Host.can_queue requests. 2012 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE. 2013 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than 2014 * one unanswered SRP request to an initiator. 2015 */ 2016 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch, 2017 enum srp_iu_type iu_type) 2018 { 2019 struct srp_target_port *target = ch->target; 2020 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE; 2021 struct srp_iu *iu; 2022 2023 lockdep_assert_held(&ch->lock); 2024 2025 ib_process_cq_direct(ch->send_cq, -1); 2026 2027 if (list_empty(&ch->free_tx)) 2028 return NULL; 2029 2030 /* Initiator responses to target requests do not consume credits */ 2031 if (iu_type != SRP_IU_RSP) { 2032 if (ch->req_lim <= rsv) { 2033 ++target->zero_req_lim; 2034 return NULL; 2035 } 2036 2037 --ch->req_lim; 2038 } 2039 2040 iu = list_first_entry(&ch->free_tx, struct srp_iu, list); 2041 list_del(&iu->list); 2042 return iu; 2043 } 2044 2045 /* 2046 * Note: if this function is called from inside ib_drain_sq() then it will 2047 * be called without ch->lock being held. If ib_drain_sq() dequeues a WQE 2048 * with status IB_WC_SUCCESS then that's a bug. 2049 */ 2050 static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc) 2051 { 2052 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe); 2053 struct srp_rdma_ch *ch = cq->cq_context; 2054 2055 if (unlikely(wc->status != IB_WC_SUCCESS)) { 2056 srp_handle_qp_err(cq, wc, "SEND"); 2057 return; 2058 } 2059 2060 lockdep_assert_held(&ch->lock); 2061 2062 list_add(&iu->list, &ch->free_tx); 2063 } 2064 2065 /** 2066 * srp_post_send() - send an SRP information unit 2067 * @ch: RDMA channel over which to send the information unit. 2068 * @iu: Information unit to send. 2069 * @len: Length of the information unit excluding immediate data. 
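 *
 * iu->sge[0] only covers the @len bytes of the IU itself; when immediate
 * data is used, srp_map_data() has already filled in iu->sge[1..] and
 * increased iu->num_sge, so the single work request below posts the IU
 * header and the immediate data together.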
2070 */ 2071 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len) 2072 { 2073 struct srp_target_port *target = ch->target; 2074 struct ib_send_wr wr; 2075 2076 if (WARN_ON_ONCE(iu->num_sge > SRP_MAX_SGE)) 2077 return -EINVAL; 2078 2079 iu->sge[0].addr = iu->dma; 2080 iu->sge[0].length = len; 2081 iu->sge[0].lkey = target->lkey; 2082 2083 iu->cqe.done = srp_send_done; 2084 2085 wr.next = NULL; 2086 wr.wr_cqe = &iu->cqe; 2087 wr.sg_list = &iu->sge[0]; 2088 wr.num_sge = iu->num_sge; 2089 wr.opcode = IB_WR_SEND; 2090 wr.send_flags = IB_SEND_SIGNALED; 2091 2092 return ib_post_send(ch->qp, &wr, NULL); 2093 } 2094 2095 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu) 2096 { 2097 struct srp_target_port *target = ch->target; 2098 struct ib_recv_wr wr; 2099 struct ib_sge list; 2100 2101 list.addr = iu->dma; 2102 list.length = iu->size; 2103 list.lkey = target->lkey; 2104 2105 iu->cqe.done = srp_recv_done; 2106 2107 wr.next = NULL; 2108 wr.wr_cqe = &iu->cqe; 2109 wr.sg_list = &list; 2110 wr.num_sge = 1; 2111 2112 return ib_post_recv(ch->qp, &wr, NULL); 2113 } 2114 2115 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp) 2116 { 2117 struct srp_target_port *target = ch->target; 2118 struct srp_request *req; 2119 struct scsi_cmnd *scmnd; 2120 unsigned long flags; 2121 2122 if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) { 2123 spin_lock_irqsave(&ch->lock, flags); 2124 ch->req_lim += be32_to_cpu(rsp->req_lim_delta); 2125 if (rsp->tag == ch->tsk_mgmt_tag) { 2126 ch->tsk_mgmt_status = -1; 2127 if (be32_to_cpu(rsp->resp_data_len) >= 4) 2128 ch->tsk_mgmt_status = rsp->data[3]; 2129 complete(&ch->tsk_mgmt_done); 2130 } else { 2131 shost_printk(KERN_ERR, target->scsi_host, 2132 "Received tsk mgmt response too late for tag %#llx\n", 2133 rsp->tag); 2134 } 2135 spin_unlock_irqrestore(&ch->lock, flags); 2136 } else { 2137 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag); 2138 if (scmnd && scmnd->host_scribble) { 2139 req = (void *)scmnd->host_scribble; 2140 scmnd = srp_claim_req(ch, req, NULL, scmnd); 2141 } else { 2142 scmnd = NULL; 2143 } 2144 if (!scmnd) { 2145 shost_printk(KERN_ERR, target->scsi_host, 2146 "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n", 2147 rsp->tag, ch - target->ch, ch->qp->qp_num); 2148 2149 spin_lock_irqsave(&ch->lock, flags); 2150 ch->req_lim += be32_to_cpu(rsp->req_lim_delta); 2151 spin_unlock_irqrestore(&ch->lock, flags); 2152 2153 return; 2154 } 2155 scmnd->result = rsp->status; 2156 2157 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) { 2158 memcpy(scmnd->sense_buffer, rsp->data + 2159 be32_to_cpu(rsp->resp_data_len), 2160 min_t(int, be32_to_cpu(rsp->sense_data_len), 2161 SCSI_SENSE_BUFFERSIZE)); 2162 } 2163 2164 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER)) 2165 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt)); 2166 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER)) 2167 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt)); 2168 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER)) 2169 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt)); 2170 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER)) 2171 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt)); 2172 2173 srp_free_req(ch, req, scmnd, 2174 be32_to_cpu(rsp->req_lim_delta)); 2175 2176 scmnd->host_scribble = NULL; 2177 scmnd->scsi_done(scmnd); 2178 } 2179 } 2180 2181 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta, 2182 void *rsp, int len) 2183 { 2184 struct srp_target_port *target = ch->target; 2185 
struct ib_device *dev = target->srp_host->srp_dev->dev; 2186 unsigned long flags; 2187 struct srp_iu *iu; 2188 int err; 2189 2190 spin_lock_irqsave(&ch->lock, flags); 2191 ch->req_lim += req_delta; 2192 iu = __srp_get_tx_iu(ch, SRP_IU_RSP); 2193 spin_unlock_irqrestore(&ch->lock, flags); 2194 2195 if (!iu) { 2196 shost_printk(KERN_ERR, target->scsi_host, PFX 2197 "no IU available to send response\n"); 2198 return 1; 2199 } 2200 2201 iu->num_sge = 1; 2202 ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE); 2203 memcpy(iu->buf, rsp, len); 2204 ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE); 2205 2206 err = srp_post_send(ch, iu, len); 2207 if (err) { 2208 shost_printk(KERN_ERR, target->scsi_host, PFX 2209 "unable to post response: %d\n", err); 2210 srp_put_tx_iu(ch, iu, SRP_IU_RSP); 2211 } 2212 2213 return err; 2214 } 2215 2216 static void srp_process_cred_req(struct srp_rdma_ch *ch, 2217 struct srp_cred_req *req) 2218 { 2219 struct srp_cred_rsp rsp = { 2220 .opcode = SRP_CRED_RSP, 2221 .tag = req->tag, 2222 }; 2223 s32 delta = be32_to_cpu(req->req_lim_delta); 2224 2225 if (srp_response_common(ch, delta, &rsp, sizeof(rsp))) 2226 shost_printk(KERN_ERR, ch->target->scsi_host, PFX 2227 "problems processing SRP_CRED_REQ\n"); 2228 } 2229 2230 static void srp_process_aer_req(struct srp_rdma_ch *ch, 2231 struct srp_aer_req *req) 2232 { 2233 struct srp_target_port *target = ch->target; 2234 struct srp_aer_rsp rsp = { 2235 .opcode = SRP_AER_RSP, 2236 .tag = req->tag, 2237 }; 2238 s32 delta = be32_to_cpu(req->req_lim_delta); 2239 2240 shost_printk(KERN_ERR, target->scsi_host, PFX 2241 "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun)); 2242 2243 if (srp_response_common(ch, delta, &rsp, sizeof(rsp))) 2244 shost_printk(KERN_ERR, target->scsi_host, PFX 2245 "problems processing SRP_AER_REQ\n"); 2246 } 2247 2248 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc) 2249 { 2250 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe); 2251 struct srp_rdma_ch *ch = cq->cq_context; 2252 struct srp_target_port *target = ch->target; 2253 struct ib_device *dev = target->srp_host->srp_dev->dev; 2254 int res; 2255 u8 opcode; 2256 2257 if (unlikely(wc->status != IB_WC_SUCCESS)) { 2258 srp_handle_qp_err(cq, wc, "RECV"); 2259 return; 2260 } 2261 2262 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len, 2263 DMA_FROM_DEVICE); 2264 2265 opcode = *(u8 *) iu->buf; 2266 2267 if (0) { 2268 shost_printk(KERN_ERR, target->scsi_host, 2269 PFX "recv completion, opcode 0x%02x\n", opcode); 2270 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1, 2271 iu->buf, wc->byte_len, true); 2272 } 2273 2274 switch (opcode) { 2275 case SRP_RSP: 2276 srp_process_rsp(ch, iu->buf); 2277 break; 2278 2279 case SRP_CRED_REQ: 2280 srp_process_cred_req(ch, iu->buf); 2281 break; 2282 2283 case SRP_AER_REQ: 2284 srp_process_aer_req(ch, iu->buf); 2285 break; 2286 2287 case SRP_T_LOGOUT: 2288 /* XXX Handle target logout */ 2289 shost_printk(KERN_WARNING, target->scsi_host, 2290 PFX "Got target logout request\n"); 2291 break; 2292 2293 default: 2294 shost_printk(KERN_WARNING, target->scsi_host, 2295 PFX "Unhandled SRP opcode 0x%02x\n", opcode); 2296 break; 2297 } 2298 2299 ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len, 2300 DMA_FROM_DEVICE); 2301 2302 res = srp_post_recv(ch, iu); 2303 if (res != 0) 2304 shost_printk(KERN_ERR, target->scsi_host, 2305 PFX "Recv failed with error code %d\n", res); 2306 } 2307 2308 /** 2309 * srp_tl_err_work() - handle a transport layer error 2310 * 
@work: Work structure embedded in an SRP target port. 2311 * 2312 * Note: This function may get invoked before the rport has been created, 2313 * hence the target->rport test. 2314 */ 2315 static void srp_tl_err_work(struct work_struct *work) 2316 { 2317 struct srp_target_port *target; 2318 2319 target = container_of(work, struct srp_target_port, tl_err_work); 2320 if (target->rport) 2321 srp_start_tl_fail_timers(target->rport); 2322 } 2323 2324 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc, 2325 const char *opname) 2326 { 2327 struct srp_rdma_ch *ch = cq->cq_context; 2328 struct srp_target_port *target = ch->target; 2329 2330 if (ch->connected && !target->qp_in_error) { 2331 shost_printk(KERN_ERR, target->scsi_host, 2332 PFX "failed %s status %s (%d) for CQE %p\n", 2333 opname, ib_wc_status_msg(wc->status), wc->status, 2334 wc->wr_cqe); 2335 queue_work(system_long_wq, &target->tl_err_work); 2336 } 2337 target->qp_in_error = true; 2338 } 2339 2340 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) 2341 { 2342 struct srp_target_port *target = host_to_target(shost); 2343 struct srp_rdma_ch *ch; 2344 struct srp_request *req; 2345 struct srp_iu *iu; 2346 struct srp_cmd *cmd; 2347 struct ib_device *dev; 2348 unsigned long flags; 2349 u32 tag; 2350 u16 idx; 2351 int len, ret; 2352 2353 scmnd->result = srp_chkready(target->rport); 2354 if (unlikely(scmnd->result)) 2355 goto err; 2356 2357 WARN_ON_ONCE(scmnd->request->tag < 0); 2358 tag = blk_mq_unique_tag(scmnd->request); 2359 ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)]; 2360 idx = blk_mq_unique_tag_to_tag(tag); 2361 WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n", 2362 dev_name(&shost->shost_gendev), tag, idx, 2363 target->req_ring_size); 2364 2365 spin_lock_irqsave(&ch->lock, flags); 2366 iu = __srp_get_tx_iu(ch, SRP_IU_CMD); 2367 spin_unlock_irqrestore(&ch->lock, flags); 2368 2369 if (!iu) 2370 goto err; 2371 2372 req = &ch->req_ring[idx]; 2373 dev = target->srp_host->srp_dev->dev; 2374 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_it_iu_len, 2375 DMA_TO_DEVICE); 2376 2377 scmnd->host_scribble = (void *) req; 2378 2379 cmd = iu->buf; 2380 memset(cmd, 0, sizeof *cmd); 2381 2382 cmd->opcode = SRP_CMD; 2383 int_to_scsilun(scmnd->device->lun, &cmd->lun); 2384 cmd->tag = tag; 2385 memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len); 2386 if (unlikely(scmnd->cmd_len > sizeof(cmd->cdb))) { 2387 cmd->add_cdb_len = round_up(scmnd->cmd_len - sizeof(cmd->cdb), 2388 4); 2389 if (WARN_ON_ONCE(cmd->add_cdb_len > SRP_MAX_ADD_CDB_LEN)) 2390 goto err_iu; 2391 } 2392 2393 req->scmnd = scmnd; 2394 req->cmd = iu; 2395 2396 len = srp_map_data(scmnd, ch, req); 2397 if (len < 0) { 2398 shost_printk(KERN_ERR, target->scsi_host, 2399 PFX "Failed to map data (%d)\n", len); 2400 /* 2401 * If we ran out of memory descriptors (-ENOMEM) because an 2402 * application is queuing many requests with more than 2403 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer 2404 * to reduce queue depth temporarily. 2405 */ 2406 scmnd->result = len == -ENOMEM ? 
2407 DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16; 2408 goto err_iu; 2409 } 2410 2411 ib_dma_sync_single_for_device(dev, iu->dma, ch->max_it_iu_len, 2412 DMA_TO_DEVICE); 2413 2414 if (srp_post_send(ch, iu, len)) { 2415 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n"); 2416 scmnd->result = DID_ERROR << 16; 2417 goto err_unmap; 2418 } 2419 2420 return 0; 2421 2422 err_unmap: 2423 srp_unmap_data(scmnd, ch, req); 2424 2425 err_iu: 2426 srp_put_tx_iu(ch, iu, SRP_IU_CMD); 2427 2428 /* 2429 * Avoid that the loops that iterate over the request ring can 2430 * encounter a dangling SCSI command pointer. 2431 */ 2432 req->scmnd = NULL; 2433 2434 err: 2435 if (scmnd->result) { 2436 scmnd->scsi_done(scmnd); 2437 ret = 0; 2438 } else { 2439 ret = SCSI_MLQUEUE_HOST_BUSY; 2440 } 2441 2442 return ret; 2443 } 2444 2445 /* 2446 * Note: the resources allocated in this function are freed in 2447 * srp_free_ch_ib(). 2448 */ 2449 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch) 2450 { 2451 struct srp_target_port *target = ch->target; 2452 int i; 2453 2454 ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring), 2455 GFP_KERNEL); 2456 if (!ch->rx_ring) 2457 goto err_no_ring; 2458 ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring), 2459 GFP_KERNEL); 2460 if (!ch->tx_ring) 2461 goto err_no_ring; 2462 2463 for (i = 0; i < target->queue_size; ++i) { 2464 ch->rx_ring[i] = srp_alloc_iu(target->srp_host, 2465 ch->max_ti_iu_len, 2466 GFP_KERNEL, DMA_FROM_DEVICE); 2467 if (!ch->rx_ring[i]) 2468 goto err; 2469 } 2470 2471 for (i = 0; i < target->queue_size; ++i) { 2472 ch->tx_ring[i] = srp_alloc_iu(target->srp_host, 2473 ch->max_it_iu_len, 2474 GFP_KERNEL, DMA_TO_DEVICE); 2475 if (!ch->tx_ring[i]) 2476 goto err; 2477 2478 list_add(&ch->tx_ring[i]->list, &ch->free_tx); 2479 } 2480 2481 return 0; 2482 2483 err: 2484 for (i = 0; i < target->queue_size; ++i) { 2485 srp_free_iu(target->srp_host, ch->rx_ring[i]); 2486 srp_free_iu(target->srp_host, ch->tx_ring[i]); 2487 } 2488 2489 2490 err_no_ring: 2491 kfree(ch->tx_ring); 2492 ch->tx_ring = NULL; 2493 kfree(ch->rx_ring); 2494 ch->rx_ring = NULL; 2495 2496 return -ENOMEM; 2497 } 2498 2499 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask) 2500 { 2501 uint64_t T_tr_ns, max_compl_time_ms; 2502 uint32_t rq_tmo_jiffies; 2503 2504 /* 2505 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair, 2506 * table 91), both the QP timeout and the retry count have to be set 2507 * for RC QP's during the RTR to RTS transition. 2508 */ 2509 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) != 2510 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)); 2511 2512 /* 2513 * Set target->rq_tmo_jiffies to one second more than the largest time 2514 * it can take before an error completion is generated. See also 2515 * C9-140..142 in the IBTA spec for more information about how to 2516 * convert the QP Local ACK Timeout value to nanoseconds. 
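 *
 * Worked example (illustrative values): with qp_attr->timeout = 18 and
 * qp_attr->retry_cnt = 7, T_tr = 4096 * 2^18 ns ~= 1.07 s, so the
 * largest possible completion delay is about 7 * 4 * 1.07 s ~= 30 s and
 * the returned timeout corresponds to roughly 31 seconds.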
2517 */ 2518 T_tr_ns = 4096 * (1ULL << qp_attr->timeout); 2519 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns; 2520 do_div(max_compl_time_ms, NSEC_PER_MSEC); 2521 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000); 2522 2523 return rq_tmo_jiffies; 2524 } 2525 2526 static void srp_cm_rep_handler(struct ib_cm_id *cm_id, 2527 const struct srp_login_rsp *lrsp, 2528 struct srp_rdma_ch *ch) 2529 { 2530 struct srp_target_port *target = ch->target; 2531 struct ib_qp_attr *qp_attr = NULL; 2532 int attr_mask = 0; 2533 int ret = 0; 2534 int i; 2535 2536 if (lrsp->opcode == SRP_LOGIN_RSP) { 2537 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len); 2538 ch->req_lim = be32_to_cpu(lrsp->req_lim_delta); 2539 ch->use_imm_data = lrsp->rsp_flags & SRP_LOGIN_RSP_IMMED_SUPP; 2540 ch->max_it_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, 2541 ch->use_imm_data); 2542 WARN_ON_ONCE(ch->max_it_iu_len > 2543 be32_to_cpu(lrsp->max_it_iu_len)); 2544 2545 if (ch->use_imm_data) 2546 shost_printk(KERN_DEBUG, target->scsi_host, 2547 PFX "using immediate data\n"); 2548 2549 /* 2550 * Reserve credits for task management so we don't 2551 * bounce requests back to the SCSI mid-layer. 2552 */ 2553 target->scsi_host->can_queue 2554 = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE, 2555 target->scsi_host->can_queue); 2556 target->scsi_host->cmd_per_lun 2557 = min_t(int, target->scsi_host->can_queue, 2558 target->scsi_host->cmd_per_lun); 2559 } else { 2560 shost_printk(KERN_WARNING, target->scsi_host, 2561 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode); 2562 ret = -ECONNRESET; 2563 goto error; 2564 } 2565 2566 if (!ch->rx_ring) { 2567 ret = srp_alloc_iu_bufs(ch); 2568 if (ret) 2569 goto error; 2570 } 2571 2572 for (i = 0; i < target->queue_size; i++) { 2573 struct srp_iu *iu = ch->rx_ring[i]; 2574 2575 ret = srp_post_recv(ch, iu); 2576 if (ret) 2577 goto error; 2578 } 2579 2580 if (!target->using_rdma_cm) { 2581 ret = -ENOMEM; 2582 qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL); 2583 if (!qp_attr) 2584 goto error; 2585 2586 qp_attr->qp_state = IB_QPS_RTR; 2587 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); 2588 if (ret) 2589 goto error_free; 2590 2591 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask); 2592 if (ret) 2593 goto error_free; 2594 2595 qp_attr->qp_state = IB_QPS_RTS; 2596 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask); 2597 if (ret) 2598 goto error_free; 2599 2600 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask); 2601 2602 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask); 2603 if (ret) 2604 goto error_free; 2605 2606 ret = ib_send_cm_rtu(cm_id, NULL, 0); 2607 } 2608 2609 error_free: 2610 kfree(qp_attr); 2611 2612 error: 2613 ch->status = ret; 2614 } 2615 2616 static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id, 2617 const struct ib_cm_event *event, 2618 struct srp_rdma_ch *ch) 2619 { 2620 struct srp_target_port *target = ch->target; 2621 struct Scsi_Host *shost = target->scsi_host; 2622 struct ib_class_port_info *cpi; 2623 int opcode; 2624 u16 dlid; 2625 2626 switch (event->param.rej_rcvd.reason) { 2627 case IB_CM_REJ_PORT_CM_REDIRECT: 2628 cpi = event->param.rej_rcvd.ari; 2629 dlid = be16_to_cpu(cpi->redirect_lid); 2630 sa_path_set_dlid(&ch->ib_cm.path, dlid); 2631 ch->ib_cm.path.pkey = cpi->redirect_pkey; 2632 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff; 2633 memcpy(ch->ib_cm.path.dgid.raw, cpi->redirect_gid, 16); 2634 2635 ch->status = dlid ? 
SRP_DLID_REDIRECT : SRP_PORT_REDIRECT; 2636 break; 2637 2638 case IB_CM_REJ_PORT_REDIRECT: 2639 if (srp_target_is_topspin(target)) { 2640 union ib_gid *dgid = &ch->ib_cm.path.dgid; 2641 2642 /* 2643 * Topspin/Cisco SRP gateways incorrectly send 2644 * reject reason code 25 when they mean 24 2645 * (port redirect). 2646 */ 2647 memcpy(dgid->raw, event->param.rej_rcvd.ari, 16); 2648 2649 shost_printk(KERN_DEBUG, shost, 2650 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n", 2651 be64_to_cpu(dgid->global.subnet_prefix), 2652 be64_to_cpu(dgid->global.interface_id)); 2653 2654 ch->status = SRP_PORT_REDIRECT; 2655 } else { 2656 shost_printk(KERN_WARNING, shost, 2657 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n"); 2658 ch->status = -ECONNRESET; 2659 } 2660 break; 2661 2662 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID: 2663 shost_printk(KERN_WARNING, shost, 2664 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n"); 2665 ch->status = -ECONNRESET; 2666 break; 2667 2668 case IB_CM_REJ_CONSUMER_DEFINED: 2669 opcode = *(u8 *) event->private_data; 2670 if (opcode == SRP_LOGIN_REJ) { 2671 struct srp_login_rej *rej = event->private_data; 2672 u32 reason = be32_to_cpu(rej->reason); 2673 2674 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE) 2675 shost_printk(KERN_WARNING, shost, 2676 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n"); 2677 else 2678 shost_printk(KERN_WARNING, shost, PFX 2679 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n", 2680 target->sgid.raw, 2681 target->ib_cm.orig_dgid.raw, 2682 reason); 2683 } else 2684 shost_printk(KERN_WARNING, shost, 2685 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED," 2686 " opcode 0x%02x\n", opcode); 2687 ch->status = -ECONNRESET; 2688 break; 2689 2690 case IB_CM_REJ_STALE_CONN: 2691 shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n"); 2692 ch->status = SRP_STALE_CONN; 2693 break; 2694 2695 default: 2696 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n", 2697 event->param.rej_rcvd.reason); 2698 ch->status = -ECONNRESET; 2699 } 2700 } 2701 2702 static int srp_ib_cm_handler(struct ib_cm_id *cm_id, 2703 const struct ib_cm_event *event) 2704 { 2705 struct srp_rdma_ch *ch = cm_id->context; 2706 struct srp_target_port *target = ch->target; 2707 int comp = 0; 2708 2709 switch (event->event) { 2710 case IB_CM_REQ_ERROR: 2711 shost_printk(KERN_DEBUG, target->scsi_host, 2712 PFX "Sending CM REQ failed\n"); 2713 comp = 1; 2714 ch->status = -ECONNRESET; 2715 break; 2716 2717 case IB_CM_REP_RECEIVED: 2718 comp = 1; 2719 srp_cm_rep_handler(cm_id, event->private_data, ch); 2720 break; 2721 2722 case IB_CM_REJ_RECEIVED: 2723 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n"); 2724 comp = 1; 2725 2726 srp_ib_cm_rej_handler(cm_id, event, ch); 2727 break; 2728 2729 case IB_CM_DREQ_RECEIVED: 2730 shost_printk(KERN_WARNING, target->scsi_host, 2731 PFX "DREQ received - connection closed\n"); 2732 ch->connected = false; 2733 if (ib_send_cm_drep(cm_id, NULL, 0)) 2734 shost_printk(KERN_ERR, target->scsi_host, 2735 PFX "Sending CM DREP failed\n"); 2736 queue_work(system_long_wq, &target->tl_err_work); 2737 break; 2738 2739 case IB_CM_TIMEWAIT_EXIT: 2740 shost_printk(KERN_ERR, target->scsi_host, 2741 PFX "connection closed\n"); 2742 comp = 1; 2743 2744 ch->status = 0; 2745 break; 2746 2747 case IB_CM_MRA_RECEIVED: 2748 case IB_CM_DREQ_ERROR: 2749 case IB_CM_DREP_RECEIVED: 2750 break; 2751 2752 default: 2753 shost_printk(KERN_WARNING, target->scsi_host, 2754 PFX "Unhandled CM event %d\n", event->event); 2755 break; 2756 } 2757 2758 if 
(comp) 2759 complete(&ch->done); 2760 2761 return 0; 2762 } 2763 2764 static void srp_rdma_cm_rej_handler(struct srp_rdma_ch *ch, 2765 struct rdma_cm_event *event) 2766 { 2767 struct srp_target_port *target = ch->target; 2768 struct Scsi_Host *shost = target->scsi_host; 2769 int opcode; 2770 2771 switch (event->status) { 2772 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID: 2773 shost_printk(KERN_WARNING, shost, 2774 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n"); 2775 ch->status = -ECONNRESET; 2776 break; 2777 2778 case IB_CM_REJ_CONSUMER_DEFINED: 2779 opcode = *(u8 *) event->param.conn.private_data; 2780 if (opcode == SRP_LOGIN_REJ) { 2781 struct srp_login_rej *rej = 2782 (struct srp_login_rej *) 2783 event->param.conn.private_data; 2784 u32 reason = be32_to_cpu(rej->reason); 2785 2786 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE) 2787 shost_printk(KERN_WARNING, shost, 2788 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n"); 2789 else 2790 shost_printk(KERN_WARNING, shost, 2791 PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason); 2792 } else { 2793 shost_printk(KERN_WARNING, shost, 2794 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n", 2795 opcode); 2796 } 2797 ch->status = -ECONNRESET; 2798 break; 2799 2800 case IB_CM_REJ_STALE_CONN: 2801 shost_printk(KERN_WARNING, shost, 2802 " REJ reason: stale connection\n"); 2803 ch->status = SRP_STALE_CONN; 2804 break; 2805 2806 default: 2807 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n", 2808 event->status); 2809 ch->status = -ECONNRESET; 2810 break; 2811 } 2812 } 2813 2814 static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id, 2815 struct rdma_cm_event *event) 2816 { 2817 struct srp_rdma_ch *ch = cm_id->context; 2818 struct srp_target_port *target = ch->target; 2819 int comp = 0; 2820 2821 switch (event->event) { 2822 case RDMA_CM_EVENT_ADDR_RESOLVED: 2823 ch->status = 0; 2824 comp = 1; 2825 break; 2826 2827 case RDMA_CM_EVENT_ADDR_ERROR: 2828 ch->status = -ENXIO; 2829 comp = 1; 2830 break; 2831 2832 case RDMA_CM_EVENT_ROUTE_RESOLVED: 2833 ch->status = 0; 2834 comp = 1; 2835 break; 2836 2837 case RDMA_CM_EVENT_ROUTE_ERROR: 2838 case RDMA_CM_EVENT_UNREACHABLE: 2839 ch->status = -EHOSTUNREACH; 2840 comp = 1; 2841 break; 2842 2843 case RDMA_CM_EVENT_CONNECT_ERROR: 2844 shost_printk(KERN_DEBUG, target->scsi_host, 2845 PFX "Sending CM REQ failed\n"); 2846 comp = 1; 2847 ch->status = -ECONNRESET; 2848 break; 2849 2850 case RDMA_CM_EVENT_ESTABLISHED: 2851 comp = 1; 2852 srp_cm_rep_handler(NULL, event->param.conn.private_data, ch); 2853 break; 2854 2855 case RDMA_CM_EVENT_REJECTED: 2856 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n"); 2857 comp = 1; 2858 2859 srp_rdma_cm_rej_handler(ch, event); 2860 break; 2861 2862 case RDMA_CM_EVENT_DISCONNECTED: 2863 if (ch->connected) { 2864 shost_printk(KERN_WARNING, target->scsi_host, 2865 PFX "received DREQ\n"); 2866 rdma_disconnect(ch->rdma_cm.cm_id); 2867 comp = 1; 2868 ch->status = 0; 2869 queue_work(system_long_wq, &target->tl_err_work); 2870 } 2871 break; 2872 2873 case RDMA_CM_EVENT_TIMEWAIT_EXIT: 2874 shost_printk(KERN_ERR, target->scsi_host, 2875 PFX "connection closed\n"); 2876 2877 comp = 1; 2878 ch->status = 0; 2879 break; 2880 2881 default: 2882 shost_printk(KERN_WARNING, target->scsi_host, 2883 PFX "Unhandled CM event %d\n", event->event); 2884 break; 2885 } 2886 2887 if (comp) 2888 complete(&ch->done); 2889 2890 return 0; 2891 } 2892 2893 /** 2894 * srp_change_queue_depth - setting device queue depth 2895 * @sdev: scsi device struct 2896 * @qdepth: 
requested queue depth 2897 * 2898 * Returns queue depth. 2899 */ 2900 static int 2901 srp_change_queue_depth(struct scsi_device *sdev, int qdepth) 2902 { 2903 if (!sdev->tagged_supported) 2904 qdepth = 1; 2905 return scsi_change_queue_depth(sdev, qdepth); 2906 } 2907 2908 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun, 2909 u8 func, u8 *status) 2910 { 2911 struct srp_target_port *target = ch->target; 2912 struct srp_rport *rport = target->rport; 2913 struct ib_device *dev = target->srp_host->srp_dev->dev; 2914 struct srp_iu *iu; 2915 struct srp_tsk_mgmt *tsk_mgmt; 2916 int res; 2917 2918 if (!ch->connected || target->qp_in_error) 2919 return -1; 2920 2921 /* 2922 * Lock the rport mutex to avoid that srp_create_ch_ib() is 2923 * invoked while a task management function is being sent. 2924 */ 2925 mutex_lock(&rport->mutex); 2926 spin_lock_irq(&ch->lock); 2927 iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT); 2928 spin_unlock_irq(&ch->lock); 2929 2930 if (!iu) { 2931 mutex_unlock(&rport->mutex); 2932 2933 return -1; 2934 } 2935 2936 iu->num_sge = 1; 2937 2938 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt, 2939 DMA_TO_DEVICE); 2940 tsk_mgmt = iu->buf; 2941 memset(tsk_mgmt, 0, sizeof *tsk_mgmt); 2942 2943 tsk_mgmt->opcode = SRP_TSK_MGMT; 2944 int_to_scsilun(lun, &tsk_mgmt->lun); 2945 tsk_mgmt->tsk_mgmt_func = func; 2946 tsk_mgmt->task_tag = req_tag; 2947 2948 spin_lock_irq(&ch->lock); 2949 ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT; 2950 tsk_mgmt->tag = ch->tsk_mgmt_tag; 2951 spin_unlock_irq(&ch->lock); 2952 2953 init_completion(&ch->tsk_mgmt_done); 2954 2955 ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt, 2956 DMA_TO_DEVICE); 2957 if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) { 2958 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT); 2959 mutex_unlock(&rport->mutex); 2960 2961 return -1; 2962 } 2963 res = wait_for_completion_timeout(&ch->tsk_mgmt_done, 2964 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)); 2965 if (res > 0 && status) 2966 *status = ch->tsk_mgmt_status; 2967 mutex_unlock(&rport->mutex); 2968 2969 WARN_ON_ONCE(res < 0); 2970 2971 return res > 0 ? 
0 : -1; 2972 } 2973 2974 static int srp_abort(struct scsi_cmnd *scmnd) 2975 { 2976 struct srp_target_port *target = host_to_target(scmnd->device->host); 2977 struct srp_request *req = (struct srp_request *) scmnd->host_scribble; 2978 u32 tag; 2979 u16 ch_idx; 2980 struct srp_rdma_ch *ch; 2981 int ret; 2982 2983 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); 2984 2985 if (!req) 2986 return SUCCESS; 2987 tag = blk_mq_unique_tag(scmnd->request); 2988 ch_idx = blk_mq_unique_tag_to_hwq(tag); 2989 if (WARN_ON_ONCE(ch_idx >= target->ch_count)) 2990 return SUCCESS; 2991 ch = &target->ch[ch_idx]; 2992 if (!srp_claim_req(ch, req, NULL, scmnd)) 2993 return SUCCESS; 2994 shost_printk(KERN_ERR, target->scsi_host, 2995 "Sending SRP abort for tag %#x\n", tag); 2996 if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun, 2997 SRP_TSK_ABORT_TASK, NULL) == 0) 2998 ret = SUCCESS; 2999 else if (target->rport->state == SRP_RPORT_LOST) 3000 ret = FAST_IO_FAIL; 3001 else 3002 ret = FAILED; 3003 if (ret == SUCCESS) { 3004 srp_free_req(ch, req, scmnd, 0); 3005 scmnd->result = DID_ABORT << 16; 3006 scmnd->scsi_done(scmnd); 3007 } 3008 3009 return ret; 3010 } 3011 3012 static int srp_reset_device(struct scsi_cmnd *scmnd) 3013 { 3014 struct srp_target_port *target = host_to_target(scmnd->device->host); 3015 struct srp_rdma_ch *ch; 3016 u8 status; 3017 3018 shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n"); 3019 3020 ch = &target->ch[0]; 3021 if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun, 3022 SRP_TSK_LUN_RESET, &status)) 3023 return FAILED; 3024 if (status) 3025 return FAILED; 3026 3027 return SUCCESS; 3028 } 3029 3030 static int srp_reset_host(struct scsi_cmnd *scmnd) 3031 { 3032 struct srp_target_port *target = host_to_target(scmnd->device->host); 3033 3034 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n"); 3035 3036 return srp_reconnect_rport(target->rport) == 0 ? 
SUCCESS : FAILED; 3037 } 3038 3039 static int srp_target_alloc(struct scsi_target *starget) 3040 { 3041 struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); 3042 struct srp_target_port *target = host_to_target(shost); 3043 3044 if (target->target_can_queue) 3045 starget->can_queue = target->target_can_queue; 3046 return 0; 3047 } 3048 3049 static int srp_slave_configure(struct scsi_device *sdev) 3050 { 3051 struct Scsi_Host *shost = sdev->host; 3052 struct srp_target_port *target = host_to_target(shost); 3053 struct request_queue *q = sdev->request_queue; 3054 unsigned long timeout; 3055 3056 if (sdev->type == TYPE_DISK) { 3057 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies); 3058 blk_queue_rq_timeout(q, timeout); 3059 } 3060 3061 return 0; 3062 } 3063 3064 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr, 3065 char *buf) 3066 { 3067 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3068 3069 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext)); 3070 } 3071 3072 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr, 3073 char *buf) 3074 { 3075 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3076 3077 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid)); 3078 } 3079 3080 static ssize_t show_service_id(struct device *dev, 3081 struct device_attribute *attr, char *buf) 3082 { 3083 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3084 3085 if (target->using_rdma_cm) 3086 return -ENOENT; 3087 return sprintf(buf, "0x%016llx\n", 3088 be64_to_cpu(target->ib_cm.service_id)); 3089 } 3090 3091 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, 3092 char *buf) 3093 { 3094 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3095 3096 if (target->using_rdma_cm) 3097 return -ENOENT; 3098 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey)); 3099 } 3100 3101 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr, 3102 char *buf) 3103 { 3104 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3105 3106 return sprintf(buf, "%pI6\n", target->sgid.raw); 3107 } 3108 3109 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr, 3110 char *buf) 3111 { 3112 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3113 struct srp_rdma_ch *ch = &target->ch[0]; 3114 3115 if (target->using_rdma_cm) 3116 return -ENOENT; 3117 return sprintf(buf, "%pI6\n", ch->ib_cm.path.dgid.raw); 3118 } 3119 3120 static ssize_t show_orig_dgid(struct device *dev, 3121 struct device_attribute *attr, char *buf) 3122 { 3123 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3124 3125 if (target->using_rdma_cm) 3126 return -ENOENT; 3127 return sprintf(buf, "%pI6\n", target->ib_cm.orig_dgid.raw); 3128 } 3129 3130 static ssize_t show_req_lim(struct device *dev, 3131 struct device_attribute *attr, char *buf) 3132 { 3133 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3134 struct srp_rdma_ch *ch; 3135 int i, req_lim = INT_MAX; 3136 3137 for (i = 0; i < target->ch_count; i++) { 3138 ch = &target->ch[i]; 3139 req_lim = min(req_lim, ch->req_lim); 3140 } 3141 return sprintf(buf, "%d\n", req_lim); 3142 } 3143 3144 static ssize_t show_zero_req_lim(struct device *dev, 3145 struct device_attribute *attr, char *buf) 3146 { 3147 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3148 3149 return sprintf(buf, "%d\n", 
target->zero_req_lim); 3150 } 3151 3152 static ssize_t show_local_ib_port(struct device *dev, 3153 struct device_attribute *attr, char *buf) 3154 { 3155 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3156 3157 return sprintf(buf, "%d\n", target->srp_host->port); 3158 } 3159 3160 static ssize_t show_local_ib_device(struct device *dev, 3161 struct device_attribute *attr, char *buf) 3162 { 3163 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3164 3165 return sprintf(buf, "%s\n", 3166 dev_name(&target->srp_host->srp_dev->dev->dev)); 3167 } 3168 3169 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr, 3170 char *buf) 3171 { 3172 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3173 3174 return sprintf(buf, "%d\n", target->ch_count); 3175 } 3176 3177 static ssize_t show_comp_vector(struct device *dev, 3178 struct device_attribute *attr, char *buf) 3179 { 3180 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3181 3182 return sprintf(buf, "%d\n", target->comp_vector); 3183 } 3184 3185 static ssize_t show_tl_retry_count(struct device *dev, 3186 struct device_attribute *attr, char *buf) 3187 { 3188 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3189 3190 return sprintf(buf, "%d\n", target->tl_retry_count); 3191 } 3192 3193 static ssize_t show_cmd_sg_entries(struct device *dev, 3194 struct device_attribute *attr, char *buf) 3195 { 3196 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3197 3198 return sprintf(buf, "%u\n", target->cmd_sg_cnt); 3199 } 3200 3201 static ssize_t show_allow_ext_sg(struct device *dev, 3202 struct device_attribute *attr, char *buf) 3203 { 3204 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 3205 3206 return sprintf(buf, "%s\n", target->allow_ext_sg ? 
"true" : "false"); 3207 } 3208 3209 static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL); 3210 static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL); 3211 static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL); 3212 static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL); 3213 static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL); 3214 static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL); 3215 static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL); 3216 static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL); 3217 static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL); 3218 static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL); 3219 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL); 3220 static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL); 3221 static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL); 3222 static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL); 3223 static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL); 3224 static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL); 3225 3226 static struct device_attribute *srp_host_attrs[] = { 3227 &dev_attr_id_ext, 3228 &dev_attr_ioc_guid, 3229 &dev_attr_service_id, 3230 &dev_attr_pkey, 3231 &dev_attr_sgid, 3232 &dev_attr_dgid, 3233 &dev_attr_orig_dgid, 3234 &dev_attr_req_lim, 3235 &dev_attr_zero_req_lim, 3236 &dev_attr_local_ib_port, 3237 &dev_attr_local_ib_device, 3238 &dev_attr_ch_count, 3239 &dev_attr_comp_vector, 3240 &dev_attr_tl_retry_count, 3241 &dev_attr_cmd_sg_entries, 3242 &dev_attr_allow_ext_sg, 3243 NULL 3244 }; 3245 3246 static struct scsi_host_template srp_template = { 3247 .module = THIS_MODULE, 3248 .name = "InfiniBand SRP initiator", 3249 .proc_name = DRV_NAME, 3250 .target_alloc = srp_target_alloc, 3251 .slave_configure = srp_slave_configure, 3252 .info = srp_target_info, 3253 .queuecommand = srp_queuecommand, 3254 .change_queue_depth = srp_change_queue_depth, 3255 .eh_timed_out = srp_timed_out, 3256 .eh_abort_handler = srp_abort, 3257 .eh_device_reset_handler = srp_reset_device, 3258 .eh_host_reset_handler = srp_reset_host, 3259 .skip_settle_delay = true, 3260 .sg_tablesize = SRP_DEF_SG_TABLESIZE, 3261 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE, 3262 .this_id = -1, 3263 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE, 3264 .shost_attrs = srp_host_attrs, 3265 .track_queue_depth = 1, 3266 }; 3267 3268 static int srp_sdev_count(struct Scsi_Host *host) 3269 { 3270 struct scsi_device *sdev; 3271 int c = 0; 3272 3273 shost_for_each_device(sdev, host) 3274 c++; 3275 3276 return c; 3277 } 3278 3279 /* 3280 * Return values: 3281 * < 0 upon failure. Caller is responsible for SRP target port cleanup. 3282 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port 3283 * removal has been scheduled. 3284 * 0 and target->state != SRP_TARGET_REMOVED upon success. 
3285 */ 3286 static int srp_add_target(struct srp_host *host, struct srp_target_port *target) 3287 { 3288 struct srp_rport_identifiers ids; 3289 struct srp_rport *rport; 3290 3291 target->state = SRP_TARGET_SCANNING; 3292 sprintf(target->target_name, "SRP.T10:%016llX", 3293 be64_to_cpu(target->id_ext)); 3294 3295 if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dev.parent)) 3296 return -ENODEV; 3297 3298 memcpy(ids.port_id, &target->id_ext, 8); 3299 memcpy(ids.port_id + 8, &target->ioc_guid, 8); 3300 ids.roles = SRP_RPORT_ROLE_TARGET; 3301 rport = srp_rport_add(target->scsi_host, &ids); 3302 if (IS_ERR(rport)) { 3303 scsi_remove_host(target->scsi_host); 3304 return PTR_ERR(rport); 3305 } 3306 3307 rport->lld_data = target; 3308 target->rport = rport; 3309 3310 spin_lock(&host->target_lock); 3311 list_add_tail(&target->list, &host->target_list); 3312 spin_unlock(&host->target_lock); 3313 3314 scsi_scan_target(&target->scsi_host->shost_gendev, 3315 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL); 3316 3317 if (srp_connected_ch(target) < target->ch_count || 3318 target->qp_in_error) { 3319 shost_printk(KERN_INFO, target->scsi_host, 3320 PFX "SCSI scan failed - removing SCSI host\n"); 3321 srp_queue_remove_work(target); 3322 goto out; 3323 } 3324 3325 pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n", 3326 dev_name(&target->scsi_host->shost_gendev), 3327 srp_sdev_count(target->scsi_host)); 3328 3329 spin_lock_irq(&target->lock); 3330 if (target->state == SRP_TARGET_SCANNING) 3331 target->state = SRP_TARGET_LIVE; 3332 spin_unlock_irq(&target->lock); 3333 3334 out: 3335 return 0; 3336 } 3337 3338 static void srp_release_dev(struct device *dev) 3339 { 3340 struct srp_host *host = 3341 container_of(dev, struct srp_host, dev); 3342 3343 complete(&host->released); 3344 } 3345 3346 static struct class srp_class = { 3347 .name = "infiniband_srp", 3348 .dev_release = srp_release_dev 3349 }; 3350 3351 /** 3352 * srp_conn_unique() - check whether the connection to a target is unique 3353 * @host: SRP host. 3354 * @target: SRP target port. 3355 */ 3356 static bool srp_conn_unique(struct srp_host *host, 3357 struct srp_target_port *target) 3358 { 3359 struct srp_target_port *t; 3360 bool ret = false; 3361 3362 if (target->state == SRP_TARGET_REMOVED) 3363 goto out; 3364 3365 ret = true; 3366 3367 spin_lock(&host->target_lock); 3368 list_for_each_entry(t, &host->target_list, list) { 3369 if (t != target && 3370 target->id_ext == t->id_ext && 3371 target->ioc_guid == t->ioc_guid && 3372 target->initiator_ext == t->initiator_ext) { 3373 ret = false; 3374 break; 3375 } 3376 } 3377 spin_unlock(&host->target_lock); 3378 3379 out: 3380 return ret; 3381 } 3382 3383 /* 3384 * Target ports are added by writing 3385 * 3386 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>, 3387 * pkey=<P_Key>,service_id=<service ID> 3388 * or 3389 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>, 3390 * [src=<IPv4 address>,]dest=<IPv4 address>:<port number> 3391 * 3392 * to the add_target sysfs attribute. 
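 *
 * For example (all identifiers below are made-up placeholders), a target
 * behind port 1 of a local HCA could be added with a single write such as
 *
 *   echo id_ext=200100a0b8114244,ioc_guid=00117500007f2a90,dgid=fe8000000000000000117500007f2a91,pkey=ffff,service_id=00117500007f2a90 > /sys/class/infiniband_srp/srp-<hca>-1/add_target
 *
 * and similarly with dest=<IPv4 address>:<port number> instead of the
 * dgid/pkey/service_id triple for the RDMA/CM variant.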
3393 */ 3394 enum { 3395 SRP_OPT_ERR = 0, 3396 SRP_OPT_ID_EXT = 1 << 0, 3397 SRP_OPT_IOC_GUID = 1 << 1, 3398 SRP_OPT_DGID = 1 << 2, 3399 SRP_OPT_PKEY = 1 << 3, 3400 SRP_OPT_SERVICE_ID = 1 << 4, 3401 SRP_OPT_MAX_SECT = 1 << 5, 3402 SRP_OPT_MAX_CMD_PER_LUN = 1 << 6, 3403 SRP_OPT_IO_CLASS = 1 << 7, 3404 SRP_OPT_INITIATOR_EXT = 1 << 8, 3405 SRP_OPT_CMD_SG_ENTRIES = 1 << 9, 3406 SRP_OPT_ALLOW_EXT_SG = 1 << 10, 3407 SRP_OPT_SG_TABLESIZE = 1 << 11, 3408 SRP_OPT_COMP_VECTOR = 1 << 12, 3409 SRP_OPT_TL_RETRY_COUNT = 1 << 13, 3410 SRP_OPT_QUEUE_SIZE = 1 << 14, 3411 SRP_OPT_IP_SRC = 1 << 15, 3412 SRP_OPT_IP_DEST = 1 << 16, 3413 SRP_OPT_TARGET_CAN_QUEUE= 1 << 17, 3414 }; 3415 3416 static unsigned int srp_opt_mandatory[] = { 3417 SRP_OPT_ID_EXT | 3418 SRP_OPT_IOC_GUID | 3419 SRP_OPT_DGID | 3420 SRP_OPT_PKEY | 3421 SRP_OPT_SERVICE_ID, 3422 SRP_OPT_ID_EXT | 3423 SRP_OPT_IOC_GUID | 3424 SRP_OPT_IP_DEST, 3425 }; 3426 3427 static const match_table_t srp_opt_tokens = { 3428 { SRP_OPT_ID_EXT, "id_ext=%s" }, 3429 { SRP_OPT_IOC_GUID, "ioc_guid=%s" }, 3430 { SRP_OPT_DGID, "dgid=%s" }, 3431 { SRP_OPT_PKEY, "pkey=%x" }, 3432 { SRP_OPT_SERVICE_ID, "service_id=%s" }, 3433 { SRP_OPT_MAX_SECT, "max_sect=%d" }, 3434 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" }, 3435 { SRP_OPT_TARGET_CAN_QUEUE, "target_can_queue=%d" }, 3436 { SRP_OPT_IO_CLASS, "io_class=%x" }, 3437 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" }, 3438 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" }, 3439 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" }, 3440 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" }, 3441 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" }, 3442 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" }, 3443 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" }, 3444 { SRP_OPT_IP_SRC, "src=%s" }, 3445 { SRP_OPT_IP_DEST, "dest=%s" }, 3446 { SRP_OPT_ERR, NULL } 3447 }; 3448 3449 /** 3450 * srp_parse_in - parse an IP address and port number combination 3451 * @net: [in] Network namespace. 3452 * @sa: [out] Address family, IP address and port number. 3453 * @addr_port_str: [in] IP address and port number. 3454 * @has_port: [out] Whether or not @addr_port_str includes a port number. 3455 * 3456 * Parse the following address formats: 3457 * - IPv4: <ip_address>:<port>, e.g. 1.2.3.4:5. 3458 * - IPv6: \[<ipv6_address>\]:<port>, e.g. [1::2:3%4]:5. 
3459 */ 3460 static int srp_parse_in(struct net *net, struct sockaddr_storage *sa, 3461 const char *addr_port_str, bool *has_port) 3462 { 3463 char *addr_end, *addr = kstrdup(addr_port_str, GFP_KERNEL); 3464 char *port_str; 3465 int ret; 3466 3467 if (!addr) 3468 return -ENOMEM; 3469 port_str = strrchr(addr, ':'); 3470 if (port_str && strchr(port_str, ']')) 3471 port_str = NULL; 3472 if (port_str) 3473 *port_str++ = '\0'; 3474 if (has_port) 3475 *has_port = port_str != NULL; 3476 ret = inet_pton_with_scope(net, AF_INET, addr, port_str, sa); 3477 if (ret && addr[0]) { 3478 addr_end = addr + strlen(addr) - 1; 3479 if (addr[0] == '[' && *addr_end == ']') { 3480 *addr_end = '\0'; 3481 ret = inet_pton_with_scope(net, AF_INET6, addr + 1, 3482 port_str, sa); 3483 } 3484 } 3485 kfree(addr); 3486 pr_debug("%s -> %pISpfsc\n", addr_port_str, sa); 3487 return ret; 3488 } 3489 3490 static int srp_parse_options(struct net *net, const char *buf, 3491 struct srp_target_port *target) 3492 { 3493 char *options, *sep_opt; 3494 char *p; 3495 substring_t args[MAX_OPT_ARGS]; 3496 unsigned long long ull; 3497 bool has_port; 3498 int opt_mask = 0; 3499 int token; 3500 int ret = -EINVAL; 3501 int i; 3502 3503 options = kstrdup(buf, GFP_KERNEL); 3504 if (!options) 3505 return -ENOMEM; 3506 3507 sep_opt = options; 3508 while ((p = strsep(&sep_opt, ",\n")) != NULL) { 3509 if (!*p) 3510 continue; 3511 3512 token = match_token(p, srp_opt_tokens, args); 3513 opt_mask |= token; 3514 3515 switch (token) { 3516 case SRP_OPT_ID_EXT: 3517 p = match_strdup(args); 3518 if (!p) { 3519 ret = -ENOMEM; 3520 goto out; 3521 } 3522 ret = kstrtoull(p, 16, &ull); 3523 if (ret) { 3524 pr_warn("invalid id_ext parameter '%s'\n", p); 3525 kfree(p); 3526 goto out; 3527 } 3528 target->id_ext = cpu_to_be64(ull); 3529 kfree(p); 3530 break; 3531 3532 case SRP_OPT_IOC_GUID: 3533 p = match_strdup(args); 3534 if (!p) { 3535 ret = -ENOMEM; 3536 goto out; 3537 } 3538 ret = kstrtoull(p, 16, &ull); 3539 if (ret) { 3540 pr_warn("invalid ioc_guid parameter '%s'\n", p); 3541 kfree(p); 3542 goto out; 3543 } 3544 target->ioc_guid = cpu_to_be64(ull); 3545 kfree(p); 3546 break; 3547 3548 case SRP_OPT_DGID: 3549 p = match_strdup(args); 3550 if (!p) { 3551 ret = -ENOMEM; 3552 goto out; 3553 } 3554 if (strlen(p) != 32) { 3555 pr_warn("bad dest GID parameter '%s'\n", p); 3556 kfree(p); 3557 goto out; 3558 } 3559 3560 ret = hex2bin(target->ib_cm.orig_dgid.raw, p, 16); 3561 kfree(p); 3562 if (ret < 0) 3563 goto out; 3564 break; 3565 3566 case SRP_OPT_PKEY: 3567 if (match_hex(args, &token)) { 3568 pr_warn("bad P_Key parameter '%s'\n", p); 3569 goto out; 3570 } 3571 target->ib_cm.pkey = cpu_to_be16(token); 3572 break; 3573 3574 case SRP_OPT_SERVICE_ID: 3575 p = match_strdup(args); 3576 if (!p) { 3577 ret = -ENOMEM; 3578 goto out; 3579 } 3580 ret = kstrtoull(p, 16, &ull); 3581 if (ret) { 3582 pr_warn("bad service_id parameter '%s'\n", p); 3583 kfree(p); 3584 goto out; 3585 } 3586 target->ib_cm.service_id = cpu_to_be64(ull); 3587 kfree(p); 3588 break; 3589 3590 case SRP_OPT_IP_SRC: 3591 p = match_strdup(args); 3592 if (!p) { 3593 ret = -ENOMEM; 3594 goto out; 3595 } 3596 ret = srp_parse_in(net, &target->rdma_cm.src.ss, p, 3597 NULL); 3598 if (ret < 0) { 3599 pr_warn("bad source parameter '%s'\n", p); 3600 kfree(p); 3601 goto out; 3602 } 3603 target->rdma_cm.src_specified = true; 3604 kfree(p); 3605 break; 3606 3607 case SRP_OPT_IP_DEST: 3608 p = match_strdup(args); 3609 if (!p) { 3610 ret = -ENOMEM; 3611 goto out; 3612 } 3613 ret = srp_parse_in(net, 
&target->rdma_cm.dst.ss, p, 3614 &has_port); 3615 if (!has_port) 3616 ret = -EINVAL; 3617 if (ret < 0) { 3618 pr_warn("bad dest parameter '%s'\n", p); 3619 kfree(p); 3620 goto out; 3621 } 3622 target->using_rdma_cm = true; 3623 kfree(p); 3624 break; 3625 3626 case SRP_OPT_MAX_SECT: 3627 if (match_int(args, &token)) { 3628 pr_warn("bad max sect parameter '%s'\n", p); 3629 goto out; 3630 } 3631 target->scsi_host->max_sectors = token; 3632 break; 3633 3634 case SRP_OPT_QUEUE_SIZE: 3635 if (match_int(args, &token) || token < 1) { 3636 pr_warn("bad queue_size parameter '%s'\n", p); 3637 goto out; 3638 } 3639 target->scsi_host->can_queue = token; 3640 target->queue_size = token + SRP_RSP_SQ_SIZE + 3641 SRP_TSK_MGMT_SQ_SIZE; 3642 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN)) 3643 target->scsi_host->cmd_per_lun = token; 3644 break; 3645 3646 case SRP_OPT_MAX_CMD_PER_LUN: 3647 if (match_int(args, &token) || token < 1) { 3648 pr_warn("bad max cmd_per_lun parameter '%s'\n", 3649 p); 3650 goto out; 3651 } 3652 target->scsi_host->cmd_per_lun = token; 3653 break; 3654 3655 case SRP_OPT_TARGET_CAN_QUEUE: 3656 if (match_int(args, &token) || token < 1) { 3657 pr_warn("bad max target_can_queue parameter '%s'\n", 3658 p); 3659 goto out; 3660 } 3661 target->target_can_queue = token; 3662 break; 3663 3664 case SRP_OPT_IO_CLASS: 3665 if (match_hex(args, &token)) { 3666 pr_warn("bad IO class parameter '%s'\n", p); 3667 goto out; 3668 } 3669 if (token != SRP_REV10_IB_IO_CLASS && 3670 token != SRP_REV16A_IB_IO_CLASS) { 3671 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n", 3672 token, SRP_REV10_IB_IO_CLASS, 3673 SRP_REV16A_IB_IO_CLASS); 3674 goto out; 3675 } 3676 target->io_class = token; 3677 break; 3678 3679 case SRP_OPT_INITIATOR_EXT: 3680 p = match_strdup(args); 3681 if (!p) { 3682 ret = -ENOMEM; 3683 goto out; 3684 } 3685 ret = kstrtoull(p, 16, &ull); 3686 if (ret) { 3687 pr_warn("bad initiator_ext value '%s'\n", p); 3688 kfree(p); 3689 goto out; 3690 } 3691 target->initiator_ext = cpu_to_be64(ull); 3692 kfree(p); 3693 break; 3694 3695 case SRP_OPT_CMD_SG_ENTRIES: 3696 if (match_int(args, &token) || token < 1 || token > 255) { 3697 pr_warn("bad max cmd_sg_entries parameter '%s'\n", 3698 p); 3699 goto out; 3700 } 3701 target->cmd_sg_cnt = token; 3702 break; 3703 3704 case SRP_OPT_ALLOW_EXT_SG: 3705 if (match_int(args, &token)) { 3706 pr_warn("bad allow_ext_sg parameter '%s'\n", p); 3707 goto out; 3708 } 3709 target->allow_ext_sg = !!token; 3710 break; 3711 3712 case SRP_OPT_SG_TABLESIZE: 3713 if (match_int(args, &token) || token < 1 || 3714 token > SG_MAX_SEGMENTS) { 3715 pr_warn("bad max sg_tablesize parameter '%s'\n", 3716 p); 3717 goto out; 3718 } 3719 target->sg_tablesize = token; 3720 break; 3721 3722 case SRP_OPT_COMP_VECTOR: 3723 if (match_int(args, &token) || token < 0) { 3724 pr_warn("bad comp_vector parameter '%s'\n", p); 3725 goto out; 3726 } 3727 target->comp_vector = token; 3728 break; 3729 3730 case SRP_OPT_TL_RETRY_COUNT: 3731 if (match_int(args, &token) || token < 2 || token > 7) { 3732 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n", 3733 p); 3734 goto out; 3735 } 3736 target->tl_retry_count = token; 3737 break; 3738 3739 default: 3740 pr_warn("unknown parameter or missing value '%s' in target creation request\n", 3741 p); 3742 goto out; 3743 } 3744 } 3745 3746 for (i = 0; i < ARRAY_SIZE(srp_opt_mandatory); i++) { 3747 if ((opt_mask & srp_opt_mandatory[i]) == srp_opt_mandatory[i]) { 3748 ret = 0; 3749 break; 3750 } 3751 } 3752 if (ret) 
3753 pr_warn("target creation request is missing one or more parameters\n"); 3754 3755 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue 3756 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN)) 3757 pr_warn("cmd_per_lun = %d > queue_size = %d\n", 3758 target->scsi_host->cmd_per_lun, 3759 target->scsi_host->can_queue); 3760 3761 out: 3762 kfree(options); 3763 return ret; 3764 } 3765 3766 static ssize_t srp_create_target(struct device *dev, 3767 struct device_attribute *attr, 3768 const char *buf, size_t count) 3769 { 3770 struct srp_host *host = 3771 container_of(dev, struct srp_host, dev); 3772 struct Scsi_Host *target_host; 3773 struct srp_target_port *target; 3774 struct srp_rdma_ch *ch; 3775 struct srp_device *srp_dev = host->srp_dev; 3776 struct ib_device *ibdev = srp_dev->dev; 3777 int ret, node_idx, node, cpu, i; 3778 unsigned int max_sectors_per_mr, mr_per_cmd = 0; 3779 bool multich = false; 3780 uint32_t max_iu_len; 3781 3782 target_host = scsi_host_alloc(&srp_template, 3783 sizeof (struct srp_target_port)); 3784 if (!target_host) 3785 return -ENOMEM; 3786 3787 target_host->transportt = ib_srp_transport_template; 3788 target_host->max_channel = 0; 3789 target_host->max_id = 1; 3790 target_host->max_lun = -1LL; 3791 target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb; 3792 target_host->max_segment_size = ib_dma_max_seg_size(ibdev); 3793 3794 if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)) 3795 target_host->virt_boundary_mask = ~srp_dev->mr_page_mask; 3796 3797 target = host_to_target(target_host); 3798 3799 target->net = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); 3800 target->io_class = SRP_REV16A_IB_IO_CLASS; 3801 target->scsi_host = target_host; 3802 target->srp_host = host; 3803 target->lkey = host->srp_dev->pd->local_dma_lkey; 3804 target->global_rkey = host->srp_dev->global_rkey; 3805 target->cmd_sg_cnt = cmd_sg_entries; 3806 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries; 3807 target->allow_ext_sg = allow_ext_sg; 3808 target->tl_retry_count = 7; 3809 target->queue_size = SRP_DEFAULT_QUEUE_SIZE; 3810 3811 /* 3812 * Avoid that the SCSI host can be removed by srp_remove_target() 3813 * before this function returns. 
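 * The reference taken below is dropped near the end of this function; on
 * failure a second scsi_host_put() call releases the allocation reference
 * as well.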
3814 */ 3815 scsi_host_get(target->scsi_host); 3816 3817 ret = mutex_lock_interruptible(&host->add_target_mutex); 3818 if (ret < 0) 3819 goto put; 3820 3821 ret = srp_parse_options(target->net, buf, target); 3822 if (ret) 3823 goto out; 3824 3825 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE; 3826 3827 if (!srp_conn_unique(target->srp_host, target)) { 3828 if (target->using_rdma_cm) { 3829 shost_printk(KERN_INFO, target->scsi_host, 3830 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;dest=%pIS\n", 3831 be64_to_cpu(target->id_ext), 3832 be64_to_cpu(target->ioc_guid), 3833 &target->rdma_cm.dst); 3834 } else { 3835 shost_printk(KERN_INFO, target->scsi_host, 3836 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n", 3837 be64_to_cpu(target->id_ext), 3838 be64_to_cpu(target->ioc_guid), 3839 be64_to_cpu(target->initiator_ext)); 3840 } 3841 ret = -EEXIST; 3842 goto out; 3843 } 3844 3845 if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg && 3846 target->cmd_sg_cnt < target->sg_tablesize) { 3847 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n"); 3848 target->sg_tablesize = target->cmd_sg_cnt; 3849 } 3850 3851 if (srp_dev->use_fast_reg || srp_dev->use_fmr) { 3852 bool gaps_reg = (ibdev->attrs.device_cap_flags & 3853 IB_DEVICE_SG_GAPS_REG); 3854 3855 max_sectors_per_mr = srp_dev->max_pages_per_mr << 3856 (ilog2(srp_dev->mr_page_size) - 9); 3857 if (!gaps_reg) { 3858 /* 3859 * FR and FMR can only map one HCA page per entry. If 3860 * the start address is not aligned on a HCA page 3861 * boundary two entries will be used for the head and 3862 * the tail although these two entries combined 3863 * contain at most one HCA page of data. Hence the "+ 3864 * 1" in the calculation below. 3865 * 3866 * The indirect data buffer descriptor is contiguous 3867 * so the memory for that buffer will only be 3868 * registered if register_always is true. Hence add 3869 * one to mr_per_cmd if register_always has been set. 3870 */ 3871 mr_per_cmd = register_always + 3872 (target->scsi_host->max_sectors + 1 + 3873 max_sectors_per_mr - 1) / max_sectors_per_mr; 3874 } else { 3875 mr_per_cmd = register_always + 3876 (target->sg_tablesize + 3877 srp_dev->max_pages_per_mr - 1) / 3878 srp_dev->max_pages_per_mr; 3879 } 3880 pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n", 3881 target->scsi_host->max_sectors, srp_dev->max_pages_per_mr, srp_dev->mr_page_size, 3882 max_sectors_per_mr, mr_per_cmd); 3883 } 3884 3885 target_host->sg_tablesize = target->sg_tablesize; 3886 target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd; 3887 target->mr_per_cmd = mr_per_cmd; 3888 target->indirect_size = target->sg_tablesize * 3889 sizeof (struct srp_direct_buf); 3890 max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, srp_use_imm_data); 3891 3892 INIT_WORK(&target->tl_err_work, srp_tl_err_work); 3893 INIT_WORK(&target->remove_work, srp_remove_work); 3894 spin_lock_init(&target->lock); 3895 ret = rdma_query_gid(ibdev, host->port, 0, &target->sgid); 3896 if (ret) 3897 goto out; 3898 3899 ret = -ENOMEM; 3900 target->ch_count = max_t(unsigned, num_online_nodes(), 3901 min(ch_count ? 
: 3902 min(4 * num_online_nodes(), 3903 ibdev->num_comp_vectors), 3904 num_online_cpus())); 3905 target->ch = kcalloc(target->ch_count, sizeof(*target->ch), 3906 GFP_KERNEL); 3907 if (!target->ch) 3908 goto out; 3909 3910 node_idx = 0; 3911 for_each_online_node(node) { 3912 const int ch_start = (node_idx * target->ch_count / 3913 num_online_nodes()); 3914 const int ch_end = ((node_idx + 1) * target->ch_count / 3915 num_online_nodes()); 3916 const int cv_start = node_idx * ibdev->num_comp_vectors / 3917 num_online_nodes(); 3918 const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors / 3919 num_online_nodes(); 3920 int cpu_idx = 0; 3921 3922 for_each_online_cpu(cpu) { 3923 if (cpu_to_node(cpu) != node) 3924 continue; 3925 if (ch_start + cpu_idx >= ch_end) 3926 continue; 3927 ch = &target->ch[ch_start + cpu_idx]; 3928 ch->target = target; 3929 ch->comp_vector = cv_start == cv_end ? cv_start : 3930 cv_start + cpu_idx % (cv_end - cv_start); 3931 spin_lock_init(&ch->lock); 3932 INIT_LIST_HEAD(&ch->free_tx); 3933 ret = srp_new_cm_id(ch); 3934 if (ret) 3935 goto err_disconnect; 3936 3937 ret = srp_create_ch_ib(ch); 3938 if (ret) 3939 goto err_disconnect; 3940 3941 ret = srp_alloc_req_data(ch); 3942 if (ret) 3943 goto err_disconnect; 3944 3945 ret = srp_connect_ch(ch, max_iu_len, multich); 3946 if (ret) { 3947 char dst[64]; 3948 3949 if (target->using_rdma_cm) 3950 snprintf(dst, sizeof(dst), "%pIS", 3951 &target->rdma_cm.dst); 3952 else 3953 snprintf(dst, sizeof(dst), "%pI6", 3954 target->ib_cm.orig_dgid.raw); 3955 shost_printk(KERN_ERR, target->scsi_host, 3956 PFX "Connection %d/%d to %s failed\n", 3957 ch_start + cpu_idx, 3958 target->ch_count, dst); 3959 if (node_idx == 0 && cpu_idx == 0) { 3960 goto free_ch; 3961 } else { 3962 srp_free_ch_ib(target, ch); 3963 srp_free_req_data(target, ch); 3964 target->ch_count = ch - target->ch; 3965 goto connected; 3966 } 3967 } 3968 3969 multich = true; 3970 cpu_idx++; 3971 } 3972 node_idx++; 3973 } 3974 3975 connected: 3976 target->scsi_host->nr_hw_queues = target->ch_count; 3977 3978 ret = srp_add_target(host, target); 3979 if (ret) 3980 goto err_disconnect; 3981 3982 if (target->state != SRP_TARGET_REMOVED) { 3983 if (target->using_rdma_cm) { 3984 shost_printk(KERN_DEBUG, target->scsi_host, PFX 3985 "new target: id_ext %016llx ioc_guid %016llx sgid %pI6 dest %pIS\n", 3986 be64_to_cpu(target->id_ext), 3987 be64_to_cpu(target->ioc_guid), 3988 target->sgid.raw, &target->rdma_cm.dst); 3989 } else { 3990 shost_printk(KERN_DEBUG, target->scsi_host, PFX 3991 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n", 3992 be64_to_cpu(target->id_ext), 3993 be64_to_cpu(target->ioc_guid), 3994 be16_to_cpu(target->ib_cm.pkey), 3995 be64_to_cpu(target->ib_cm.service_id), 3996 target->sgid.raw, 3997 target->ib_cm.orig_dgid.raw); 3998 } 3999 } 4000 4001 ret = count; 4002 4003 out: 4004 mutex_unlock(&host->add_target_mutex); 4005 4006 put: 4007 scsi_host_put(target->scsi_host); 4008 if (ret < 0) { 4009 /* 4010 * If a call to srp_remove_target() has not been scheduled, 4011 * drop the network namespace reference now that was obtained 4012 * earlier in this function. 
put:
	scsi_host_put(target->scsi_host);
	if (ret < 0) {
		/*
		 * If a call to srp_remove_target() has not been scheduled,
		 * drop the network namespace reference that was obtained
		 * earlier in this function.
		 */
		if (target->state != SRP_TARGET_REMOVED)
			kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net);
		scsi_host_put(target->scsi_host);
	}

	return ret;

err_disconnect:
	srp_disconnect_target(target);

free_ch:
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		srp_free_ch_ib(target, ch);
		srp_free_req_data(target, ch);
	}

	kfree(target->ch);
	goto out;
}

static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);

static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
			  char *buf)
{
	struct srp_host *host = container_of(dev, struct srp_host, dev);

	return sprintf(buf, "%s\n", dev_name(&host->srp_dev->dev->dev));
}

static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);

static ssize_t show_port(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	struct srp_host *host = container_of(dev, struct srp_host, dev);

	return sprintf(buf, "%d\n", host->port);
}

static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);

static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
{
	struct srp_host *host;

	host = kzalloc(sizeof(*host), GFP_KERNEL);
	if (!host)
		return NULL;

	INIT_LIST_HEAD(&host->target_list);
	spin_lock_init(&host->target_lock);
	init_completion(&host->released);
	mutex_init(&host->add_target_mutex);
	host->srp_dev = device;
	host->port = port;

	host->dev.class = &srp_class;
	host->dev.parent = device->dev->dev.parent;
	dev_set_name(&host->dev, "srp-%s-%d", dev_name(&device->dev->dev),
		     port);

	if (device_register(&host->dev))
		goto free_host;
	if (device_create_file(&host->dev, &dev_attr_add_target))
		goto err_class;
	if (device_create_file(&host->dev, &dev_attr_ibdev))
		goto err_class;
	if (device_create_file(&host->dev, &dev_attr_port))
		goto err_class;

	return host;

err_class:
	device_unregister(&host->dev);

free_host:
	kfree(host);

	return NULL;
}

static void srp_rename_dev(struct ib_device *device, void *client_data)
{
	struct srp_device *srp_dev = client_data;
	struct srp_host *host, *tmp_host;

	list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
		char name[IB_DEVICE_NAME_MAX + 8];

		snprintf(name, sizeof(name), "srp-%s-%d",
			 dev_name(&device->dev), host->port);
		device_rename(&host->dev, name);
	}
}

static void srp_add_one(struct ib_device *device)
{
	struct srp_device *srp_dev;
	struct ib_device_attr *attr = &device->attrs;
	struct srp_host *host;
	int mr_page_shift;
	unsigned int p;
	u64 max_pages_per_mr;
	unsigned int flags = 0;

	srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
	if (!srp_dev)
		return;

	/*
	 * Use the smallest page size supported by the HCA, down to a
	 * minimum of 4096 bytes. We're unlikely to build large sglists
	 * out of smaller entries.
	 */
	mr_page_shift = max(12, ffs(attr->page_size_cap) - 1);
	srp_dev->mr_page_size = 1 << mr_page_shift;
	srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
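
	/*
	 * For example (illustrative values only): a HCA that reports
	 * page_size_cap = 0xfffff000 supports 4 KiB pages and larger, so
	 * ffs() - 1 = 12 and mr_page_size = 4096. Dividing max_mr_size by
	 * mr_page_size below gives the number of pages one MR can cover,
	 * which is then capped at SRP_MAX_PAGES_PER_MR.
	 */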
	max_pages_per_mr = attr->max_mr_size;
	do_div(max_pages_per_mr, srp_dev->mr_page_size);
	pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
		 attr->max_mr_size, srp_dev->mr_page_size,
		 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
	srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
					  max_pages_per_mr);

	srp_dev->has_fmr = (device->ops.alloc_fmr && device->ops.dealloc_fmr &&
			    device->ops.map_phys_fmr && device->ops.unmap_fmr);
	srp_dev->has_fr = (attr->device_cap_flags &
			   IB_DEVICE_MEM_MGT_EXTENSIONS);
	if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
		dev_warn(&device->dev, "neither FMR nor FR is supported\n");
	} else if (!never_register &&
		   attr->max_mr_size >= 2 * srp_dev->mr_page_size) {
		srp_dev->use_fast_reg = (srp_dev->has_fr &&
					 (!srp_dev->has_fmr || prefer_fr));
		srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
	}

	if (never_register || !register_always ||
	    (!srp_dev->has_fmr && !srp_dev->has_fr))
		flags |= IB_PD_UNSAFE_GLOBAL_RKEY;

	if (srp_dev->use_fast_reg) {
		srp_dev->max_pages_per_mr =
			min_t(u32, srp_dev->max_pages_per_mr,
			      attr->max_fast_reg_page_list_len);
	}
	srp_dev->mr_max_size = srp_dev->mr_page_size *
			       srp_dev->max_pages_per_mr;
	pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
		 dev_name(&device->dev), mr_page_shift, attr->max_mr_size,
		 attr->max_fast_reg_page_list_len,
		 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);

	INIT_LIST_HEAD(&srp_dev->dev_list);

	srp_dev->dev = device;
	srp_dev->pd = ib_alloc_pd(device, flags);
	if (IS_ERR(srp_dev->pd))
		goto free_dev;

	if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
		srp_dev->global_rkey = srp_dev->pd->unsafe_global_rkey;
		WARN_ON_ONCE(srp_dev->global_rkey == 0);
	}

	rdma_for_each_port (device, p) {
		host = srp_add_port(srp_dev, p);
		if (host)
			list_add_tail(&host->list, &srp_dev->dev_list);
	}

	ib_set_client_data(device, &srp_client, srp_dev);
	return;

free_dev:
	kfree(srp_dev);
}

static void srp_remove_one(struct ib_device *device, void *client_data)
{
	struct srp_device *srp_dev;
	struct srp_host *host, *tmp_host;
	struct srp_target_port *target;

	srp_dev = client_data;
	if (!srp_dev)
		return;

	list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
		device_unregister(&host->dev);
		/*
		 * Wait for the sysfs entry to go away, so that no new
		 * target ports can be created.
		 */
		wait_for_completion(&host->released);

		/*
		 * Remove all target ports.
		 */
		spin_lock(&host->target_lock);
		list_for_each_entry(target, &host->target_list, list)
			srp_queue_remove_work(target);
		spin_unlock(&host->target_lock);

		/*
		 * Wait for tl_err and target port removal tasks.
		 */
		flush_workqueue(system_long_wq);
		flush_workqueue(srp_remove_wq);

		kfree(host);
	}

	ib_dealloc_pd(srp_dev->pd);

	kfree(srp_dev);
}
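
/*
 * Hooks this driver into the SRP transport class (scsi_transport_srp); the
 * template is registered below via srp_attach_transport(). The reconnect,
 * rport_delete and terminate_rport_io callbacks are invoked by the transport
 * layer, and the timeout/delay pointers refer to this module's tunables.
 */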
static struct srp_function_template ib_srp_transport_functions = {
	.has_rport_state = true,
	.reset_timer_if_blocked = true,
	.reconnect_delay = &srp_reconnect_delay,
	.fast_io_fail_tmo = &srp_fast_io_fail_tmo,
	.dev_loss_tmo = &srp_dev_loss_tmo,
	.reconnect = srp_rport_reconnect,
	.rport_delete = srp_rport_delete,
	.terminate_rport_io = srp_terminate_io,
};

static int __init srp_init_module(void)
{
	int ret;

	BUILD_BUG_ON(sizeof(struct srp_imm_buf) != 4);
	BUILD_BUG_ON(sizeof(struct srp_login_req) != 64);
	BUILD_BUG_ON(sizeof(struct srp_login_req_rdma) != 56);
	BUILD_BUG_ON(sizeof(struct srp_cmd) != 48);

	if (srp_sg_tablesize) {
		pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
		if (!cmd_sg_entries)
			cmd_sg_entries = srp_sg_tablesize;
	}

	if (!cmd_sg_entries)
		cmd_sg_entries = SRP_DEF_SG_TABLESIZE;

	if (cmd_sg_entries > 255) {
		pr_warn("Clamping cmd_sg_entries to 255\n");
		cmd_sg_entries = 255;
	}

	if (!indirect_sg_entries)
		indirect_sg_entries = cmd_sg_entries;
	else if (indirect_sg_entries < cmd_sg_entries) {
		pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
			cmd_sg_entries);
		indirect_sg_entries = cmd_sg_entries;
	}

	if (indirect_sg_entries > SG_MAX_SEGMENTS) {
		pr_warn("Clamping indirect_sg_entries to %u\n",
			SG_MAX_SEGMENTS);
		indirect_sg_entries = SG_MAX_SEGMENTS;
	}

	srp_remove_wq = create_workqueue("srp_remove");
	if (!srp_remove_wq) {
		ret = -ENOMEM;
		goto out;
	}

	ret = -ENOMEM;
	ib_srp_transport_template =
		srp_attach_transport(&ib_srp_transport_functions);
	if (!ib_srp_transport_template)
		goto destroy_wq;

	ret = class_register(&srp_class);
	if (ret) {
		pr_err("couldn't register class infiniband_srp\n");
		goto release_tr;
	}

	ib_sa_register_client(&srp_sa_client);

	ret = ib_register_client(&srp_client);
	if (ret) {
		pr_err("couldn't register IB client\n");
		goto unreg_sa;
	}

out:
	return ret;

unreg_sa:
	ib_sa_unregister_client(&srp_sa_client);
	class_unregister(&srp_class);

release_tr:
	srp_release_transport(ib_srp_transport_template);

destroy_wq:
	destroy_workqueue(srp_remove_wq);
	goto out;
}

static void __exit srp_cleanup_module(void)
{
	ib_unregister_client(&srp_client);
	ib_sa_unregister_client(&srp_sa_client);
	class_unregister(&srp_class);
	srp_release_transport(ib_srp_transport_template);
	destroy_workqueue(srp_remove_wq);
}

module_init(srp_init_module);
module_exit(srp_cleanup_module);