1 /*
2  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
34 
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/err.h>
39 #include <linux/string.h>
40 #include <linux/parser.h>
41 #include <linux/random.h>
42 #include <linux/jiffies.h>
43 #include <linux/lockdep.h>
44 #include <linux/inet.h>
45 #include <rdma/ib_cache.h>
46 
47 #include <linux/atomic.h>
48 
49 #include <scsi/scsi.h>
50 #include <scsi/scsi_device.h>
51 #include <scsi/scsi_dbg.h>
52 #include <scsi/scsi_tcq.h>
53 #include <scsi/srp.h>
54 #include <scsi/scsi_transport_srp.h>
55 
56 #include "ib_srp.h"
57 
58 #define DRV_NAME	"ib_srp"
59 #define PFX		DRV_NAME ": "
60 
61 MODULE_AUTHOR("Roland Dreier");
62 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
63 MODULE_LICENSE("Dual BSD/GPL");
64 
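/*
 * Provide no-op fallbacks when the kernel is built without
 * CONFIG_DYNAMIC_DEBUG so that the DYNAMIC_DEBUG_BRANCH()-guarded mapping
 * consistency check in srp_map_data() compiles away.
 */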
65 #if !defined(CONFIG_DYNAMIC_DEBUG)
66 #define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
67 #define DYNAMIC_DEBUG_BRANCH(descriptor) false
68 #endif
69 
70 static unsigned int srp_sg_tablesize;
71 static unsigned int cmd_sg_entries;
72 static unsigned int indirect_sg_entries;
73 static bool allow_ext_sg;
74 static bool prefer_fr = true;
75 static bool register_always = true;
76 static bool never_register;
77 static int topspin_workarounds = 1;
78 
79 module_param(srp_sg_tablesize, uint, 0444);
80 MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
81 
82 module_param(cmd_sg_entries, uint, 0444);
83 MODULE_PARM_DESC(cmd_sg_entries,
84 		 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
85 
86 module_param(indirect_sg_entries, uint, 0444);
87 MODULE_PARM_DESC(indirect_sg_entries,
88 		 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");
89 
90 module_param(allow_ext_sg, bool, 0444);
91 MODULE_PARM_DESC(allow_ext_sg,
92 		  "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
93 
94 module_param(topspin_workarounds, int, 0444);
95 MODULE_PARM_DESC(topspin_workarounds,
96 		 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
97 
98 module_param(prefer_fr, bool, 0444);
99 MODULE_PARM_DESC(prefer_fr,
100 "Whether to use fast registration if both FMR and fast registration are supported");
101 
102 module_param(register_always, bool, 0444);
103 MODULE_PARM_DESC(register_always,
104 		 "Use memory registration even for contiguous memory regions");
105 
106 module_param(never_register, bool, 0444);
107 MODULE_PARM_DESC(never_register, "Never register memory");
108 
109 static const struct kernel_param_ops srp_tmo_ops;
110 
111 static int srp_reconnect_delay = 10;
112 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
113 		S_IRUGO | S_IWUSR);
114 MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
115 
116 static int srp_fast_io_fail_tmo = 15;
117 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
118 		S_IRUGO | S_IWUSR);
119 MODULE_PARM_DESC(fast_io_fail_tmo,
120 		 "Number of seconds between the observation of a transport"
121 		 " layer error and failing all I/O. \"off\" means that this"
122 		 " functionality is disabled.");
123 
124 static int srp_dev_loss_tmo = 600;
125 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
126 		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dev_loss_tmo,
		 "Maximum number of seconds that the SRP transport should"
		 " insulate the SCSI host from transport layer errors. After"
		 " this time has been exceeded the SCSI host is removed."
		 " Should be between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
		 " if fast_io_fail_tmo has not been set. \"off\" means that"
		 " this functionality is disabled.");
134 
135 static bool srp_use_imm_data = true;
136 module_param_named(use_imm_data, srp_use_imm_data, bool, 0644);
137 MODULE_PARM_DESC(use_imm_data,
138 		 "Whether or not to request permission to use immediate data during SRP login.");
139 
140 static unsigned int srp_max_imm_data = 8 * 1024;
141 module_param_named(max_imm_data, srp_max_imm_data, uint, 0644);
142 MODULE_PARM_DESC(max_imm_data, "Maximum immediate data size.");
143 
144 static unsigned ch_count;
145 module_param(ch_count, uint, 0444);
146 MODULE_PARM_DESC(ch_count,
147 		 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
148 
149 static void srp_add_one(struct ib_device *device);
150 static void srp_remove_one(struct ib_device *device, void *client_data);
151 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
152 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
153 		const char *opname);
154 static int srp_ib_cm_handler(struct ib_cm_id *cm_id,
155 			     const struct ib_cm_event *event);
156 static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
157 			       struct rdma_cm_event *event);
158 
159 static struct scsi_transport_template *ib_srp_transport_template;
160 static struct workqueue_struct *srp_remove_wq;
161 
162 static struct ib_client srp_client = {
163 	.name   = "srp",
164 	.add    = srp_add_one,
165 	.remove = srp_remove_one
166 };
167 
168 static struct ib_sa_client srp_sa_client;
169 
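/*
 * Get/set handlers for the reconnect_delay, fast_io_fail_tmo and
 * dev_loss_tmo module parameters. A negative timeout is reported as "off",
 * and a new value is only accepted if srp_tmo_valid() approves the
 * resulting combination of the three timeouts.
 */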
170 static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
171 {
172 	int tmo = *(int *)kp->arg;
173 
174 	if (tmo >= 0)
175 		return sprintf(buffer, "%d", tmo);
176 	else
177 		return sprintf(buffer, "off");
178 }
179 
180 static int srp_tmo_set(const char *val, const struct kernel_param *kp)
181 {
182 	int tmo, res;
183 
184 	res = srp_parse_tmo(&tmo, val);
185 	if (res)
186 		goto out;
187 
188 	if (kp->arg == &srp_reconnect_delay)
189 		res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
190 				    srp_dev_loss_tmo);
191 	else if (kp->arg == &srp_fast_io_fail_tmo)
192 		res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
193 	else
194 		res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
195 				    tmo);
196 	if (res)
197 		goto out;
198 	*(int *)kp->arg = tmo;
199 
200 out:
201 	return res;
202 }
203 
204 static const struct kernel_param_ops srp_tmo_ops = {
205 	.get = srp_tmo_get,
206 	.set = srp_tmo_set,
207 };
208 
209 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
210 {
211 	return (struct srp_target_port *) host->hostdata;
212 }
213 
214 static const char *srp_target_info(struct Scsi_Host *host)
215 {
216 	return host_to_target(host)->target_name;
217 }
218 
219 static int srp_target_is_topspin(struct srp_target_port *target)
220 {
221 	static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
222 	static const u8 cisco_oui[3]   = { 0x00, 0x1b, 0x0d };
223 
224 	return topspin_workarounds &&
225 		(!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
226 		 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
227 }
228 
229 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
230 				   gfp_t gfp_mask,
231 				   enum dma_data_direction direction)
232 {
233 	struct srp_iu *iu;
234 
235 	iu = kmalloc(sizeof *iu, gfp_mask);
236 	if (!iu)
237 		goto out;
238 
239 	iu->buf = kzalloc(size, gfp_mask);
240 	if (!iu->buf)
241 		goto out_free_iu;
242 
243 	iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
244 				    direction);
245 	if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
246 		goto out_free_buf;
247 
248 	iu->size      = size;
249 	iu->direction = direction;
250 
251 	return iu;
252 
253 out_free_buf:
254 	kfree(iu->buf);
255 out_free_iu:
256 	kfree(iu);
257 out:
258 	return NULL;
259 }
260 
261 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
262 {
263 	if (!iu)
264 		return;
265 
266 	ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
267 			    iu->direction);
268 	kfree(iu->buf);
269 	kfree(iu);
270 }
271 
272 static void srp_qp_event(struct ib_event *event, void *context)
273 {
274 	pr_debug("QP event %s (%d)\n",
275 		 ib_event_msg(event->event), event->event);
276 }
277 
278 static int srp_init_ib_qp(struct srp_target_port *target,
279 			  struct ib_qp *qp)
280 {
281 	struct ib_qp_attr *attr;
282 	int ret;
283 
284 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
285 	if (!attr)
286 		return -ENOMEM;
287 
288 	ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
289 				  target->srp_host->port,
290 				  be16_to_cpu(target->ib_cm.pkey),
291 				  &attr->pkey_index);
292 	if (ret)
293 		goto out;
294 
295 	attr->qp_state        = IB_QPS_INIT;
296 	attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
297 				    IB_ACCESS_REMOTE_WRITE);
298 	attr->port_num        = target->srp_host->port;
299 
300 	ret = ib_modify_qp(qp, attr,
301 			   IB_QP_STATE		|
302 			   IB_QP_PKEY_INDEX	|
303 			   IB_QP_ACCESS_FLAGS	|
304 			   IB_QP_PORT);
305 
306 out:
307 	kfree(attr);
308 	return ret;
309 }
310 
311 static int srp_new_ib_cm_id(struct srp_rdma_ch *ch)
312 {
313 	struct srp_target_port *target = ch->target;
314 	struct ib_cm_id *new_cm_id;
315 
316 	new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
317 				    srp_ib_cm_handler, ch);
318 	if (IS_ERR(new_cm_id))
319 		return PTR_ERR(new_cm_id);
320 
321 	if (ch->ib_cm.cm_id)
322 		ib_destroy_cm_id(ch->ib_cm.cm_id);
323 	ch->ib_cm.cm_id = new_cm_id;
324 	if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev,
325 			    target->srp_host->port))
326 		ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_OPA;
327 	else
328 		ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_IB;
329 	ch->ib_cm.path.sgid = target->sgid;
330 	ch->ib_cm.path.dgid = target->ib_cm.orig_dgid;
331 	ch->ib_cm.path.pkey = target->ib_cm.pkey;
332 	ch->ib_cm.path.service_id = target->ib_cm.service_id;
333 
334 	return 0;
335 }
336 
337 static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch)
338 {
339 	struct srp_target_port *target = ch->target;
340 	struct rdma_cm_id *new_cm_id;
341 	int ret;
342 
343 	new_cm_id = rdma_create_id(target->net, srp_rdma_cm_handler, ch,
344 				   RDMA_PS_TCP, IB_QPT_RC);
345 	if (IS_ERR(new_cm_id)) {
346 		ret = PTR_ERR(new_cm_id);
347 		new_cm_id = NULL;
348 		goto out;
349 	}
350 
351 	init_completion(&ch->done);
352 	ret = rdma_resolve_addr(new_cm_id, target->rdma_cm.src_specified ?
353 				(struct sockaddr *)&target->rdma_cm.src : NULL,
354 				(struct sockaddr *)&target->rdma_cm.dst,
355 				SRP_PATH_REC_TIMEOUT_MS);
356 	if (ret) {
357 		pr_err("No route available from %pIS to %pIS (%d)\n",
358 		       &target->rdma_cm.src, &target->rdma_cm.dst, ret);
359 		goto out;
360 	}
361 	ret = wait_for_completion_interruptible(&ch->done);
362 	if (ret < 0)
363 		goto out;
364 
365 	ret = ch->status;
366 	if (ret) {
367 		pr_err("Resolving address %pIS failed (%d)\n",
368 		       &target->rdma_cm.dst, ret);
369 		goto out;
370 	}
371 
372 	swap(ch->rdma_cm.cm_id, new_cm_id);
373 
374 out:
375 	if (new_cm_id)
376 		rdma_destroy_id(new_cm_id);
377 
378 	return ret;
379 }
380 
381 static int srp_new_cm_id(struct srp_rdma_ch *ch)
382 {
383 	struct srp_target_port *target = ch->target;
384 
385 	return target->using_rdma_cm ? srp_new_rdma_cm_id(ch) :
386 		srp_new_ib_cm_id(ch);
387 }
388 
389 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
390 {
391 	struct srp_device *dev = target->srp_host->srp_dev;
392 	struct ib_fmr_pool_param fmr_param;
393 
394 	memset(&fmr_param, 0, sizeof(fmr_param));
395 	fmr_param.pool_size	    = target->mr_pool_size;
396 	fmr_param.dirty_watermark   = fmr_param.pool_size / 4;
397 	fmr_param.cache		    = 1;
398 	fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
399 	fmr_param.page_shift	    = ilog2(dev->mr_page_size);
400 	fmr_param.access	    = (IB_ACCESS_LOCAL_WRITE |
401 				       IB_ACCESS_REMOTE_WRITE |
402 				       IB_ACCESS_REMOTE_READ);
403 
404 	return ib_create_fmr_pool(dev->pd, &fmr_param);
405 }
406 
407 /**
408  * srp_destroy_fr_pool() - free the resources owned by a pool
409  * @pool: Fast registration pool to be destroyed.
410  */
411 static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
412 {
413 	int i;
414 	struct srp_fr_desc *d;
415 
416 	if (!pool)
417 		return;
418 
419 	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
420 		if (d->mr)
421 			ib_dereg_mr(d->mr);
422 	}
423 	kfree(pool);
424 }
425 
426 /**
427  * srp_create_fr_pool() - allocate and initialize a pool for fast registration
428  * @device:            IB device to allocate fast registration descriptors for.
429  * @pd:                Protection domain associated with the FR descriptors.
430  * @pool_size:         Number of descriptors to allocate.
431  * @max_page_list_len: Maximum fast registration work request page list length.
432  */
433 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
434 					      struct ib_pd *pd, int pool_size,
435 					      int max_page_list_len)
436 {
437 	struct srp_fr_pool *pool;
438 	struct srp_fr_desc *d;
439 	struct ib_mr *mr;
440 	int i, ret = -EINVAL;
441 	enum ib_mr_type mr_type;
442 
443 	if (pool_size <= 0)
444 		goto err;
445 	ret = -ENOMEM;
446 	pool = kzalloc(sizeof(struct srp_fr_pool) +
447 		       pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
448 	if (!pool)
449 		goto err;
450 	pool->size = pool_size;
451 	pool->max_page_list_len = max_page_list_len;
452 	spin_lock_init(&pool->lock);
453 	INIT_LIST_HEAD(&pool->free_list);
454 
455 	if (device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
456 		mr_type = IB_MR_TYPE_SG_GAPS;
457 	else
458 		mr_type = IB_MR_TYPE_MEM_REG;
459 
460 	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
461 		mr = ib_alloc_mr(pd, mr_type, max_page_list_len);
462 		if (IS_ERR(mr)) {
463 			ret = PTR_ERR(mr);
464 			if (ret == -ENOMEM)
465 				pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n",
466 					dev_name(&device->dev));
467 			goto destroy_pool;
468 		}
469 		d->mr = mr;
470 		list_add_tail(&d->entry, &pool->free_list);
471 	}
472 
473 out:
474 	return pool;
475 
476 destroy_pool:
477 	srp_destroy_fr_pool(pool);
478 
479 err:
480 	pool = ERR_PTR(ret);
481 	goto out;
482 }
483 
484 /**
485  * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
486  * @pool: Pool to obtain descriptor from.
487  */
488 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
489 {
490 	struct srp_fr_desc *d = NULL;
491 	unsigned long flags;
492 
493 	spin_lock_irqsave(&pool->lock, flags);
494 	if (!list_empty(&pool->free_list)) {
495 		d = list_first_entry(&pool->free_list, typeof(*d), entry);
496 		list_del(&d->entry);
497 	}
498 	spin_unlock_irqrestore(&pool->lock, flags);
499 
500 	return d;
501 }
502 
503 /**
504  * srp_fr_pool_put() - put an FR descriptor back in the free list
505  * @pool: Pool the descriptor was allocated from.
506  * @desc: Pointer to an array of fast registration descriptor pointers.
507  * @n:    Number of descriptors to put back.
508  *
509  * Note: The caller must already have queued an invalidation request for
510  * desc->mr->rkey before calling this function.
511  */
512 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
513 			    int n)
514 {
515 	unsigned long flags;
516 	int i;
517 
518 	spin_lock_irqsave(&pool->lock, flags);
519 	for (i = 0; i < n; i++)
520 		list_add(&desc[i]->entry, &pool->free_list);
521 	spin_unlock_irqrestore(&pool->lock, flags);
522 }
523 
524 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
525 {
526 	struct srp_device *dev = target->srp_host->srp_dev;
527 
528 	return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
529 				  dev->max_pages_per_mr);
530 }
531 
532 /**
533  * srp_destroy_qp() - destroy an RDMA queue pair
534  * @ch: SRP RDMA channel.
535  *
 * Drain the qp before destroying it.  This prevents the receive completion
 * handler from accessing the queue pair while it is being destroyed.
539  */
540 static void srp_destroy_qp(struct srp_rdma_ch *ch)
541 {
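	/*
	 * The send CQ is polled directly (IB_POLL_DIRECT), so process any
	 * send completions that are still pending before draining and
	 * destroying the QP.
	 */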
542 	spin_lock_irq(&ch->lock);
543 	ib_process_cq_direct(ch->send_cq, -1);
544 	spin_unlock_irq(&ch->lock);
545 
546 	ib_drain_qp(ch->qp);
547 	ib_destroy_qp(ch->qp);
548 }
549 
550 static int srp_create_ch_ib(struct srp_rdma_ch *ch)
551 {
552 	struct srp_target_port *target = ch->target;
553 	struct srp_device *dev = target->srp_host->srp_dev;
554 	struct ib_qp_init_attr *init_attr;
555 	struct ib_cq *recv_cq, *send_cq;
556 	struct ib_qp *qp;
557 	struct ib_fmr_pool *fmr_pool = NULL;
558 	struct srp_fr_pool *fr_pool = NULL;
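	/*
	 * Maximum number of send work requests per SCSI command: one for the
	 * SRP_CMD IU plus, when fast registration is used, one IB_WR_REG_MR
	 * and one IB_WR_LOCAL_INV work request per memory region.
	 */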
559 	const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
560 	int ret;
561 
562 	init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
563 	if (!init_attr)
564 		return -ENOMEM;
565 
566 	/* queue_size + 1 for ib_drain_rq() */
567 	recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
568 				ch->comp_vector, IB_POLL_SOFTIRQ);
569 	if (IS_ERR(recv_cq)) {
570 		ret = PTR_ERR(recv_cq);
571 		goto err;
572 	}
573 
574 	send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
575 				ch->comp_vector, IB_POLL_DIRECT);
576 	if (IS_ERR(send_cq)) {
577 		ret = PTR_ERR(send_cq);
578 		goto err_recv_cq;
579 	}
580 
581 	init_attr->event_handler       = srp_qp_event;
582 	init_attr->cap.max_send_wr     = m * target->queue_size;
583 	init_attr->cap.max_recv_wr     = target->queue_size + 1;
584 	init_attr->cap.max_recv_sge    = 1;
585 	init_attr->cap.max_send_sge    = SRP_MAX_SGE;
586 	init_attr->sq_sig_type         = IB_SIGNAL_REQ_WR;
587 	init_attr->qp_type             = IB_QPT_RC;
588 	init_attr->send_cq             = send_cq;
589 	init_attr->recv_cq             = recv_cq;
590 
591 	if (target->using_rdma_cm) {
592 		ret = rdma_create_qp(ch->rdma_cm.cm_id, dev->pd, init_attr);
593 		qp = ch->rdma_cm.cm_id->qp;
594 	} else {
595 		qp = ib_create_qp(dev->pd, init_attr);
596 		if (!IS_ERR(qp)) {
597 			ret = srp_init_ib_qp(target, qp);
598 			if (ret)
599 				ib_destroy_qp(qp);
600 		} else {
601 			ret = PTR_ERR(qp);
602 		}
603 	}
604 	if (ret) {
605 		pr_err("QP creation failed for dev %s: %d\n",
606 		       dev_name(&dev->dev->dev), ret);
607 		goto err_send_cq;
608 	}
609 
610 	if (dev->use_fast_reg) {
611 		fr_pool = srp_alloc_fr_pool(target);
612 		if (IS_ERR(fr_pool)) {
613 			ret = PTR_ERR(fr_pool);
614 			shost_printk(KERN_WARNING, target->scsi_host, PFX
615 				     "FR pool allocation failed (%d)\n", ret);
616 			goto err_qp;
617 		}
618 	} else if (dev->use_fmr) {
619 		fmr_pool = srp_alloc_fmr_pool(target);
620 		if (IS_ERR(fmr_pool)) {
621 			ret = PTR_ERR(fmr_pool);
622 			shost_printk(KERN_WARNING, target->scsi_host, PFX
623 				     "FMR pool allocation failed (%d)\n", ret);
624 			goto err_qp;
625 		}
626 	}
627 
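	/*
	 * When reconnecting, the channel may still own resources from a
	 * previous connection; release those before installing the new ones.
	 */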
628 	if (ch->qp)
629 		srp_destroy_qp(ch);
630 	if (ch->recv_cq)
631 		ib_free_cq(ch->recv_cq);
632 	if (ch->send_cq)
633 		ib_free_cq(ch->send_cq);
634 
635 	ch->qp = qp;
636 	ch->recv_cq = recv_cq;
637 	ch->send_cq = send_cq;
638 
639 	if (dev->use_fast_reg) {
640 		if (ch->fr_pool)
641 			srp_destroy_fr_pool(ch->fr_pool);
642 		ch->fr_pool = fr_pool;
643 	} else if (dev->use_fmr) {
644 		if (ch->fmr_pool)
645 			ib_destroy_fmr_pool(ch->fmr_pool);
646 		ch->fmr_pool = fmr_pool;
647 	}
648 
649 	kfree(init_attr);
650 	return 0;
651 
652 err_qp:
653 	if (target->using_rdma_cm)
654 		rdma_destroy_qp(ch->rdma_cm.cm_id);
655 	else
656 		ib_destroy_qp(qp);
657 
658 err_send_cq:
659 	ib_free_cq(send_cq);
660 
661 err_recv_cq:
662 	ib_free_cq(recv_cq);
663 
664 err:
665 	kfree(init_attr);
666 	return ret;
667 }
668 
669 /*
670  * Note: this function may be called without srp_alloc_iu_bufs() having been
671  * invoked. Hence the ch->[rt]x_ring checks.
672  */
673 static void srp_free_ch_ib(struct srp_target_port *target,
674 			   struct srp_rdma_ch *ch)
675 {
676 	struct srp_device *dev = target->srp_host->srp_dev;
677 	int i;
678 
679 	if (!ch->target)
680 		return;
681 
682 	if (target->using_rdma_cm) {
683 		if (ch->rdma_cm.cm_id) {
684 			rdma_destroy_id(ch->rdma_cm.cm_id);
685 			ch->rdma_cm.cm_id = NULL;
686 		}
687 	} else {
688 		if (ch->ib_cm.cm_id) {
689 			ib_destroy_cm_id(ch->ib_cm.cm_id);
690 			ch->ib_cm.cm_id = NULL;
691 		}
692 	}
693 
	/* If srp_new_cm_id() succeeded but srp_create_ch_ib() did not, return. */
695 	if (!ch->qp)
696 		return;
697 
698 	if (dev->use_fast_reg) {
699 		if (ch->fr_pool)
700 			srp_destroy_fr_pool(ch->fr_pool);
701 	} else if (dev->use_fmr) {
702 		if (ch->fmr_pool)
703 			ib_destroy_fmr_pool(ch->fmr_pool);
704 	}
705 
706 	srp_destroy_qp(ch);
707 	ib_free_cq(ch->send_cq);
708 	ib_free_cq(ch->recv_cq);
709 
	/*
	 * Prevent the SCSI error handler from using this channel after it
	 * has been freed. The SCSI error handler may keep trying to perform
	 * recovery actions after scsi_remove_host() has returned.
	 */
716 	ch->target = NULL;
717 
718 	ch->qp = NULL;
719 	ch->send_cq = ch->recv_cq = NULL;
720 
721 	if (ch->rx_ring) {
722 		for (i = 0; i < target->queue_size; ++i)
723 			srp_free_iu(target->srp_host, ch->rx_ring[i]);
724 		kfree(ch->rx_ring);
725 		ch->rx_ring = NULL;
726 	}
727 	if (ch->tx_ring) {
728 		for (i = 0; i < target->queue_size; ++i)
729 			srp_free_iu(target->srp_host, ch->tx_ring[i]);
730 		kfree(ch->tx_ring);
731 		ch->tx_ring = NULL;
732 	}
733 }
734 
735 static void srp_path_rec_completion(int status,
736 				    struct sa_path_rec *pathrec,
737 				    void *ch_ptr)
738 {
739 	struct srp_rdma_ch *ch = ch_ptr;
740 	struct srp_target_port *target = ch->target;
741 
742 	ch->status = status;
743 	if (status)
744 		shost_printk(KERN_ERR, target->scsi_host,
745 			     PFX "Got failed path rec status %d\n", status);
746 	else
747 		ch->ib_cm.path = *pathrec;
748 	complete(&ch->done);
749 }
750 
751 static int srp_ib_lookup_path(struct srp_rdma_ch *ch)
752 {
753 	struct srp_target_port *target = ch->target;
754 	int ret;
755 
756 	ch->ib_cm.path.numb_path = 1;
757 
758 	init_completion(&ch->done);
759 
760 	ch->ib_cm.path_query_id = ib_sa_path_rec_get(&srp_sa_client,
761 					       target->srp_host->srp_dev->dev,
762 					       target->srp_host->port,
763 					       &ch->ib_cm.path,
764 					       IB_SA_PATH_REC_SERVICE_ID |
765 					       IB_SA_PATH_REC_DGID	 |
766 					       IB_SA_PATH_REC_SGID	 |
767 					       IB_SA_PATH_REC_NUMB_PATH	 |
768 					       IB_SA_PATH_REC_PKEY,
769 					       SRP_PATH_REC_TIMEOUT_MS,
770 					       GFP_KERNEL,
771 					       srp_path_rec_completion,
772 					       ch, &ch->ib_cm.path_query);
773 	if (ch->ib_cm.path_query_id < 0)
774 		return ch->ib_cm.path_query_id;
775 
776 	ret = wait_for_completion_interruptible(&ch->done);
777 	if (ret < 0)
778 		return ret;
779 
780 	if (ch->status < 0)
781 		shost_printk(KERN_WARNING, target->scsi_host,
782 			     PFX "Path record query failed: sgid %pI6, dgid %pI6, pkey %#04x, service_id %#16llx\n",
783 			     ch->ib_cm.path.sgid.raw, ch->ib_cm.path.dgid.raw,
784 			     be16_to_cpu(target->ib_cm.pkey),
785 			     be64_to_cpu(target->ib_cm.service_id));
786 
787 	return ch->status;
788 }
789 
790 static int srp_rdma_lookup_path(struct srp_rdma_ch *ch)
791 {
792 	struct srp_target_port *target = ch->target;
793 	int ret;
794 
795 	init_completion(&ch->done);
796 
797 	ret = rdma_resolve_route(ch->rdma_cm.cm_id, SRP_PATH_REC_TIMEOUT_MS);
798 	if (ret)
799 		return ret;
800 
	ret = wait_for_completion_interruptible(&ch->done);
	if (ret < 0)
		return ret;
802 
803 	if (ch->status != 0)
804 		shost_printk(KERN_WARNING, target->scsi_host,
805 			     PFX "Path resolution failed\n");
806 
807 	return ch->status;
808 }
809 
810 static int srp_lookup_path(struct srp_rdma_ch *ch)
811 {
812 	struct srp_target_port *target = ch->target;
813 
814 	return target->using_rdma_cm ? srp_rdma_lookup_path(ch) :
815 		srp_ib_lookup_path(ch);
816 }
817 
818 static u8 srp_get_subnet_timeout(struct srp_host *host)
819 {
820 	struct ib_port_attr attr;
821 	int ret;
822 	u8 subnet_timeout = 18;
823 
824 	ret = ib_query_port(host->srp_dev->dev, host->port, &attr);
825 	if (ret == 0)
826 		subnet_timeout = attr.subnet_timeout;
827 
828 	if (unlikely(subnet_timeout < 15))
829 		pr_warn("%s: subnet timeout %d may cause SRP login to fail.\n",
830 			dev_name(&host->srp_dev->dev->dev), subnet_timeout);
831 
832 	return subnet_timeout;
833 }
834 
835 static int srp_send_req(struct srp_rdma_ch *ch, uint32_t max_iu_len,
836 			bool multich)
837 {
838 	struct srp_target_port *target = ch->target;
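	/*
	 * Group the IB/CM and RDMA/CM login request variants in a single
	 * allocation; only the variant selected by target->using_rdma_cm is
	 * filled in and sent below.
	 */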
839 	struct {
840 		struct rdma_conn_param	  rdma_param;
841 		struct srp_login_req_rdma rdma_req;
842 		struct ib_cm_req_param	  ib_param;
843 		struct srp_login_req	  ib_req;
844 	} *req = NULL;
845 	char *ipi, *tpi;
846 	int status;
847 
848 	req = kzalloc(sizeof *req, GFP_KERNEL);
849 	if (!req)
850 		return -ENOMEM;
851 
852 	req->ib_param.flow_control = 1;
853 	req->ib_param.retry_count = target->tl_retry_count;
854 
855 	/*
856 	 * Pick some arbitrary defaults here; we could make these
857 	 * module parameters if anyone cared about setting them.
858 	 */
859 	req->ib_param.responder_resources = 4;
860 	req->ib_param.rnr_retry_count = 7;
861 	req->ib_param.max_cm_retries = 15;
862 
863 	req->ib_req.opcode = SRP_LOGIN_REQ;
864 	req->ib_req.tag = 0;
865 	req->ib_req.req_it_iu_len = cpu_to_be32(max_iu_len);
866 	req->ib_req.req_buf_fmt	= cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
867 					      SRP_BUF_FORMAT_INDIRECT);
868 	req->ib_req.req_flags = (multich ? SRP_MULTICHAN_MULTI :
869 				 SRP_MULTICHAN_SINGLE);
870 	if (srp_use_imm_data) {
871 		req->ib_req.req_flags |= SRP_IMMED_REQUESTED;
872 		req->ib_req.imm_data_offset = cpu_to_be16(SRP_IMM_DATA_OFFSET);
873 	}
874 
875 	if (target->using_rdma_cm) {
876 		req->rdma_param.flow_control = req->ib_param.flow_control;
877 		req->rdma_param.responder_resources =
878 			req->ib_param.responder_resources;
879 		req->rdma_param.initiator_depth = req->ib_param.initiator_depth;
880 		req->rdma_param.retry_count = req->ib_param.retry_count;
881 		req->rdma_param.rnr_retry_count = req->ib_param.rnr_retry_count;
882 		req->rdma_param.private_data = &req->rdma_req;
883 		req->rdma_param.private_data_len = sizeof(req->rdma_req);
884 
885 		req->rdma_req.opcode = req->ib_req.opcode;
886 		req->rdma_req.tag = req->ib_req.tag;
887 		req->rdma_req.req_it_iu_len = req->ib_req.req_it_iu_len;
888 		req->rdma_req.req_buf_fmt = req->ib_req.req_buf_fmt;
889 		req->rdma_req.req_flags	= req->ib_req.req_flags;
890 		req->rdma_req.imm_data_offset = req->ib_req.imm_data_offset;
891 
892 		ipi = req->rdma_req.initiator_port_id;
893 		tpi = req->rdma_req.target_port_id;
894 	} else {
895 		u8 subnet_timeout;
896 
897 		subnet_timeout = srp_get_subnet_timeout(target->srp_host);
898 
899 		req->ib_param.primary_path = &ch->ib_cm.path;
900 		req->ib_param.alternate_path = NULL;
901 		req->ib_param.service_id = target->ib_cm.service_id;
902 		get_random_bytes(&req->ib_param.starting_psn, 4);
903 		req->ib_param.starting_psn &= 0xffffff;
904 		req->ib_param.qp_num = ch->qp->qp_num;
905 		req->ib_param.qp_type = ch->qp->qp_type;
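		/*
		 * CM response timeouts and the subnet timeout are both
		 * encoded as 4.096 us * 2^value, so subnet_timeout + 2
		 * allows roughly four times the subnet packet lifetime for
		 * the CM exchange.
		 */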
906 		req->ib_param.local_cm_response_timeout = subnet_timeout + 2;
907 		req->ib_param.remote_cm_response_timeout = subnet_timeout + 2;
908 		req->ib_param.private_data = &req->ib_req;
909 		req->ib_param.private_data_len = sizeof(req->ib_req);
910 
911 		ipi = req->ib_req.initiator_port_id;
912 		tpi = req->ib_req.target_port_id;
913 	}
914 
915 	/*
916 	 * In the published SRP specification (draft rev. 16a), the
917 	 * port identifier format is 8 bytes of ID extension followed
918 	 * by 8 bytes of GUID.  Older drafts put the two halves in the
919 	 * opposite order, so that the GUID comes first.
920 	 *
921 	 * Targets conforming to these obsolete drafts can be
922 	 * recognized by the I/O Class they report.
923 	 */
924 	if (target->io_class == SRP_REV10_IB_IO_CLASS) {
925 		memcpy(ipi,     &target->sgid.global.interface_id, 8);
926 		memcpy(ipi + 8, &target->initiator_ext, 8);
927 		memcpy(tpi,     &target->ioc_guid, 8);
928 		memcpy(tpi + 8, &target->id_ext, 8);
929 	} else {
930 		memcpy(ipi,     &target->initiator_ext, 8);
931 		memcpy(ipi + 8, &target->sgid.global.interface_id, 8);
932 		memcpy(tpi,     &target->id_ext, 8);
933 		memcpy(tpi + 8, &target->ioc_guid, 8);
934 	}
935 
936 	/*
937 	 * Topspin/Cisco SRP targets will reject our login unless we
938 	 * zero out the first 8 bytes of our initiator port ID and set
939 	 * the second 8 bytes to the local node GUID.
940 	 */
941 	if (srp_target_is_topspin(target)) {
942 		shost_printk(KERN_DEBUG, target->scsi_host,
943 			     PFX "Topspin/Cisco initiator port ID workaround "
944 			     "activated for target GUID %016llx\n",
945 			     be64_to_cpu(target->ioc_guid));
946 		memset(ipi, 0, 8);
947 		memcpy(ipi + 8, &target->srp_host->srp_dev->dev->node_guid, 8);
948 	}
949 
950 	if (target->using_rdma_cm)
951 		status = rdma_connect(ch->rdma_cm.cm_id, &req->rdma_param);
952 	else
953 		status = ib_send_cm_req(ch->ib_cm.cm_id, &req->ib_param);
954 
955 	kfree(req);
956 
957 	return status;
958 }
959 
960 static bool srp_queue_remove_work(struct srp_target_port *target)
961 {
962 	bool changed = false;
963 
964 	spin_lock_irq(&target->lock);
965 	if (target->state != SRP_TARGET_REMOVED) {
966 		target->state = SRP_TARGET_REMOVED;
967 		changed = true;
968 	}
969 	spin_unlock_irq(&target->lock);
970 
971 	if (changed)
972 		queue_work(srp_remove_wq, &target->remove_work);
973 
974 	return changed;
975 }
976 
977 static void srp_disconnect_target(struct srp_target_port *target)
978 {
979 	struct srp_rdma_ch *ch;
980 	int i, ret;
981 
982 	/* XXX should send SRP_I_LOGOUT request */
983 
984 	for (i = 0; i < target->ch_count; i++) {
985 		ch = &target->ch[i];
986 		ch->connected = false;
987 		ret = 0;
988 		if (target->using_rdma_cm) {
989 			if (ch->rdma_cm.cm_id)
990 				rdma_disconnect(ch->rdma_cm.cm_id);
991 		} else {
992 			if (ch->ib_cm.cm_id)
993 				ret = ib_send_cm_dreq(ch->ib_cm.cm_id,
994 						      NULL, 0);
995 		}
996 		if (ret < 0) {
997 			shost_printk(KERN_DEBUG, target->scsi_host,
998 				     PFX "Sending CM DREQ failed\n");
999 		}
1000 	}
1001 }
1002 
1003 static void srp_free_req_data(struct srp_target_port *target,
1004 			      struct srp_rdma_ch *ch)
1005 {
1006 	struct srp_device *dev = target->srp_host->srp_dev;
1007 	struct ib_device *ibdev = dev->dev;
1008 	struct srp_request *req;
1009 	int i;
1010 
1011 	if (!ch->req_ring)
1012 		return;
1013 
1014 	for (i = 0; i < target->req_ring_size; ++i) {
1015 		req = &ch->req_ring[i];
1016 		if (dev->use_fast_reg) {
1017 			kfree(req->fr_list);
1018 		} else {
1019 			kfree(req->fmr_list);
1020 			kfree(req->map_page);
1021 		}
1022 		if (req->indirect_dma_addr) {
1023 			ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
1024 					    target->indirect_size,
1025 					    DMA_TO_DEVICE);
1026 		}
1027 		kfree(req->indirect_desc);
1028 	}
1029 
1030 	kfree(ch->req_ring);
1031 	ch->req_ring = NULL;
1032 }
1033 
1034 static int srp_alloc_req_data(struct srp_rdma_ch *ch)
1035 {
1036 	struct srp_target_port *target = ch->target;
1037 	struct srp_device *srp_dev = target->srp_host->srp_dev;
1038 	struct ib_device *ibdev = srp_dev->dev;
1039 	struct srp_request *req;
1040 	void *mr_list;
1041 	dma_addr_t dma_addr;
1042 	int i, ret = -ENOMEM;
1043 
1044 	ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
1045 			       GFP_KERNEL);
1046 	if (!ch->req_ring)
1047 		goto out;
1048 
1049 	for (i = 0; i < target->req_ring_size; ++i) {
1050 		req = &ch->req_ring[i];
1051 		mr_list = kmalloc_array(target->mr_per_cmd, sizeof(void *),
1052 					GFP_KERNEL);
1053 		if (!mr_list)
1054 			goto out;
1055 		if (srp_dev->use_fast_reg) {
1056 			req->fr_list = mr_list;
1057 		} else {
1058 			req->fmr_list = mr_list;
1059 			req->map_page = kmalloc_array(srp_dev->max_pages_per_mr,
1060 						      sizeof(void *),
1061 						      GFP_KERNEL);
1062 			if (!req->map_page)
1063 				goto out;
1064 		}
1065 		req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
1066 		if (!req->indirect_desc)
1067 			goto out;
1068 
1069 		dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
1070 					     target->indirect_size,
1071 					     DMA_TO_DEVICE);
1072 		if (ib_dma_mapping_error(ibdev, dma_addr))
1073 			goto out;
1074 
1075 		req->indirect_dma_addr = dma_addr;
1076 	}
1077 	ret = 0;
1078 
1079 out:
1080 	return ret;
1081 }
1082 
1083 /**
1084  * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
1085  * @shost: SCSI host whose attributes to remove from sysfs.
1086  *
 * Note: Any attributes defined in the host template that did not exist
 * before this function was invoked are ignored.
1089  */
1090 static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
1091 {
1092 	struct device_attribute **attr;
1093 
1094 	for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
1095 		device_remove_file(&shost->shost_dev, *attr);
1096 }
1097 
1098 static void srp_remove_target(struct srp_target_port *target)
1099 {
1100 	struct srp_rdma_ch *ch;
1101 	int i;
1102 
1103 	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
1104 
1105 	srp_del_scsi_host_attr(target->scsi_host);
1106 	srp_rport_get(target->rport);
1107 	srp_remove_host(target->scsi_host);
1108 	scsi_remove_host(target->scsi_host);
1109 	srp_stop_rport_timers(target->rport);
1110 	srp_disconnect_target(target);
1111 	kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net);
1112 	for (i = 0; i < target->ch_count; i++) {
1113 		ch = &target->ch[i];
1114 		srp_free_ch_ib(target, ch);
1115 	}
1116 	cancel_work_sync(&target->tl_err_work);
1117 	srp_rport_put(target->rport);
1118 	for (i = 0; i < target->ch_count; i++) {
1119 		ch = &target->ch[i];
1120 		srp_free_req_data(target, ch);
1121 	}
1122 	kfree(target->ch);
1123 	target->ch = NULL;
1124 
1125 	spin_lock(&target->srp_host->target_lock);
1126 	list_del(&target->list);
1127 	spin_unlock(&target->srp_host->target_lock);
1128 
1129 	scsi_host_put(target->scsi_host);
1130 }
1131 
1132 static void srp_remove_work(struct work_struct *work)
1133 {
1134 	struct srp_target_port *target =
1135 		container_of(work, struct srp_target_port, remove_work);
1136 
1137 	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
1138 
1139 	srp_remove_target(target);
1140 }
1141 
1142 static void srp_rport_delete(struct srp_rport *rport)
1143 {
1144 	struct srp_target_port *target = rport->lld_data;
1145 
1146 	srp_queue_remove_work(target);
1147 }
1148 
1149 /**
1150  * srp_connected_ch() - number of connected channels
1151  * @target: SRP target port.
1152  */
1153 static int srp_connected_ch(struct srp_target_port *target)
1154 {
1155 	int i, c = 0;
1156 
1157 	for (i = 0; i < target->ch_count; i++)
1158 		c += target->ch[i].connected;
1159 
1160 	return c;
1161 }
1162 
1163 static int srp_connect_ch(struct srp_rdma_ch *ch, uint32_t max_iu_len,
1164 			  bool multich)
1165 {
1166 	struct srp_target_port *target = ch->target;
1167 	int ret;
1168 
1169 	WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
1170 
1171 	ret = srp_lookup_path(ch);
1172 	if (ret)
1173 		goto out;
1174 
1175 	while (1) {
1176 		init_completion(&ch->done);
1177 		ret = srp_send_req(ch, max_iu_len, multich);
1178 		if (ret)
1179 			goto out;
1180 		ret = wait_for_completion_interruptible(&ch->done);
1181 		if (ret < 0)
1182 			goto out;
1183 
1184 		/*
1185 		 * The CM event handling code will set status to
1186 		 * SRP_PORT_REDIRECT if we get a port redirect REJ
1187 		 * back, or SRP_DLID_REDIRECT if we get a lid/qp
1188 		 * redirect REJ back.
1189 		 */
1190 		ret = ch->status;
1191 		switch (ret) {
1192 		case 0:
1193 			ch->connected = true;
1194 			goto out;
1195 
1196 		case SRP_PORT_REDIRECT:
1197 			ret = srp_lookup_path(ch);
1198 			if (ret)
1199 				goto out;
1200 			break;
1201 
1202 		case SRP_DLID_REDIRECT:
1203 			break;
1204 
1205 		case SRP_STALE_CONN:
1206 			shost_printk(KERN_ERR, target->scsi_host, PFX
1207 				     "giving up on stale connection\n");
1208 			ret = -ECONNRESET;
1209 			goto out;
1210 
1211 		default:
1212 			goto out;
1213 		}
1214 	}
1215 
1216 out:
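	/* A positive status means the login failed; report -ENODEV. */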
1217 	return ret <= 0 ? ret : -ENODEV;
1218 }
1219 
1220 static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
1221 {
1222 	srp_handle_qp_err(cq, wc, "INV RKEY");
1223 }
1224 
1225 static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
1226 		u32 rkey)
1227 {
1228 	struct ib_send_wr wr = {
1229 		.opcode		    = IB_WR_LOCAL_INV,
1230 		.next		    = NULL,
1231 		.num_sge	    = 0,
1232 		.send_flags	    = 0,
1233 		.ex.invalidate_rkey = rkey,
1234 	};
1235 
1236 	wr.wr_cqe = &req->reg_cqe;
1237 	req->reg_cqe.done = srp_inv_rkey_err_done;
1238 	return ib_post_send(ch->qp, &wr, NULL);
1239 }
1240 
1241 static void srp_unmap_data(struct scsi_cmnd *scmnd,
1242 			   struct srp_rdma_ch *ch,
1243 			   struct srp_request *req)
1244 {
1245 	struct srp_target_port *target = ch->target;
1246 	struct srp_device *dev = target->srp_host->srp_dev;
1247 	struct ib_device *ibdev = dev->dev;
1248 	int i, res;
1249 
1250 	if (!scsi_sglist(scmnd) ||
1251 	    (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1252 	     scmnd->sc_data_direction != DMA_FROM_DEVICE))
1253 		return;
1254 
1255 	if (dev->use_fast_reg) {
1256 		struct srp_fr_desc **pfr;
1257 
1258 		for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1259 			res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
1260 			if (res < 0) {
1261 				shost_printk(KERN_ERR, target->scsi_host, PFX
1262 				  "Queueing INV WR for rkey %#x failed (%d)\n",
1263 				  (*pfr)->mr->rkey, res);
1264 				queue_work(system_long_wq,
1265 					   &target->tl_err_work);
1266 			}
1267 		}
1268 		if (req->nmdesc)
1269 			srp_fr_pool_put(ch->fr_pool, req->fr_list,
1270 					req->nmdesc);
1271 	} else if (dev->use_fmr) {
1272 		struct ib_pool_fmr **pfmr;
1273 
1274 		for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
1275 			ib_fmr_pool_unmap(*pfmr);
1276 	}
1277 
1278 	ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1279 			scmnd->sc_data_direction);
1280 }
1281 
1282 /**
 * srp_claim_req() - Take ownership of the scmnd associated with a request.
1284  * @ch: SRP RDMA channel.
1285  * @req: SRP request.
 * @sdev: If not NULL, only take ownership of @req->scmnd if it was issued
 *        to this SCSI device.
1287  * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1288  *         ownership of @req->scmnd if it equals @scmnd.
1289  *
1290  * Return value:
1291  * Either NULL or a pointer to the SCSI command the caller became owner of.
1292  */
1293 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1294 				       struct srp_request *req,
1295 				       struct scsi_device *sdev,
1296 				       struct scsi_cmnd *scmnd)
1297 {
1298 	unsigned long flags;
1299 
1300 	spin_lock_irqsave(&ch->lock, flags);
1301 	if (req->scmnd &&
1302 	    (!sdev || req->scmnd->device == sdev) &&
1303 	    (!scmnd || req->scmnd == scmnd)) {
1304 		scmnd = req->scmnd;
1305 		req->scmnd = NULL;
1306 	} else {
1307 		scmnd = NULL;
1308 	}
1309 	spin_unlock_irqrestore(&ch->lock, flags);
1310 
1311 	return scmnd;
1312 }
1313 
1314 /**
1315  * srp_free_req() - Unmap data and adjust ch->req_lim.
1316  * @ch:     SRP RDMA channel.
1317  * @req:    Request to be freed.
1318  * @scmnd:  SCSI command associated with @req.
 * @req_lim_delta: Amount to be added to @ch->req_lim.
1320  */
1321 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1322 			 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1323 {
1324 	unsigned long flags;
1325 
1326 	srp_unmap_data(scmnd, ch, req);
1327 
1328 	spin_lock_irqsave(&ch->lock, flags);
1329 	ch->req_lim += req_lim_delta;
1330 	spin_unlock_irqrestore(&ch->lock, flags);
1331 }
1332 
1333 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1334 			   struct scsi_device *sdev, int result)
1335 {
1336 	struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1337 
1338 	if (scmnd) {
1339 		srp_free_req(ch, req, scmnd, 0);
1340 		scmnd->result = result;
1341 		scmnd->scsi_done(scmnd);
1342 	}
1343 }
1344 
1345 static void srp_terminate_io(struct srp_rport *rport)
1346 {
1347 	struct srp_target_port *target = rport->lld_data;
1348 	struct srp_rdma_ch *ch;
1349 	int i, j;
1350 
1351 	for (i = 0; i < target->ch_count; i++) {
1352 		ch = &target->ch[i];
1353 
1354 		for (j = 0; j < target->req_ring_size; ++j) {
1355 			struct srp_request *req = &ch->req_ring[j];
1356 
1357 			srp_finish_req(ch, req, NULL,
1358 				       DID_TRANSPORT_FAILFAST << 16);
1359 		}
1360 	}
1361 }
1362 
1363 /* Calculate maximum initiator to target information unit length. */
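/*
 * The IU must hold an SRP_CMD header with a maximum-size additional CDB and
 * an indirect descriptor table of cmd_sg_cnt direct descriptors. When
 * immediate data is used, the IU must additionally be able to carry
 * SRP_IMM_DATA_OFFSET header bytes plus srp_max_imm_data payload bytes, so
 * the larger of the two sizes is returned.
 */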
1364 static uint32_t srp_max_it_iu_len(int cmd_sg_cnt, bool use_imm_data)
1365 {
1366 	uint32_t max_iu_len = sizeof(struct srp_cmd) + SRP_MAX_ADD_CDB_LEN +
1367 		sizeof(struct srp_indirect_buf) +
1368 		cmd_sg_cnt * sizeof(struct srp_direct_buf);
1369 
1370 	if (use_imm_data)
1371 		max_iu_len = max(max_iu_len, SRP_IMM_DATA_OFFSET +
1372 				 srp_max_imm_data);
1373 
1374 	return max_iu_len;
1375 }
1376 
1377 /*
1378  * It is up to the caller to ensure that srp_rport_reconnect() calls are
1379  * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1380  * srp_reset_device() or srp_reset_host() calls will occur while this function
1381  * is in progress. One way to realize that is not to call this function
1382  * directly but to call srp_reconnect_rport() instead since that last function
1383  * serializes calls of this function via rport->mutex and also blocks
1384  * srp_queuecommand() calls before invoking this function.
1385  */
1386 static int srp_rport_reconnect(struct srp_rport *rport)
1387 {
1388 	struct srp_target_port *target = rport->lld_data;
1389 	struct srp_rdma_ch *ch;
1390 	uint32_t max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt,
1391 						srp_use_imm_data);
1392 	int i, j, ret = 0;
1393 	bool multich = false;
1394 
1395 	srp_disconnect_target(target);
1396 
1397 	if (target->state == SRP_TARGET_SCANNING)
1398 		return -ENODEV;
1399 
1400 	/*
1401 	 * Now get a new local CM ID so that we avoid confusing the target in
1402 	 * case things are really fouled up. Doing so also ensures that all CM
1403 	 * callbacks will have finished before a new QP is allocated.
1404 	 */
1405 	for (i = 0; i < target->ch_count; i++) {
1406 		ch = &target->ch[i];
1407 		ret += srp_new_cm_id(ch);
1408 	}
1409 	for (i = 0; i < target->ch_count; i++) {
1410 		ch = &target->ch[i];
1411 		for (j = 0; j < target->req_ring_size; ++j) {
1412 			struct srp_request *req = &ch->req_ring[j];
1413 
1414 			srp_finish_req(ch, req, NULL, DID_RESET << 16);
1415 		}
1416 	}
1417 	for (i = 0; i < target->ch_count; i++) {
1418 		ch = &target->ch[i];
1419 		/*
1420 		 * Whether or not creating a new CM ID succeeded, create a new
1421 		 * QP. This guarantees that all completion callback function
1422 		 * invocations have finished before request resetting starts.
1423 		 */
1424 		ret += srp_create_ch_ib(ch);
1425 
1426 		INIT_LIST_HEAD(&ch->free_tx);
1427 		for (j = 0; j < target->queue_size; ++j)
1428 			list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1429 	}
1430 
1431 	target->qp_in_error = false;
1432 
1433 	for (i = 0; i < target->ch_count; i++) {
1434 		ch = &target->ch[i];
1435 		if (ret)
1436 			break;
1437 		ret = srp_connect_ch(ch, max_iu_len, multich);
1438 		multich = true;
1439 	}
1440 
1441 	if (ret == 0)
1442 		shost_printk(KERN_INFO, target->scsi_host,
1443 			     PFX "reconnect succeeded\n");
1444 
1445 	return ret;
1446 }
1447 
1448 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1449 			 unsigned int dma_len, u32 rkey)
1450 {
1451 	struct srp_direct_buf *desc = state->desc;
1452 
1453 	WARN_ON_ONCE(!dma_len);
1454 
1455 	desc->va = cpu_to_be64(dma_addr);
1456 	desc->key = cpu_to_be32(rkey);
1457 	desc->len = cpu_to_be32(dma_len);
1458 
1459 	state->total_len += dma_len;
1460 	state->desc++;
1461 	state->ndesc++;
1462 }
1463 
1464 static int srp_map_finish_fmr(struct srp_map_state *state,
1465 			      struct srp_rdma_ch *ch)
1466 {
1467 	struct srp_target_port *target = ch->target;
1468 	struct srp_device *dev = target->srp_host->srp_dev;
1469 	struct ib_pool_fmr *fmr;
1470 	u64 io_addr = 0;
1471 
1472 	if (state->fmr.next >= state->fmr.end) {
1473 		shost_printk(KERN_ERR, ch->target->scsi_host,
1474 			     PFX "Out of MRs (mr_per_cmd = %d)\n",
1475 			     ch->target->mr_per_cmd);
1476 		return -ENOMEM;
1477 	}
1478 
1479 	WARN_ON_ONCE(!dev->use_fmr);
1480 
1481 	if (state->npages == 0)
1482 		return 0;
1483 
1484 	if (state->npages == 1 && target->global_rkey) {
1485 		srp_map_desc(state, state->base_dma_addr, state->dma_len,
1486 			     target->global_rkey);
1487 		goto reset_state;
1488 	}
1489 
1490 	fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1491 				   state->npages, io_addr);
1492 	if (IS_ERR(fmr))
1493 		return PTR_ERR(fmr);
1494 
1495 	*state->fmr.next++ = fmr;
1496 	state->nmdesc++;
1497 
1498 	srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
1499 		     state->dma_len, fmr->fmr->rkey);
1500 
1501 reset_state:
1502 	state->npages = 0;
1503 	state->dma_len = 0;
1504 
1505 	return 0;
1506 }
1507 
1508 static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
1509 {
1510 	srp_handle_qp_err(cq, wc, "FAST REG");
1511 }
1512 
1513 /*
1514  * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset
1515  * where to start in the first element. If sg_offset_p != NULL then
1516  * *sg_offset_p is updated to the offset in state->sg[retval] of the first
1517  * byte that has not yet been mapped.
1518  */
1519 static int srp_map_finish_fr(struct srp_map_state *state,
1520 			     struct srp_request *req,
1521 			     struct srp_rdma_ch *ch, int sg_nents,
1522 			     unsigned int *sg_offset_p)
1523 {
1524 	struct srp_target_port *target = ch->target;
1525 	struct srp_device *dev = target->srp_host->srp_dev;
1526 	struct ib_reg_wr wr;
1527 	struct srp_fr_desc *desc;
1528 	u32 rkey;
1529 	int n, err;
1530 
1531 	if (state->fr.next >= state->fr.end) {
1532 		shost_printk(KERN_ERR, ch->target->scsi_host,
1533 			     PFX "Out of MRs (mr_per_cmd = %d)\n",
1534 			     ch->target->mr_per_cmd);
1535 		return -ENOMEM;
1536 	}
1537 
1538 	WARN_ON_ONCE(!dev->use_fast_reg);
1539 
1540 	if (sg_nents == 1 && target->global_rkey) {
1541 		unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1542 
1543 		srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
1544 			     sg_dma_len(state->sg) - sg_offset,
1545 			     target->global_rkey);
1546 		if (sg_offset_p)
1547 			*sg_offset_p = 0;
1548 		return 1;
1549 	}
1550 
1551 	desc = srp_fr_pool_get(ch->fr_pool);
1552 	if (!desc)
1553 		return -ENOMEM;
1554 
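	/*
	 * Change the key portion of the rkey before each registration so
	 * that a stale rkey from a previous use of this MR cannot be used
	 * to access the newly registered memory.
	 */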
1555 	rkey = ib_inc_rkey(desc->mr->rkey);
1556 	ib_update_fast_reg_key(desc->mr, rkey);
1557 
1558 	n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
1559 			 dev->mr_page_size);
1560 	if (unlikely(n < 0)) {
1561 		srp_fr_pool_put(ch->fr_pool, &desc, 1);
1562 		pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
1563 			 dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
1564 			 sg_offset_p ? *sg_offset_p : -1, n);
1565 		return n;
1566 	}
1567 
1568 	WARN_ON_ONCE(desc->mr->length == 0);
1569 
1570 	req->reg_cqe.done = srp_reg_mr_err_done;
1571 
1572 	wr.wr.next = NULL;
1573 	wr.wr.opcode = IB_WR_REG_MR;
1574 	wr.wr.wr_cqe = &req->reg_cqe;
1575 	wr.wr.num_sge = 0;
1576 	wr.wr.send_flags = 0;
1577 	wr.mr = desc->mr;
1578 	wr.key = desc->mr->rkey;
1579 	wr.access = (IB_ACCESS_LOCAL_WRITE |
1580 		     IB_ACCESS_REMOTE_READ |
1581 		     IB_ACCESS_REMOTE_WRITE);
1582 
1583 	*state->fr.next++ = desc;
1584 	state->nmdesc++;
1585 
1586 	srp_map_desc(state, desc->mr->iova,
1587 		     desc->mr->length, desc->mr->rkey);
1588 
1589 	err = ib_post_send(ch->qp, &wr.wr, NULL);
1590 	if (unlikely(err)) {
1591 		WARN_ON_ONCE(err == -ENOMEM);
1592 		return err;
1593 	}
1594 
1595 	return n;
1596 }
1597 
1598 static int srp_map_sg_entry(struct srp_map_state *state,
1599 			    struct srp_rdma_ch *ch,
1600 			    struct scatterlist *sg)
1601 {
1602 	struct srp_target_port *target = ch->target;
1603 	struct srp_device *dev = target->srp_host->srp_dev;
1604 	struct ib_device *ibdev = dev->dev;
1605 	dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
1606 	unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
1607 	unsigned int len = 0;
1608 	int ret;
1609 
1610 	WARN_ON_ONCE(!dma_len);
1611 
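	/*
	 * Split this S/G element into mr_page_size chunks. Close the current
	 * mapping whenever it is full or whenever a chunk does not start on
	 * a page boundary, since only the first page of a mapping may have a
	 * non-zero offset.
	 */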
1612 	while (dma_len) {
1613 		unsigned offset = dma_addr & ~dev->mr_page_mask;
1614 
1615 		if (state->npages == dev->max_pages_per_mr ||
1616 		    (state->npages > 0 && offset != 0)) {
1617 			ret = srp_map_finish_fmr(state, ch);
1618 			if (ret)
1619 				return ret;
1620 		}
1621 
1622 		len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1623 
1624 		if (!state->npages)
1625 			state->base_dma_addr = dma_addr;
1626 		state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1627 		state->dma_len += len;
1628 		dma_addr += len;
1629 		dma_len -= len;
1630 	}
1631 
1632 	/*
1633 	 * If the end of the MR is not on a page boundary then we need to
1634 	 * close it out and start a new one -- we can only merge at page
1635 	 * boundaries.
1636 	 */
1637 	ret = 0;
1638 	if ((dma_addr & ~dev->mr_page_mask) != 0)
1639 		ret = srp_map_finish_fmr(state, ch);
1640 	return ret;
1641 }
1642 
1643 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1644 			  struct srp_request *req, struct scatterlist *scat,
1645 			  int count)
1646 {
1647 	struct scatterlist *sg;
1648 	int i, ret;
1649 
1650 	state->pages = req->map_page;
1651 	state->fmr.next = req->fmr_list;
1652 	state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;
1653 
1654 	for_each_sg(scat, sg, count, i) {
1655 		ret = srp_map_sg_entry(state, ch, sg);
1656 		if (ret)
1657 			return ret;
1658 	}
1659 
1660 	ret = srp_map_finish_fmr(state, ch);
1661 	if (ret)
1662 		return ret;
1663 
1664 	return 0;
1665 }
1666 
1667 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1668 			 struct srp_request *req, struct scatterlist *scat,
1669 			 int count)
1670 {
1671 	unsigned int sg_offset = 0;
1672 
1673 	state->fr.next = req->fr_list;
1674 	state->fr.end = req->fr_list + ch->target->mr_per_cmd;
1675 	state->sg = scat;
1676 
1677 	if (count == 0)
1678 		return 0;
1679 
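	/*
	 * Each srp_map_finish_fr() call registers as many S/G entries as fit
	 * into one memory region and returns the number of entries it
	 * consumed; repeat until the whole list has been mapped.
	 */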
1680 	while (count) {
1681 		int i, n;
1682 
1683 		n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
1684 		if (unlikely(n < 0))
1685 			return n;
1686 
1687 		count -= n;
1688 		for (i = 0; i < n; i++)
1689 			state->sg = sg_next(state->sg);
1690 	}
1691 
1692 	return 0;
1693 }
1694 
1695 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1696 			  struct srp_request *req, struct scatterlist *scat,
1697 			  int count)
1698 {
1699 	struct srp_target_port *target = ch->target;
1700 	struct srp_device *dev = target->srp_host->srp_dev;
1701 	struct scatterlist *sg;
1702 	int i;
1703 
1704 	for_each_sg(scat, sg, count, i) {
1705 		srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
1706 			     ib_sg_dma_len(dev->dev, sg),
1707 			     target->global_rkey);
1708 	}
1709 
1710 	return 0;
1711 }
1712 
1713 /*
1714  * Register the indirect data buffer descriptor with the HCA.
1715  *
1716  * Note: since the indirect data buffer descriptor has been allocated with
1717  * kmalloc() it is guaranteed that this buffer is a physically contiguous
1718  * memory buffer.
1719  */
1720 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1721 		       void **next_mr, void **end_mr, u32 idb_len,
1722 		       __be32 *idb_rkey)
1723 {
1724 	struct srp_target_port *target = ch->target;
1725 	struct srp_device *dev = target->srp_host->srp_dev;
1726 	struct srp_map_state state;
1727 	struct srp_direct_buf idb_desc;
1728 	u64 idb_pages[1];
1729 	struct scatterlist idb_sg[1];
1730 	int ret;
1731 
1732 	memset(&state, 0, sizeof(state));
1733 	memset(&idb_desc, 0, sizeof(idb_desc));
1734 	state.gen.next = next_mr;
1735 	state.gen.end = end_mr;
1736 	state.desc = &idb_desc;
1737 	state.base_dma_addr = req->indirect_dma_addr;
1738 	state.dma_len = idb_len;
1739 
1740 	if (dev->use_fast_reg) {
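		/*
		 * Build a one-entry S/G list for the indirect descriptor
		 * buffer and point its DMA address at the mapping set up by
		 * srp_alloc_req_data(), so that the buffer can be registered
		 * through the regular fast registration path.
		 */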
1741 		state.sg = idb_sg;
1742 		sg_init_one(idb_sg, req->indirect_desc, idb_len);
1743 		idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1744 #ifdef CONFIG_NEED_SG_DMA_LENGTH
1745 		idb_sg->dma_length = idb_sg->length;	      /* hack^2 */
1746 #endif
1747 		ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
1748 		if (ret < 0)
1749 			return ret;
1750 		WARN_ON_ONCE(ret < 1);
1751 	} else if (dev->use_fmr) {
1752 		state.pages = idb_pages;
1753 		state.pages[0] = (req->indirect_dma_addr &
1754 				  dev->mr_page_mask);
1755 		state.npages = 1;
1756 		ret = srp_map_finish_fmr(&state, ch);
1757 		if (ret < 0)
1758 			return ret;
1759 	} else {
1760 		return -EINVAL;
1761 	}
1762 
1763 	*idb_rkey = idb_desc.key;
1764 
1765 	return 0;
1766 }
1767 
1768 static void srp_check_mapping(struct srp_map_state *state,
1769 			      struct srp_rdma_ch *ch, struct srp_request *req,
1770 			      struct scatterlist *scat, int count)
1771 {
1772 	struct srp_device *dev = ch->target->srp_host->srp_dev;
1773 	struct srp_fr_desc **pfr;
1774 	u64 desc_len = 0, mr_len = 0;
1775 	int i;
1776 
1777 	for (i = 0; i < state->ndesc; i++)
1778 		desc_len += be32_to_cpu(req->indirect_desc[i].len);
1779 	if (dev->use_fast_reg)
1780 		for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
1781 			mr_len += (*pfr)->mr->length;
1782 	else if (dev->use_fmr)
1783 		for (i = 0; i < state->nmdesc; i++)
1784 			mr_len += be32_to_cpu(req->indirect_desc[i].len);
1785 	if (desc_len != scsi_bufflen(req->scmnd) ||
1786 	    mr_len > scsi_bufflen(req->scmnd))
1787 		pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
1788 		       scsi_bufflen(req->scmnd), desc_len, mr_len,
1789 		       state->ndesc, state->nmdesc);
1790 }
1791 
1792 /**
1793  * srp_map_data() - map SCSI data buffer onto an SRP request
1794  * @scmnd: SCSI command to map
1795  * @ch: SRP RDMA channel
1796  * @req: SRP request
1797  *
1798  * Returns the length in bytes of the SRP_CMD IU or a negative value if
1799  * mapping failed. The size of any immediate data is not included in the
1800  * return value.
1801  */
1802 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1803 			struct srp_request *req)
1804 {
1805 	struct srp_target_port *target = ch->target;
1806 	struct scatterlist *scat, *sg;
1807 	struct srp_cmd *cmd = req->cmd->buf;
1808 	int i, len, nents, count, ret;
1809 	struct srp_device *dev;
1810 	struct ib_device *ibdev;
1811 	struct srp_map_state state;
1812 	struct srp_indirect_buf *indirect_hdr;
1813 	u64 data_len;
1814 	u32 idb_len, table_len;
1815 	__be32 idb_rkey;
1816 	u8 fmt;
1817 
1818 	req->cmd->num_sge = 1;
1819 
1820 	if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1821 		return sizeof(struct srp_cmd) + cmd->add_cdb_len;
1822 
1823 	if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1824 	    scmnd->sc_data_direction != DMA_TO_DEVICE) {
1825 		shost_printk(KERN_WARNING, target->scsi_host,
1826 			     PFX "Unhandled data direction %d\n",
1827 			     scmnd->sc_data_direction);
1828 		return -EINVAL;
1829 	}
1830 
1831 	nents = scsi_sg_count(scmnd);
1832 	scat  = scsi_sglist(scmnd);
1833 	data_len = scsi_bufflen(scmnd);
1834 
1835 	dev = target->srp_host->srp_dev;
1836 	ibdev = dev->dev;
1837 
1838 	count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1839 	if (unlikely(count == 0))
1840 		return -EIO;
1841 
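	/*
	 * Use the SRP immediate data format when the target supports it, the
	 * mapped S/G list and payload fit inside the initiator-to-target IU
	 * and the command is a write (DMA_TO_DEVICE).
	 */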
1842 	if (ch->use_imm_data &&
1843 	    count <= SRP_MAX_IMM_SGE &&
1844 	    SRP_IMM_DATA_OFFSET + data_len <= ch->max_it_iu_len &&
1845 	    scmnd->sc_data_direction == DMA_TO_DEVICE) {
1846 		struct srp_imm_buf *buf;
1847 		struct ib_sge *sge = &req->cmd->sge[1];
1848 
1849 		fmt = SRP_DATA_DESC_IMM;
1850 		len = SRP_IMM_DATA_OFFSET;
1851 		req->nmdesc = 0;
1852 		buf = (void *)cmd->add_data + cmd->add_cdb_len;
1853 		buf->len = cpu_to_be32(data_len);
1854 		WARN_ON_ONCE((void *)(buf + 1) > (void *)cmd + len);
1855 		for_each_sg(scat, sg, count, i) {
1856 			sge[i].addr   = ib_sg_dma_address(ibdev, sg);
1857 			sge[i].length = ib_sg_dma_len(ibdev, sg);
1858 			sge[i].lkey   = target->lkey;
1859 		}
1860 		req->cmd->num_sge += count;
1861 		goto map_complete;
1862 	}
1863 
1864 	fmt = SRP_DATA_DESC_DIRECT;
1865 	len = sizeof(struct srp_cmd) + cmd->add_cdb_len +
1866 		sizeof(struct srp_direct_buf);
1867 
1868 	if (count == 1 && target->global_rkey) {
1869 		/*
1870 		 * The midlayer only generated a single gather/scatter
1871 		 * entry, or DMA mapping coalesced everything to a
1872 		 * single entry.  So a direct descriptor along with
1873 		 * the DMA MR suffices.
1874 		 */
1875 		struct srp_direct_buf *buf;
1876 
1877 		buf = (void *)cmd->add_data + cmd->add_cdb_len;
1878 		buf->va  = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1879 		buf->key = cpu_to_be32(target->global_rkey);
1880 		buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
1881 
1882 		req->nmdesc = 0;
1883 		goto map_complete;
1884 	}
1885 
1886 	/*
1887 	 * We have more than one scatter/gather entry, so build our indirect
1888 	 * descriptor table, trying to merge as many entries as we can.
1889 	 */
1890 	indirect_hdr = (void *)cmd->add_data + cmd->add_cdb_len;
1891 
1892 	ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1893 				   target->indirect_size, DMA_TO_DEVICE);
1894 
1895 	memset(&state, 0, sizeof(state));
1896 	state.desc = req->indirect_desc;
1897 	if (dev->use_fast_reg)
1898 		ret = srp_map_sg_fr(&state, ch, req, scat, count);
1899 	else if (dev->use_fmr)
1900 		ret = srp_map_sg_fmr(&state, ch, req, scat, count);
1901 	else
1902 		ret = srp_map_sg_dma(&state, ch, req, scat, count);
1903 	req->nmdesc = state.nmdesc;
1904 	if (ret < 0)
1905 		goto unmap;
1906 
1907 	{
1908 		DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
1909 			"Memory mapping consistency check");
1910 		if (DYNAMIC_DEBUG_BRANCH(ddm))
1911 			srp_check_mapping(&state, ch, req, scat, count);
1912 	}
1913 
1914 	/* We've mapped the request, now pull as much of the indirect
1915 	 * descriptor table as we can into the command buffer. If this
1916 	 * target is not using an external indirect table, we are
1917 	 * guaranteed to fit into the command, as the SCSI layer won't
1918 	 * give us more S/G entries than we allow.
1919 	 */
1920 	if (state.ndesc == 1) {
1921 		/*
1922 		 * Memory registration collapsed the sg-list into one entry,
1923 		 * so use a direct descriptor.
1924 		 */
1925 		struct srp_direct_buf *buf;
1926 
1927 		buf = (void *)cmd->add_data + cmd->add_cdb_len;
1928 		*buf = req->indirect_desc[0];
1929 		goto map_complete;
1930 	}
1931 
1932 	if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1933 						!target->allow_ext_sg)) {
1934 		shost_printk(KERN_ERR, target->scsi_host,
1935 			     "Could not fit S/G list into SRP_CMD\n");
1936 		ret = -EIO;
1937 		goto unmap;
1938 	}
1939 
1940 	count = min(state.ndesc, target->cmd_sg_cnt);
1941 	table_len = state.ndesc * sizeof (struct srp_direct_buf);
1942 	idb_len = sizeof(struct srp_indirect_buf) + table_len;
1943 
1944 	fmt = SRP_DATA_DESC_INDIRECT;
1945 	len = sizeof(struct srp_cmd) + cmd->add_cdb_len +
1946 		sizeof(struct srp_indirect_buf);
1947 	len += count * sizeof (struct srp_direct_buf);
1948 
1949 	memcpy(indirect_hdr->desc_list, req->indirect_desc,
1950 	       count * sizeof (struct srp_direct_buf));
1951 
1952 	if (!target->global_rkey) {
1953 		ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
1954 				  idb_len, &idb_rkey);
1955 		if (ret < 0)
1956 			goto unmap;
1957 		req->nmdesc++;
1958 	} else {
1959 		idb_rkey = cpu_to_be32(target->global_rkey);
1960 	}
1961 
1962 	indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1963 	indirect_hdr->table_desc.key = idb_rkey;
1964 	indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1965 	indirect_hdr->len = cpu_to_be32(state.total_len);
1966 
1967 	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1968 		cmd->data_out_desc_cnt = count;
1969 	else
1970 		cmd->data_in_desc_cnt = count;
1971 
1972 	ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1973 				      DMA_TO_DEVICE);
1974 
1975 map_complete:
1976 	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1977 		cmd->buf_fmt = fmt << 4;
1978 	else
1979 		cmd->buf_fmt = fmt;
1980 
1981 	return len;
1982 
1983 unmap:
1984 	srp_unmap_data(scmnd, ch, req);
1985 	if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
1986 		ret = -E2BIG;
1987 	return ret;
1988 }
1989 
/*
 * Return an IU to the free pool and, for IU types other than SRP_IU_RSP,
 * also give back the associated request limit credit.
 */
1993 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1994 			  enum srp_iu_type iu_type)
1995 {
1996 	unsigned long flags;
1997 
1998 	spin_lock_irqsave(&ch->lock, flags);
1999 	list_add(&iu->list, &ch->free_tx);
2000 	if (iu_type != SRP_IU_RSP)
2001 		++ch->req_lim;
2002 	spin_unlock_irqrestore(&ch->lock, flags);
2003 }
2004 
2005 /*
2006  * Must be called with ch->lock held to protect req_lim and free_tx.
 * If the IU is not sent, it must be returned using srp_put_tx_iu().
2008  *
2009  * Note:
2010  * An upper limit for the number of allocated information units for each
2011  * request type is:
2012  * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
2013  *   more than Scsi_Host.can_queue requests.
2014  * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
2015  * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
2016  *   one unanswered SRP request to an initiator.
2017  */
2018 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
2019 				      enum srp_iu_type iu_type)
2020 {
2021 	struct srp_target_port *target = ch->target;
2022 	s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
2023 	struct srp_iu *iu;
2024 
2025 	lockdep_assert_held(&ch->lock);
2026 
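	/*
	 * Poll the send CQ so that srp_send_done() returns the IUs of
	 * completed sends to the free_tx list before that list is examined.
	 */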
2027 	ib_process_cq_direct(ch->send_cq, -1);
2028 
2029 	if (list_empty(&ch->free_tx))
2030 		return NULL;
2031 
2032 	/* Initiator responses to target requests do not consume credits */
2033 	if (iu_type != SRP_IU_RSP) {
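		/*
		 * Keep 'rsv' credits in reserve so that task management
		 * requests cannot be starved by regular SCSI commands.
		 */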
2034 		if (ch->req_lim <= rsv) {
2035 			++target->zero_req_lim;
2036 			return NULL;
2037 		}
2038 
2039 		--ch->req_lim;
2040 	}
2041 
2042 	iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
2043 	list_del(&iu->list);
2044 	return iu;
2045 }
2046 
2047 /*
2048  * Note: if this function is called from inside ib_drain_sq() then it will
2049  * be called without ch->lock being held. If ib_drain_sq() dequeues a WQE
2050  * with status IB_WC_SUCCESS then that's a bug.
2051  */
2052 static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
2053 {
2054 	struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
2055 	struct srp_rdma_ch *ch = cq->cq_context;
2056 
2057 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
2058 		srp_handle_qp_err(cq, wc, "SEND");
2059 		return;
2060 	}
2061 
2062 	lockdep_assert_held(&ch->lock);
2063 
2064 	list_add(&iu->list, &ch->free_tx);
2065 }
2066 
2067 /**
2068  * srp_post_send() - send an SRP information unit
2069  * @ch: RDMA channel over which to send the information unit.
2070  * @iu: Information unit to send.
2071  * @len: Length of the information unit excluding immediate data.
2072  */
2073 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
2074 {
2075 	struct srp_target_port *target = ch->target;
2076 	struct ib_send_wr wr;
2077 
2078 	if (WARN_ON_ONCE(iu->num_sge > SRP_MAX_SGE))
2079 		return -EINVAL;
2080 
2081 	iu->sge[0].addr   = iu->dma;
2082 	iu->sge[0].length = len;
2083 	iu->sge[0].lkey   = target->lkey;
2084 
2085 	iu->cqe.done = srp_send_done;
2086 
2087 	wr.next       = NULL;
2088 	wr.wr_cqe     = &iu->cqe;
2089 	wr.sg_list    = &iu->sge[0];
2090 	wr.num_sge    = iu->num_sge;
2091 	wr.opcode     = IB_WR_SEND;
2092 	wr.send_flags = IB_SEND_SIGNALED;
2093 
2094 	return ib_post_send(ch->qp, &wr, NULL);
2095 }
2096 
2097 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
2098 {
2099 	struct srp_target_port *target = ch->target;
2100 	struct ib_recv_wr wr;
2101 	struct ib_sge list;
2102 
2103 	list.addr   = iu->dma;
2104 	list.length = iu->size;
2105 	list.lkey   = target->lkey;
2106 
2107 	iu->cqe.done = srp_recv_done;
2108 
2109 	wr.next     = NULL;
2110 	wr.wr_cqe   = &iu->cqe;
2111 	wr.sg_list  = &list;
2112 	wr.num_sge  = 1;
2113 
2114 	return ib_post_recv(ch->qp, &wr, NULL);
2115 }
2116 
2117 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
2118 {
2119 	struct srp_target_port *target = ch->target;
2120 	struct srp_request *req;
2121 	struct scsi_cmnd *scmnd;
2122 	unsigned long flags;
2123 
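	/*
	 * Responses to task management functions carry the SRP_TAG_TSK_MGMT
	 * bit in their tag; all other responses are matched to a SCSI command
	 * through the block layer tag.
	 */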
2124 	if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
2125 		spin_lock_irqsave(&ch->lock, flags);
2126 		ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
2127 		if (rsp->tag == ch->tsk_mgmt_tag) {
2128 			ch->tsk_mgmt_status = -1;
2129 			if (be32_to_cpu(rsp->resp_data_len) >= 4)
2130 				ch->tsk_mgmt_status = rsp->data[3];
2131 			complete(&ch->tsk_mgmt_done);
2132 		} else {
2133 			shost_printk(KERN_ERR, target->scsi_host,
2134 				     "Received tsk mgmt response too late for tag %#llx\n",
2135 				     rsp->tag);
2136 		}
2137 		spin_unlock_irqrestore(&ch->lock, flags);
2138 	} else {
2139 		scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
2140 		if (scmnd && scmnd->host_scribble) {
2141 			req = (void *)scmnd->host_scribble;
2142 			scmnd = srp_claim_req(ch, req, NULL, scmnd);
2143 		} else {
2144 			scmnd = NULL;
2145 		}
2146 		if (!scmnd) {
2147 			shost_printk(KERN_ERR, target->scsi_host,
2148 				     "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
2149 				     rsp->tag, ch - target->ch, ch->qp->qp_num);
2150 
2151 			spin_lock_irqsave(&ch->lock, flags);
2152 			ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
2153 			spin_unlock_irqrestore(&ch->lock, flags);
2154 
2155 			return;
2156 		}
2157 		scmnd->result = rsp->status;
2158 
2159 		if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
2160 			memcpy(scmnd->sense_buffer, rsp->data +
2161 			       be32_to_cpu(rsp->resp_data_len),
2162 			       min_t(int, be32_to_cpu(rsp->sense_data_len),
2163 				     SCSI_SENSE_BUFFERSIZE));
2164 		}
2165 
2166 		if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
2167 			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
2168 		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
2169 			scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
2170 		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
2171 			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
2172 		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
2173 			scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
2174 
2175 		srp_free_req(ch, req, scmnd,
2176 			     be32_to_cpu(rsp->req_lim_delta));
2177 
2178 		scmnd->host_scribble = NULL;
2179 		scmnd->scsi_done(scmnd);
2180 	}
2181 }
2182 
2183 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
2184 			       void *rsp, int len)
2185 {
2186 	struct srp_target_port *target = ch->target;
2187 	struct ib_device *dev = target->srp_host->srp_dev->dev;
2188 	unsigned long flags;
2189 	struct srp_iu *iu;
2190 	int err;
2191 
2192 	spin_lock_irqsave(&ch->lock, flags);
2193 	ch->req_lim += req_delta;
2194 	iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
2195 	spin_unlock_irqrestore(&ch->lock, flags);
2196 
2197 	if (!iu) {
2198 		shost_printk(KERN_ERR, target->scsi_host, PFX
2199 			     "no IU available to send response\n");
2200 		return 1;
2201 	}
2202 
2203 	iu->num_sge = 1;
2204 	ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
2205 	memcpy(iu->buf, rsp, len);
2206 	ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
2207 
2208 	err = srp_post_send(ch, iu, len);
2209 	if (err) {
2210 		shost_printk(KERN_ERR, target->scsi_host, PFX
2211 			     "unable to post response: %d\n", err);
2212 		srp_put_tx_iu(ch, iu, SRP_IU_RSP);
2213 	}
2214 
2215 	return err;
2216 }
2217 
2218 static void srp_process_cred_req(struct srp_rdma_ch *ch,
2219 				 struct srp_cred_req *req)
2220 {
2221 	struct srp_cred_rsp rsp = {
2222 		.opcode = SRP_CRED_RSP,
2223 		.tag = req->tag,
2224 	};
2225 	s32 delta = be32_to_cpu(req->req_lim_delta);
2226 
2227 	if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2228 		shost_printk(KERN_ERR, ch->target->scsi_host, PFX
2229 			     "problems processing SRP_CRED_REQ\n");
2230 }
2231 
2232 static void srp_process_aer_req(struct srp_rdma_ch *ch,
2233 				struct srp_aer_req *req)
2234 {
2235 	struct srp_target_port *target = ch->target;
2236 	struct srp_aer_rsp rsp = {
2237 		.opcode = SRP_AER_RSP,
2238 		.tag = req->tag,
2239 	};
2240 	s32 delta = be32_to_cpu(req->req_lim_delta);
2241 
2242 	shost_printk(KERN_ERR, target->scsi_host, PFX
2243 		     "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
2244 
2245 	if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2246 		shost_printk(KERN_ERR, target->scsi_host, PFX
2247 			     "problems processing SRP_AER_REQ\n");
2248 }
2249 
2250 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
2251 {
2252 	struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
2253 	struct srp_rdma_ch *ch = cq->cq_context;
2254 	struct srp_target_port *target = ch->target;
2255 	struct ib_device *dev = target->srp_host->srp_dev->dev;
2256 	int res;
2257 	u8 opcode;
2258 
2259 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
2260 		srp_handle_qp_err(cq, wc, "RECV");
2261 		return;
2262 	}
2263 
2264 	ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
2265 				   DMA_FROM_DEVICE);
2266 
2267 	opcode = *(u8 *) iu->buf;
2268 
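	/* Change "if (0)" into "if (1)" below to dump every received IU. */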
2269 	if (0) {
2270 		shost_printk(KERN_ERR, target->scsi_host,
2271 			     PFX "recv completion, opcode 0x%02x\n", opcode);
2272 		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
2273 			       iu->buf, wc->byte_len, true);
2274 	}
2275 
2276 	switch (opcode) {
2277 	case SRP_RSP:
2278 		srp_process_rsp(ch, iu->buf);
2279 		break;
2280 
2281 	case SRP_CRED_REQ:
2282 		srp_process_cred_req(ch, iu->buf);
2283 		break;
2284 
2285 	case SRP_AER_REQ:
2286 		srp_process_aer_req(ch, iu->buf);
2287 		break;
2288 
2289 	case SRP_T_LOGOUT:
2290 		/* XXX Handle target logout */
2291 		shost_printk(KERN_WARNING, target->scsi_host,
2292 			     PFX "Got target logout request\n");
2293 		break;
2294 
2295 	default:
2296 		shost_printk(KERN_WARNING, target->scsi_host,
2297 			     PFX "Unhandled SRP opcode 0x%02x\n", opcode);
2298 		break;
2299 	}
2300 
2301 	ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
2302 				      DMA_FROM_DEVICE);
2303 
2304 	res = srp_post_recv(ch, iu);
2305 	if (res != 0)
2306 		shost_printk(KERN_ERR, target->scsi_host,
2307 			     PFX "Recv failed with error code %d\n", res);
2308 }
2309 
2310 /**
2311  * srp_tl_err_work() - handle a transport layer error
2312  * @work: Work structure embedded in an SRP target port.
2313  *
2314  * Note: This function may get invoked before the rport has been created,
2315  * hence the target->rport test.
2316  */
2317 static void srp_tl_err_work(struct work_struct *work)
2318 {
2319 	struct srp_target_port *target;
2320 
2321 	target = container_of(work, struct srp_target_port, tl_err_work);
2322 	if (target->rport)
2323 		srp_start_tl_fail_timers(target->rport);
2324 }
2325 
2326 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
2327 		const char *opname)
2328 {
2329 	struct srp_rdma_ch *ch = cq->cq_context;
2330 	struct srp_target_port *target = ch->target;
2331 
2332 	if (ch->connected && !target->qp_in_error) {
2333 		shost_printk(KERN_ERR, target->scsi_host,
2334 			     PFX "failed %s status %s (%d) for CQE %p\n",
2335 			     opname, ib_wc_status_msg(wc->status), wc->status,
2336 			     wc->wr_cqe);
2337 		queue_work(system_long_wq, &target->tl_err_work);
2338 	}
2339 	target->qp_in_error = true;
2340 }
2341 
2342 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2343 {
2344 	struct srp_target_port *target = host_to_target(shost);
2345 	struct srp_rport *rport = target->rport;
2346 	struct srp_rdma_ch *ch;
2347 	struct srp_request *req;
2348 	struct srp_iu *iu;
2349 	struct srp_cmd *cmd;
2350 	struct ib_device *dev;
2351 	unsigned long flags;
2352 	u32 tag;
2353 	u16 idx;
2354 	int len, ret;
2355 	const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
2356 
2357 	/*
2358 	 * The SCSI EH thread is the only context from which srp_queuecommand()
2359 	 * can get invoked for blocked devices (SDEV_BLOCK /
2360 	 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
2361 	 * locking the rport mutex if invoked from inside the SCSI EH.
2362 	 */
2363 	if (in_scsi_eh)
2364 		mutex_lock(&rport->mutex);
2365 
2366 	scmnd->result = srp_chkready(target->rport);
2367 	if (unlikely(scmnd->result))
2368 		goto err;
2369 
2370 	WARN_ON_ONCE(scmnd->request->tag < 0);
2371 	tag = blk_mq_unique_tag(scmnd->request);
2372 	ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2373 	idx = blk_mq_unique_tag_to_tag(tag);
2374 	WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2375 		  dev_name(&shost->shost_gendev), tag, idx,
2376 		  target->req_ring_size);
2377 
2378 	spin_lock_irqsave(&ch->lock, flags);
2379 	iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2380 	spin_unlock_irqrestore(&ch->lock, flags);
2381 
2382 	if (!iu)
2383 		goto err;
2384 
2385 	req = &ch->req_ring[idx];
2386 	dev = target->srp_host->srp_dev->dev;
2387 	ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_it_iu_len,
2388 				   DMA_TO_DEVICE);
2389 
2390 	scmnd->host_scribble = (void *) req;
2391 
2392 	cmd = iu->buf;
2393 	memset(cmd, 0, sizeof *cmd);
2394 
2395 	cmd->opcode = SRP_CMD;
2396 	int_to_scsilun(scmnd->device->lun, &cmd->lun);
2397 	cmd->tag    = tag;
2398 	memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
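	/*
	 * CDBs that do not fit in the 16-byte cdb[] array spill over into the
	 * additional CDB area, rounded up to a multiple of four bytes.
	 */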
2399 	if (unlikely(scmnd->cmd_len > sizeof(cmd->cdb))) {
2400 		cmd->add_cdb_len = round_up(scmnd->cmd_len - sizeof(cmd->cdb),
2401 					    4);
2402 		if (WARN_ON_ONCE(cmd->add_cdb_len > SRP_MAX_ADD_CDB_LEN))
2403 			goto err_iu;
2404 	}
2405 
2406 	req->scmnd    = scmnd;
2407 	req->cmd      = iu;
2408 
2409 	len = srp_map_data(scmnd, ch, req);
2410 	if (len < 0) {
2411 		shost_printk(KERN_ERR, target->scsi_host,
2412 			     PFX "Failed to map data (%d)\n", len);
2413 		/*
2414 		 * If we ran out of memory descriptors (-ENOMEM) because an
2415 		 * application is queuing many requests with more than
2416 		 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2417 		 * to reduce queue depth temporarily.
2418 		 */
2419 		scmnd->result = len == -ENOMEM ?
2420 			DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2421 		goto err_iu;
2422 	}
2423 
2424 	ib_dma_sync_single_for_device(dev, iu->dma, ch->max_it_iu_len,
2425 				      DMA_TO_DEVICE);
2426 
2427 	if (srp_post_send(ch, iu, len)) {
2428 		shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2429 		scmnd->result = DID_ERROR << 16;
2430 		goto err_unmap;
2431 	}
2432 
2433 	ret = 0;
2434 
2435 unlock_rport:
2436 	if (in_scsi_eh)
2437 		mutex_unlock(&rport->mutex);
2438 
2439 	return ret;
2440 
2441 err_unmap:
2442 	srp_unmap_data(scmnd, ch, req);
2443 
2444 err_iu:
2445 	srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2446 
	/*
	 * Prevent the loops that iterate over the request ring from
	 * encountering a dangling SCSI command pointer.
	 */
2451 	req->scmnd = NULL;
2452 
2453 err:
2454 	if (scmnd->result) {
2455 		scmnd->scsi_done(scmnd);
2456 		ret = 0;
2457 	} else {
2458 		ret = SCSI_MLQUEUE_HOST_BUSY;
2459 	}
2460 
2461 	goto unlock_rport;
2462 }
2463 
2464 /*
2465  * Note: the resources allocated in this function are freed in
2466  * srp_free_ch_ib().
2467  */
2468 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2469 {
2470 	struct srp_target_port *target = ch->target;
2471 	int i;
2472 
2473 	ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2474 			      GFP_KERNEL);
2475 	if (!ch->rx_ring)
2476 		goto err_no_ring;
2477 	ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2478 			      GFP_KERNEL);
2479 	if (!ch->tx_ring)
2480 		goto err_no_ring;
2481 
2482 	for (i = 0; i < target->queue_size; ++i) {
2483 		ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2484 					      ch->max_ti_iu_len,
2485 					      GFP_KERNEL, DMA_FROM_DEVICE);
2486 		if (!ch->rx_ring[i])
2487 			goto err;
2488 	}
2489 
2490 	for (i = 0; i < target->queue_size; ++i) {
2491 		ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2492 					      ch->max_it_iu_len,
2493 					      GFP_KERNEL, DMA_TO_DEVICE);
2494 		if (!ch->tx_ring[i])
2495 			goto err;
2496 
2497 		list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2498 	}
2499 
2500 	return 0;
2501 
2502 err:
2503 	for (i = 0; i < target->queue_size; ++i) {
2504 		srp_free_iu(target->srp_host, ch->rx_ring[i]);
2505 		srp_free_iu(target->srp_host, ch->tx_ring[i]);
2506 	}
2507 
2509 err_no_ring:
2510 	kfree(ch->tx_ring);
2511 	ch->tx_ring = NULL;
2512 	kfree(ch->rx_ring);
2513 	ch->rx_ring = NULL;
2514 
2515 	return -ENOMEM;
2516 }
2517 
2518 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2519 {
2520 	uint64_t T_tr_ns, max_compl_time_ms;
2521 	uint32_t rq_tmo_jiffies;
2522 
2523 	/*
2524 	 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2525 	 * table 91), both the QP timeout and the retry count have to be set
2526 	 * for RC QP's during the RTR to RTS transition.
2527 	 */
2528 	WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2529 		     (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2530 
2531 	/*
2532 	 * Set target->rq_tmo_jiffies to one second more than the largest time
2533 	 * it can take before an error completion is generated. See also
2534 	 * C9-140..142 in the IBTA spec for more information about how to
2535 	 * convert the QP Local ACK Timeout value to nanoseconds.
2536 	 */
2537 	T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2538 	max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
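	/* max_compl_time_ms holds nanoseconds until do_div() converts it. */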
2539 	do_div(max_compl_time_ms, NSEC_PER_MSEC);
2540 	rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2541 
2542 	return rq_tmo_jiffies;
2543 }
2544 
2545 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2546 			       const struct srp_login_rsp *lrsp,
2547 			       struct srp_rdma_ch *ch)
2548 {
2549 	struct srp_target_port *target = ch->target;
2550 	struct ib_qp_attr *qp_attr = NULL;
2551 	int attr_mask = 0;
2552 	int ret = 0;
2553 	int i;
2554 
2555 	if (lrsp->opcode == SRP_LOGIN_RSP) {
2556 		ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2557 		ch->req_lim       = be32_to_cpu(lrsp->req_lim_delta);
2558 		ch->use_imm_data  = lrsp->rsp_flags & SRP_LOGIN_RSP_IMMED_SUPP;
2559 		ch->max_it_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt,
2560 						      ch->use_imm_data);
2561 		WARN_ON_ONCE(ch->max_it_iu_len >
2562 			     be32_to_cpu(lrsp->max_it_iu_len));
2563 
2564 		if (ch->use_imm_data)
2565 			shost_printk(KERN_DEBUG, target->scsi_host,
2566 				     PFX "using immediate data\n");
2567 
2568 		/*
2569 		 * Reserve credits for task management so we don't
2570 		 * bounce requests back to the SCSI mid-layer.
2571 		 */
2572 		target->scsi_host->can_queue
2573 			= min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2574 			      target->scsi_host->can_queue);
2575 		target->scsi_host->cmd_per_lun
2576 			= min_t(int, target->scsi_host->can_queue,
2577 				target->scsi_host->cmd_per_lun);
2578 	} else {
2579 		shost_printk(KERN_WARNING, target->scsi_host,
2580 			     PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2581 		ret = -ECONNRESET;
2582 		goto error;
2583 	}
2584 
2585 	if (!ch->rx_ring) {
2586 		ret = srp_alloc_iu_bufs(ch);
2587 		if (ret)
2588 			goto error;
2589 	}
2590 
2591 	for (i = 0; i < target->queue_size; i++) {
2592 		struct srp_iu *iu = ch->rx_ring[i];
2593 
2594 		ret = srp_post_recv(ch, iu);
2595 		if (ret)
2596 			goto error;
2597 	}
2598 
2599 	if (!target->using_rdma_cm) {
2600 		ret = -ENOMEM;
2601 		qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL);
2602 		if (!qp_attr)
2603 			goto error;
2604 
2605 		qp_attr->qp_state = IB_QPS_RTR;
2606 		ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2607 		if (ret)
2608 			goto error_free;
2609 
2610 		ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2611 		if (ret)
2612 			goto error_free;
2613 
2614 		qp_attr->qp_state = IB_QPS_RTS;
2615 		ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2616 		if (ret)
2617 			goto error_free;
2618 
2619 		target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2620 
2621 		ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2622 		if (ret)
2623 			goto error_free;
2624 
2625 		ret = ib_send_cm_rtu(cm_id, NULL, 0);
2626 	}
2627 
2628 error_free:
2629 	kfree(qp_attr);
2630 
2631 error:
2632 	ch->status = ret;
2633 }
2634 
2635 static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id,
2636 				  const struct ib_cm_event *event,
2637 				  struct srp_rdma_ch *ch)
2638 {
2639 	struct srp_target_port *target = ch->target;
2640 	struct Scsi_Host *shost = target->scsi_host;
2641 	struct ib_class_port_info *cpi;
2642 	int opcode;
2643 	u16 dlid;
2644 
2645 	switch (event->param.rej_rcvd.reason) {
2646 	case IB_CM_REJ_PORT_CM_REDIRECT:
2647 		cpi = event->param.rej_rcvd.ari;
2648 		dlid = be16_to_cpu(cpi->redirect_lid);
2649 		sa_path_set_dlid(&ch->ib_cm.path, dlid);
2650 		ch->ib_cm.path.pkey = cpi->redirect_pkey;
2651 		cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2652 		memcpy(ch->ib_cm.path.dgid.raw, cpi->redirect_gid, 16);
2653 
2654 		ch->status = dlid ? SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2655 		break;
2656 
2657 	case IB_CM_REJ_PORT_REDIRECT:
2658 		if (srp_target_is_topspin(target)) {
2659 			union ib_gid *dgid = &ch->ib_cm.path.dgid;
2660 
2661 			/*
2662 			 * Topspin/Cisco SRP gateways incorrectly send
2663 			 * reject reason code 25 when they mean 24
2664 			 * (port redirect).
2665 			 */
2666 			memcpy(dgid->raw, event->param.rej_rcvd.ari, 16);
2667 
2668 			shost_printk(KERN_DEBUG, shost,
2669 				     PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2670 				     be64_to_cpu(dgid->global.subnet_prefix),
2671 				     be64_to_cpu(dgid->global.interface_id));
2672 
2673 			ch->status = SRP_PORT_REDIRECT;
2674 		} else {
2675 			shost_printk(KERN_WARNING, shost,
2676 				     "  REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2677 			ch->status = -ECONNRESET;
2678 		}
2679 		break;
2680 
2681 	case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2682 		shost_printk(KERN_WARNING, shost,
2683 			    "  REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2684 		ch->status = -ECONNRESET;
2685 		break;
2686 
2687 	case IB_CM_REJ_CONSUMER_DEFINED:
2688 		opcode = *(u8 *) event->private_data;
2689 		if (opcode == SRP_LOGIN_REJ) {
2690 			struct srp_login_rej *rej = event->private_data;
2691 			u32 reason = be32_to_cpu(rej->reason);
2692 
2693 			if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2694 				shost_printk(KERN_WARNING, shost,
2695 					     PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2696 			else
2697 				shost_printk(KERN_WARNING, shost, PFX
2698 					     "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2699 					     target->sgid.raw,
2700 					     target->ib_cm.orig_dgid.raw,
2701 					     reason);
		} else {
			shost_printk(KERN_WARNING, shost,
				     "  REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n",
				     opcode);
		}
2706 		ch->status = -ECONNRESET;
2707 		break;
2708 
2709 	case IB_CM_REJ_STALE_CONN:
2710 		shost_printk(KERN_WARNING, shost, "  REJ reason: stale connection\n");
2711 		ch->status = SRP_STALE_CONN;
2712 		break;
2713 
2714 	default:
2715 		shost_printk(KERN_WARNING, shost, "  REJ reason 0x%x\n",
2716 			     event->param.rej_rcvd.reason);
2717 		ch->status = -ECONNRESET;
2718 	}
2719 }
2720 
2721 static int srp_ib_cm_handler(struct ib_cm_id *cm_id,
2722 			     const struct ib_cm_event *event)
2723 {
2724 	struct srp_rdma_ch *ch = cm_id->context;
2725 	struct srp_target_port *target = ch->target;
2726 	int comp = 0;
2727 
2728 	switch (event->event) {
2729 	case IB_CM_REQ_ERROR:
2730 		shost_printk(KERN_DEBUG, target->scsi_host,
2731 			     PFX "Sending CM REQ failed\n");
2732 		comp = 1;
2733 		ch->status = -ECONNRESET;
2734 		break;
2735 
2736 	case IB_CM_REP_RECEIVED:
2737 		comp = 1;
2738 		srp_cm_rep_handler(cm_id, event->private_data, ch);
2739 		break;
2740 
2741 	case IB_CM_REJ_RECEIVED:
2742 		shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2743 		comp = 1;
2744 
2745 		srp_ib_cm_rej_handler(cm_id, event, ch);
2746 		break;
2747 
2748 	case IB_CM_DREQ_RECEIVED:
2749 		shost_printk(KERN_WARNING, target->scsi_host,
2750 			     PFX "DREQ received - connection closed\n");
2751 		ch->connected = false;
2752 		if (ib_send_cm_drep(cm_id, NULL, 0))
2753 			shost_printk(KERN_ERR, target->scsi_host,
2754 				     PFX "Sending CM DREP failed\n");
2755 		queue_work(system_long_wq, &target->tl_err_work);
2756 		break;
2757 
2758 	case IB_CM_TIMEWAIT_EXIT:
2759 		shost_printk(KERN_ERR, target->scsi_host,
2760 			     PFX "connection closed\n");
2761 		comp = 1;
2762 
2763 		ch->status = 0;
2764 		break;
2765 
2766 	case IB_CM_MRA_RECEIVED:
2767 	case IB_CM_DREQ_ERROR:
2768 	case IB_CM_DREP_RECEIVED:
2769 		break;
2770 
2771 	default:
2772 		shost_printk(KERN_WARNING, target->scsi_host,
2773 			     PFX "Unhandled CM event %d\n", event->event);
2774 		break;
2775 	}
2776 
2777 	if (comp)
2778 		complete(&ch->done);
2779 
2780 	return 0;
2781 }
2782 
2783 static void srp_rdma_cm_rej_handler(struct srp_rdma_ch *ch,
2784 				    struct rdma_cm_event *event)
2785 {
2786 	struct srp_target_port *target = ch->target;
2787 	struct Scsi_Host *shost = target->scsi_host;
2788 	int opcode;
2789 
2790 	switch (event->status) {
2791 	case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2792 		shost_printk(KERN_WARNING, shost,
2793 			    "  REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2794 		ch->status = -ECONNRESET;
2795 		break;
2796 
2797 	case IB_CM_REJ_CONSUMER_DEFINED:
2798 		opcode = *(u8 *) event->param.conn.private_data;
2799 		if (opcode == SRP_LOGIN_REJ) {
2800 			struct srp_login_rej *rej =
2801 				(struct srp_login_rej *)
2802 				event->param.conn.private_data;
2803 			u32 reason = be32_to_cpu(rej->reason);
2804 
2805 			if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2806 				shost_printk(KERN_WARNING, shost,
2807 					     PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2808 			else
2809 				shost_printk(KERN_WARNING, shost,
2810 					    PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason);
2811 		} else {
2812 			shost_printk(KERN_WARNING, shost,
2813 				     "  REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n",
2814 				     opcode);
2815 		}
2816 		ch->status = -ECONNRESET;
2817 		break;
2818 
2819 	case IB_CM_REJ_STALE_CONN:
2820 		shost_printk(KERN_WARNING, shost,
2821 			     "  REJ reason: stale connection\n");
2822 		ch->status = SRP_STALE_CONN;
2823 		break;
2824 
2825 	default:
2826 		shost_printk(KERN_WARNING, shost, "  REJ reason 0x%x\n",
2827 			     event->status);
2828 		ch->status = -ECONNRESET;
2829 		break;
2830 	}
2831 }
2832 
2833 static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
2834 			       struct rdma_cm_event *event)
2835 {
2836 	struct srp_rdma_ch *ch = cm_id->context;
2837 	struct srp_target_port *target = ch->target;
2838 	int comp = 0;
2839 
2840 	switch (event->event) {
2841 	case RDMA_CM_EVENT_ADDR_RESOLVED:
2842 		ch->status = 0;
2843 		comp = 1;
2844 		break;
2845 
2846 	case RDMA_CM_EVENT_ADDR_ERROR:
2847 		ch->status = -ENXIO;
2848 		comp = 1;
2849 		break;
2850 
2851 	case RDMA_CM_EVENT_ROUTE_RESOLVED:
2852 		ch->status = 0;
2853 		comp = 1;
2854 		break;
2855 
2856 	case RDMA_CM_EVENT_ROUTE_ERROR:
2857 	case RDMA_CM_EVENT_UNREACHABLE:
2858 		ch->status = -EHOSTUNREACH;
2859 		comp = 1;
2860 		break;
2861 
2862 	case RDMA_CM_EVENT_CONNECT_ERROR:
2863 		shost_printk(KERN_DEBUG, target->scsi_host,
2864 			     PFX "Sending CM REQ failed\n");
2865 		comp = 1;
2866 		ch->status = -ECONNRESET;
2867 		break;
2868 
2869 	case RDMA_CM_EVENT_ESTABLISHED:
2870 		comp = 1;
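		/*
		 * The cm_id argument is only dereferenced on the IB/CM path,
		 * so passing NULL is safe on this RDMA/CM path.
		 */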
2871 		srp_cm_rep_handler(NULL, event->param.conn.private_data, ch);
2872 		break;
2873 
2874 	case RDMA_CM_EVENT_REJECTED:
2875 		shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2876 		comp = 1;
2877 
2878 		srp_rdma_cm_rej_handler(ch, event);
2879 		break;
2880 
2881 	case RDMA_CM_EVENT_DISCONNECTED:
2882 		if (ch->connected) {
2883 			shost_printk(KERN_WARNING, target->scsi_host,
2884 				     PFX "received DREQ\n");
2885 			rdma_disconnect(ch->rdma_cm.cm_id);
2886 			comp = 1;
2887 			ch->status = 0;
2888 			queue_work(system_long_wq, &target->tl_err_work);
2889 		}
2890 		break;
2891 
2892 	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
2893 		shost_printk(KERN_ERR, target->scsi_host,
2894 			     PFX "connection closed\n");
2895 
2896 		comp = 1;
2897 		ch->status = 0;
2898 		break;
2899 
2900 	default:
2901 		shost_printk(KERN_WARNING, target->scsi_host,
2902 			     PFX "Unhandled CM event %d\n", event->event);
2903 		break;
2904 	}
2905 
2906 	if (comp)
2907 		complete(&ch->done);
2908 
2909 	return 0;
2910 }
2911 
2912 /**
 * srp_change_queue_depth() - set the device queue depth
 * @sdev: SCSI device struct
 * @qdepth: requested queue depth
 *
 * Returns the new queue depth.
2918  */
2919 static int
2920 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2921 {
2922 	if (!sdev->tagged_supported)
2923 		qdepth = 1;
2924 	return scsi_change_queue_depth(sdev, qdepth);
2925 }
2926 
2927 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2928 			     u8 func, u8 *status)
2929 {
2930 	struct srp_target_port *target = ch->target;
2931 	struct srp_rport *rport = target->rport;
2932 	struct ib_device *dev = target->srp_host->srp_dev->dev;
2933 	struct srp_iu *iu;
2934 	struct srp_tsk_mgmt *tsk_mgmt;
2935 	int res;
2936 
2937 	if (!ch->connected || target->qp_in_error)
2938 		return -1;
2939 
	/*
	 * Lock the rport mutex to prevent srp_create_ch_ib() from being
	 * invoked while a task management function is being sent.
	 */
2944 	mutex_lock(&rport->mutex);
2945 	spin_lock_irq(&ch->lock);
2946 	iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2947 	spin_unlock_irq(&ch->lock);
2948 
2949 	if (!iu) {
2950 		mutex_unlock(&rport->mutex);
2951 
2952 		return -1;
2953 	}
2954 
2955 	iu->num_sge = 1;
2956 
2957 	ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2958 				   DMA_TO_DEVICE);
2959 	tsk_mgmt = iu->buf;
2960 	memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2961 
2962 	tsk_mgmt->opcode 	= SRP_TSK_MGMT;
2963 	int_to_scsilun(lun, &tsk_mgmt->lun);
2964 	tsk_mgmt->tsk_mgmt_func = func;
2965 	tsk_mgmt->task_tag	= req_tag;
2966 
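	/*
	 * Generate a tag with the SRP_TAG_TSK_MGMT bit set so that
	 * srp_process_rsp() can distinguish task management responses from
	 * SCSI command responses.
	 */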
2967 	spin_lock_irq(&ch->lock);
2968 	ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
2969 	tsk_mgmt->tag = ch->tsk_mgmt_tag;
2970 	spin_unlock_irq(&ch->lock);
2971 
2972 	init_completion(&ch->tsk_mgmt_done);
2973 
2974 	ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2975 				      DMA_TO_DEVICE);
2976 	if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2977 		srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2978 		mutex_unlock(&rport->mutex);
2979 
2980 		return -1;
2981 	}
2982 	res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
2983 					msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
2984 	if (res > 0 && status)
2985 		*status = ch->tsk_mgmt_status;
2986 	mutex_unlock(&rport->mutex);
2987 
2988 	WARN_ON_ONCE(res < 0);
2989 
2990 	return res > 0 ? 0 : -1;
2991 }
2992 
2993 static int srp_abort(struct scsi_cmnd *scmnd)
2994 {
2995 	struct srp_target_port *target = host_to_target(scmnd->device->host);
2996 	struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2997 	u32 tag;
2998 	u16 ch_idx;
2999 	struct srp_rdma_ch *ch;
3000 	int ret;
3001 
3002 	shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
3003 
3004 	if (!req)
3005 		return SUCCESS;
3006 	tag = blk_mq_unique_tag(scmnd->request);
3007 	ch_idx = blk_mq_unique_tag_to_hwq(tag);
3008 	if (WARN_ON_ONCE(ch_idx >= target->ch_count))
3009 		return SUCCESS;
3010 	ch = &target->ch[ch_idx];
3011 	if (!srp_claim_req(ch, req, NULL, scmnd))
3012 		return SUCCESS;
3013 	shost_printk(KERN_ERR, target->scsi_host,
3014 		     "Sending SRP abort for tag %#x\n", tag);
3015 	if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
3016 			      SRP_TSK_ABORT_TASK, NULL) == 0)
3017 		ret = SUCCESS;
3018 	else if (target->rport->state == SRP_RPORT_LOST)
3019 		ret = FAST_IO_FAIL;
3020 	else
3021 		ret = FAILED;
3022 	if (ret == SUCCESS) {
3023 		srp_free_req(ch, req, scmnd, 0);
3024 		scmnd->result = DID_ABORT << 16;
3025 		scmnd->scsi_done(scmnd);
3026 	}
3027 
3028 	return ret;
3029 }
3030 
3031 static int srp_reset_device(struct scsi_cmnd *scmnd)
3032 {
3033 	struct srp_target_port *target = host_to_target(scmnd->device->host);
3034 	struct srp_rdma_ch *ch;
3035 	u8 status;
3036 
3037 	shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
3038 
3039 	ch = &target->ch[0];
3040 	if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
3041 			      SRP_TSK_LUN_RESET, &status))
3042 		return FAILED;
3043 	if (status)
3044 		return FAILED;
3045 
3046 	return SUCCESS;
3047 }
3048 
3049 static int srp_reset_host(struct scsi_cmnd *scmnd)
3050 {
3051 	struct srp_target_port *target = host_to_target(scmnd->device->host);
3052 
3053 	shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
3054 
3055 	return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
3056 }
3057 
3058 static int srp_target_alloc(struct scsi_target *starget)
3059 {
3060 	struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
3061 	struct srp_target_port *target = host_to_target(shost);
3062 
3063 	if (target->target_can_queue)
3064 		starget->can_queue = target->target_can_queue;
3065 	return 0;
3066 }
3067 
3068 static int srp_slave_alloc(struct scsi_device *sdev)
3069 {
3070 	struct Scsi_Host *shost = sdev->host;
3071 	struct srp_target_port *target = host_to_target(shost);
3072 	struct srp_device *srp_dev = target->srp_host->srp_dev;
3073 	struct ib_device *ibdev = srp_dev->dev;
3074 
3075 	if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
3076 		blk_queue_virt_boundary(sdev->request_queue,
3077 					~srp_dev->mr_page_mask);
3078 
3079 	return 0;
3080 }
3081 
3082 static int srp_slave_configure(struct scsi_device *sdev)
3083 {
3084 	struct Scsi_Host *shost = sdev->host;
3085 	struct srp_target_port *target = host_to_target(shost);
3086 	struct request_queue *q = sdev->request_queue;
3087 	unsigned long timeout;
3088 
3089 	if (sdev->type == TYPE_DISK) {
3090 		timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
3091 		blk_queue_rq_timeout(q, timeout);
3092 	}
3093 
3094 	return 0;
3095 }
3096 
3097 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
3098 			   char *buf)
3099 {
3100 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
3101 
3102 	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
3103 }
3104 
3105 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
3106 			     char *buf)
3107 {
3108 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
3109 
3110 	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
3111 }
3112 
3113 static ssize_t show_service_id(struct device *dev,
3114 			       struct device_attribute *attr, char *buf)
3115 {
3116 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
3117 
3118 	if (target->using_rdma_cm)
3119 		return -ENOENT;
3120 	return sprintf(buf, "0x%016llx\n",
3121 		       be64_to_cpu(target->ib_cm.service_id));
3122 }
3123 
3124 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
3125 			 char *buf)
3126 {
3127 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
3128 
3129 	if (target->using_rdma_cm)
3130 		return -ENOENT;
3131 	return sprintf(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey));
3132 }
3133 
3134 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
3135 			 char *buf)
3136 {
3137 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
3138 
3139 	return sprintf(buf, "%pI6\n", target->sgid.raw);
3140 }
3141 
3142 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
3143 			 char *buf)
3144 {
3145 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
3146 	struct srp_rdma_ch *ch = &target->ch[0];
3147 
3148 	if (target->using_rdma_cm)
3149 		return -ENOENT;
3150 	return sprintf(buf, "%pI6\n", ch->ib_cm.path.dgid.raw);
3151 }
3152 
3153 static ssize_t show_orig_dgid(struct device *dev,
3154 			      struct device_attribute *attr, char *buf)
3155 {
3156 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
3157 
3158 	if (target->using_rdma_cm)
3159 		return -ENOENT;
3160 	return sprintf(buf, "%pI6\n", target->ib_cm.orig_dgid.raw);
3161 }
3162 
3163 static ssize_t show_req_lim(struct device *dev,
3164 			    struct device_attribute *attr, char *buf)
3165 {
3166 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
3167 	struct srp_rdma_ch *ch;
3168 	int i, req_lim = INT_MAX;
3169 
3170 	for (i = 0; i < target->ch_count; i++) {
3171 		ch = &target->ch[i];
3172 		req_lim = min(req_lim, ch->req_lim);
3173 	}
3174 	return sprintf(buf, "%d\n", req_lim);
3175 }
3176 
3177 static ssize_t show_zero_req_lim(struct device *dev,
3178 				 struct device_attribute *attr, char *buf)
3179 {
3180 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
3181 
3182 	return sprintf(buf, "%d\n", target->zero_req_lim);
3183 }
3184 
3185 static ssize_t show_local_ib_port(struct device *dev,
3186 				  struct device_attribute *attr, char *buf)
3187 {
3188 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
3189 
3190 	return sprintf(buf, "%d\n", target->srp_host->port);
3191 }
3192 
3193 static ssize_t show_local_ib_device(struct device *dev,
3194 				    struct device_attribute *attr, char *buf)
3195 {
3196 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
3197 
3198 	return sprintf(buf, "%s\n",
3199 		       dev_name(&target->srp_host->srp_dev->dev->dev));
3200 }
3201 
3202 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
3203 			     char *buf)
3204 {
3205 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
3206 
3207 	return sprintf(buf, "%d\n", target->ch_count);
3208 }
3209 
3210 static ssize_t show_comp_vector(struct device *dev,
3211 				struct device_attribute *attr, char *buf)
3212 {
3213 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
3214 
3215 	return sprintf(buf, "%d\n", target->comp_vector);
3216 }
3217 
3218 static ssize_t show_tl_retry_count(struct device *dev,
3219 				   struct device_attribute *attr, char *buf)
3220 {
3221 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
3222 
3223 	return sprintf(buf, "%d\n", target->tl_retry_count);
3224 }
3225 
3226 static ssize_t show_cmd_sg_entries(struct device *dev,
3227 				   struct device_attribute *attr, char *buf)
3228 {
3229 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
3230 
3231 	return sprintf(buf, "%u\n", target->cmd_sg_cnt);
3232 }
3233 
3234 static ssize_t show_allow_ext_sg(struct device *dev,
3235 				 struct device_attribute *attr, char *buf)
3236 {
3237 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
3238 
3239 	return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
3240 }
3241 
3242 static DEVICE_ATTR(id_ext,	    S_IRUGO, show_id_ext,	   NULL);
3243 static DEVICE_ATTR(ioc_guid,	    S_IRUGO, show_ioc_guid,	   NULL);
3244 static DEVICE_ATTR(service_id,	    S_IRUGO, show_service_id,	   NULL);
3245 static DEVICE_ATTR(pkey,	    S_IRUGO, show_pkey,		   NULL);
3246 static DEVICE_ATTR(sgid,	    S_IRUGO, show_sgid,		   NULL);
3247 static DEVICE_ATTR(dgid,	    S_IRUGO, show_dgid,		   NULL);
3248 static DEVICE_ATTR(orig_dgid,	    S_IRUGO, show_orig_dgid,	   NULL);
3249 static DEVICE_ATTR(req_lim,         S_IRUGO, show_req_lim,         NULL);
3250 static DEVICE_ATTR(zero_req_lim,    S_IRUGO, show_zero_req_lim,	   NULL);
3251 static DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,   NULL);
3252 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
3253 static DEVICE_ATTR(ch_count,        S_IRUGO, show_ch_count,        NULL);
3254 static DEVICE_ATTR(comp_vector,     S_IRUGO, show_comp_vector,     NULL);
3255 static DEVICE_ATTR(tl_retry_count,  S_IRUGO, show_tl_retry_count,  NULL);
3256 static DEVICE_ATTR(cmd_sg_entries,  S_IRUGO, show_cmd_sg_entries,  NULL);
3257 static DEVICE_ATTR(allow_ext_sg,    S_IRUGO, show_allow_ext_sg,    NULL);
3258 
3259 static struct device_attribute *srp_host_attrs[] = {
3260 	&dev_attr_id_ext,
3261 	&dev_attr_ioc_guid,
3262 	&dev_attr_service_id,
3263 	&dev_attr_pkey,
3264 	&dev_attr_sgid,
3265 	&dev_attr_dgid,
3266 	&dev_attr_orig_dgid,
3267 	&dev_attr_req_lim,
3268 	&dev_attr_zero_req_lim,
3269 	&dev_attr_local_ib_port,
3270 	&dev_attr_local_ib_device,
3271 	&dev_attr_ch_count,
3272 	&dev_attr_comp_vector,
3273 	&dev_attr_tl_retry_count,
3274 	&dev_attr_cmd_sg_entries,
3275 	&dev_attr_allow_ext_sg,
3276 	NULL
3277 };
3278 
3279 static struct scsi_host_template srp_template = {
3280 	.module				= THIS_MODULE,
3281 	.name				= "InfiniBand SRP initiator",
3282 	.proc_name			= DRV_NAME,
3283 	.target_alloc			= srp_target_alloc,
3284 	.slave_alloc			= srp_slave_alloc,
3285 	.slave_configure		= srp_slave_configure,
3286 	.info				= srp_target_info,
3287 	.queuecommand			= srp_queuecommand,
3288 	.change_queue_depth             = srp_change_queue_depth,
3289 	.eh_timed_out			= srp_timed_out,
3290 	.eh_abort_handler		= srp_abort,
3291 	.eh_device_reset_handler	= srp_reset_device,
3292 	.eh_host_reset_handler		= srp_reset_host,
3293 	.skip_settle_delay		= true,
3294 	.sg_tablesize			= SRP_DEF_SG_TABLESIZE,
3295 	.can_queue			= SRP_DEFAULT_CMD_SQ_SIZE,
3296 	.this_id			= -1,
3297 	.cmd_per_lun			= SRP_DEFAULT_CMD_SQ_SIZE,
3298 	.shost_attrs			= srp_host_attrs,
3299 	.track_queue_depth		= 1,
3300 };
3301 
3302 static int srp_sdev_count(struct Scsi_Host *host)
3303 {
3304 	struct scsi_device *sdev;
3305 	int c = 0;
3306 
3307 	shost_for_each_device(sdev, host)
3308 		c++;
3309 
3310 	return c;
3311 }
3312 
3313 /*
3314  * Return values:
3315  * < 0 upon failure. Caller is responsible for SRP target port cleanup.
3316  * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
3317  *    removal has been scheduled.
3318  * 0 and target->state != SRP_TARGET_REMOVED upon success.
3319  */
3320 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
3321 {
3322 	struct srp_rport_identifiers ids;
3323 	struct srp_rport *rport;
3324 
3325 	target->state = SRP_TARGET_SCANNING;
3326 	sprintf(target->target_name, "SRP.T10:%016llX",
3327 		be64_to_cpu(target->id_ext));
3328 
3329 	if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dev.parent))
3330 		return -ENODEV;
3331 
3332 	memcpy(ids.port_id, &target->id_ext, 8);
3333 	memcpy(ids.port_id + 8, &target->ioc_guid, 8);
3334 	ids.roles = SRP_RPORT_ROLE_TARGET;
3335 	rport = srp_rport_add(target->scsi_host, &ids);
3336 	if (IS_ERR(rport)) {
3337 		scsi_remove_host(target->scsi_host);
3338 		return PTR_ERR(rport);
3339 	}
3340 
3341 	rport->lld_data = target;
3342 	target->rport = rport;
3343 
3344 	spin_lock(&host->target_lock);
3345 	list_add_tail(&target->list, &host->target_list);
3346 	spin_unlock(&host->target_lock);
3347 
3348 	scsi_scan_target(&target->scsi_host->shost_gendev,
3349 			 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
3350 
3351 	if (srp_connected_ch(target) < target->ch_count ||
3352 	    target->qp_in_error) {
3353 		shost_printk(KERN_INFO, target->scsi_host,
3354 			     PFX "SCSI scan failed - removing SCSI host\n");
3355 		srp_queue_remove_work(target);
3356 		goto out;
3357 	}
3358 
3359 	pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
3360 		 dev_name(&target->scsi_host->shost_gendev),
3361 		 srp_sdev_count(target->scsi_host));
3362 
3363 	spin_lock_irq(&target->lock);
3364 	if (target->state == SRP_TARGET_SCANNING)
3365 		target->state = SRP_TARGET_LIVE;
3366 	spin_unlock_irq(&target->lock);
3367 
3368 out:
3369 	return 0;
3370 }
3371 
3372 static void srp_release_dev(struct device *dev)
3373 {
3374 	struct srp_host *host =
3375 		container_of(dev, struct srp_host, dev);
3376 
3377 	complete(&host->released);
3378 }
3379 
3380 static struct class srp_class = {
3381 	.name    = "infiniband_srp",
3382 	.dev_release = srp_release_dev
3383 };
3384 
3385 /**
3386  * srp_conn_unique() - check whether the connection to a target is unique
3387  * @host:   SRP host.
3388  * @target: SRP target port.
3389  */
3390 static bool srp_conn_unique(struct srp_host *host,
3391 			    struct srp_target_port *target)
3392 {
3393 	struct srp_target_port *t;
3394 	bool ret = false;
3395 
3396 	if (target->state == SRP_TARGET_REMOVED)
3397 		goto out;
3398 
3399 	ret = true;
3400 
3401 	spin_lock(&host->target_lock);
3402 	list_for_each_entry(t, &host->target_list, list) {
3403 		if (t != target &&
3404 		    target->id_ext == t->id_ext &&
3405 		    target->ioc_guid == t->ioc_guid &&
3406 		    target->initiator_ext == t->initiator_ext) {
3407 			ret = false;
3408 			break;
3409 		}
3410 	}
3411 	spin_unlock(&host->target_lock);
3412 
3413 out:
3414 	return ret;
3415 }
3416 
3417 /*
3418  * Target ports are added by writing
3419  *
3420  *     id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
3421  *     pkey=<P_Key>,service_id=<service ID>
3422  * or
3423  *     id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,
3424  *     [src=<IPv4 address>,]dest=<IPv4 address>:<port number>
3425  *
3426  * to the add_target sysfs attribute.
3427  */
3428 enum {
3429 	SRP_OPT_ERR		= 0,
3430 	SRP_OPT_ID_EXT		= 1 << 0,
3431 	SRP_OPT_IOC_GUID	= 1 << 1,
3432 	SRP_OPT_DGID		= 1 << 2,
3433 	SRP_OPT_PKEY		= 1 << 3,
3434 	SRP_OPT_SERVICE_ID	= 1 << 4,
3435 	SRP_OPT_MAX_SECT	= 1 << 5,
3436 	SRP_OPT_MAX_CMD_PER_LUN	= 1 << 6,
3437 	SRP_OPT_IO_CLASS	= 1 << 7,
3438 	SRP_OPT_INITIATOR_EXT	= 1 << 8,
3439 	SRP_OPT_CMD_SG_ENTRIES	= 1 << 9,
3440 	SRP_OPT_ALLOW_EXT_SG	= 1 << 10,
3441 	SRP_OPT_SG_TABLESIZE	= 1 << 11,
3442 	SRP_OPT_COMP_VECTOR	= 1 << 12,
3443 	SRP_OPT_TL_RETRY_COUNT	= 1 << 13,
3444 	SRP_OPT_QUEUE_SIZE	= 1 << 14,
3445 	SRP_OPT_IP_SRC		= 1 << 15,
3446 	SRP_OPT_IP_DEST		= 1 << 16,
	SRP_OPT_TARGET_CAN_QUEUE = 1 << 17,
3448 };
3449 
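/*
 * Each entry in this array is one acceptable combination of mandatory login
 * parameters: the first for IB/CM logins (dgid/pkey/service_id) and the
 * second for RDMA/CM logins (dest=<IP>:<port>).
 */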
3450 static unsigned int srp_opt_mandatory[] = {
3451 	SRP_OPT_ID_EXT		|
3452 	SRP_OPT_IOC_GUID	|
3453 	SRP_OPT_DGID		|
3454 	SRP_OPT_PKEY		|
3455 	SRP_OPT_SERVICE_ID,
3456 	SRP_OPT_ID_EXT		|
3457 	SRP_OPT_IOC_GUID	|
3458 	SRP_OPT_IP_DEST,
3459 };
3460 
3461 static const match_table_t srp_opt_tokens = {
3462 	{ SRP_OPT_ID_EXT,		"id_ext=%s" 		},
3463 	{ SRP_OPT_IOC_GUID,		"ioc_guid=%s" 		},
3464 	{ SRP_OPT_DGID,			"dgid=%s" 		},
3465 	{ SRP_OPT_PKEY,			"pkey=%x" 		},
3466 	{ SRP_OPT_SERVICE_ID,		"service_id=%s"		},
3467 	{ SRP_OPT_MAX_SECT,		"max_sect=%d" 		},
3468 	{ SRP_OPT_MAX_CMD_PER_LUN,	"max_cmd_per_lun=%d" 	},
3469 	{ SRP_OPT_TARGET_CAN_QUEUE,	"target_can_queue=%d"	},
3470 	{ SRP_OPT_IO_CLASS,		"io_class=%x"		},
3471 	{ SRP_OPT_INITIATOR_EXT,	"initiator_ext=%s"	},
3472 	{ SRP_OPT_CMD_SG_ENTRIES,	"cmd_sg_entries=%u"	},
3473 	{ SRP_OPT_ALLOW_EXT_SG,		"allow_ext_sg=%u"	},
3474 	{ SRP_OPT_SG_TABLESIZE,		"sg_tablesize=%u"	},
3475 	{ SRP_OPT_COMP_VECTOR,		"comp_vector=%u"	},
3476 	{ SRP_OPT_TL_RETRY_COUNT,	"tl_retry_count=%u"	},
3477 	{ SRP_OPT_QUEUE_SIZE,		"queue_size=%d"		},
3478 	{ SRP_OPT_IP_SRC,		"src=%s"		},
3479 	{ SRP_OPT_IP_DEST,		"dest=%s"		},
3480 	{ SRP_OPT_ERR,			NULL 			}
3481 };
3482 
3483 /**
 * srp_parse_in() - parse an IP address and port number combination
3485  * @net:	   [in]  Network namespace.
3486  * @sa:		   [out] Address family, IP address and port number.
3487  * @addr_port_str: [in]  IP address and port number.
3488  *
3489  * Parse the following address formats:
3490  * - IPv4: <ip_address>:<port>, e.g. 1.2.3.4:5.
3491  * - IPv6: \[<ipv6_address>\]:<port>, e.g. [1::2:3%4]:5.
3492  */
3493 static int srp_parse_in(struct net *net, struct sockaddr_storage *sa,
3494 			const char *addr_port_str)
3495 {
3496 	char *addr_end, *addr = kstrdup(addr_port_str, GFP_KERNEL);
3497 	char *port_str;
3498 	int ret;
3499 
3500 	if (!addr)
3501 		return -ENOMEM;
	port_str = strrchr(addr, ':');
	if (!port_str) {
		kfree(addr);
		return -EINVAL;
	}
	*port_str++ = '\0';
3506 	ret = inet_pton_with_scope(net, AF_INET, addr, port_str, sa);
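	/* If IPv4 parsing failed, retry as IPv6 with the brackets stripped. */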
3507 	if (ret && addr[0]) {
3508 		addr_end = addr + strlen(addr) - 1;
3509 		if (addr[0] == '[' && *addr_end == ']') {
3510 			*addr_end = '\0';
3511 			ret = inet_pton_with_scope(net, AF_INET6, addr + 1,
3512 						   port_str, sa);
3513 		}
3514 	}
3515 	kfree(addr);
3516 	pr_debug("%s -> %pISpfsc\n", addr_port_str, sa);
3517 	return ret;
3518 }
3519 
3520 static int srp_parse_options(struct net *net, const char *buf,
3521 			     struct srp_target_port *target)
3522 {
3523 	char *options, *sep_opt;
3524 	char *p;
3525 	substring_t args[MAX_OPT_ARGS];
3526 	unsigned long long ull;
3527 	int opt_mask = 0;
3528 	int token;
3529 	int ret = -EINVAL;
3530 	int i;
3531 
3532 	options = kstrdup(buf, GFP_KERNEL);
3533 	if (!options)
3534 		return -ENOMEM;
3535 
3536 	sep_opt = options;
3537 	while ((p = strsep(&sep_opt, ",\n")) != NULL) {
3538 		if (!*p)
3539 			continue;
3540 
3541 		token = match_token(p, srp_opt_tokens, args);
3542 		opt_mask |= token;
3543 
3544 		switch (token) {
3545 		case SRP_OPT_ID_EXT:
3546 			p = match_strdup(args);
3547 			if (!p) {
3548 				ret = -ENOMEM;
3549 				goto out;
3550 			}
3551 			ret = kstrtoull(p, 16, &ull);
3552 			if (ret) {
3553 				pr_warn("invalid id_ext parameter '%s'\n", p);
3554 				kfree(p);
3555 				goto out;
3556 			}
3557 			target->id_ext = cpu_to_be64(ull);
3558 			kfree(p);
3559 			break;
3560 
3561 		case SRP_OPT_IOC_GUID:
3562 			p = match_strdup(args);
3563 			if (!p) {
3564 				ret = -ENOMEM;
3565 				goto out;
3566 			}
3567 			ret = kstrtoull(p, 16, &ull);
3568 			if (ret) {
3569 				pr_warn("invalid ioc_guid parameter '%s'\n", p);
3570 				kfree(p);
3571 				goto out;
3572 			}
3573 			target->ioc_guid = cpu_to_be64(ull);
3574 			kfree(p);
3575 			break;
3576 
3577 		case SRP_OPT_DGID:
3578 			p = match_strdup(args);
3579 			if (!p) {
3580 				ret = -ENOMEM;
3581 				goto out;
3582 			}
			if (strlen(p) != 32) {
				pr_warn("bad dest GID parameter '%s'\n", p);
				kfree(p);
				ret = -EINVAL;
				goto out;
			}
3588 
3589 			ret = hex2bin(target->ib_cm.orig_dgid.raw, p, 16);
3590 			kfree(p);
3591 			if (ret < 0)
3592 				goto out;
3593 			break;
3594 
3595 		case SRP_OPT_PKEY:
			if (match_hex(args, &token)) {
				pr_warn("bad P_Key parameter '%s'\n", p);
				ret = -EINVAL;
				goto out;
			}
3600 			target->ib_cm.pkey = cpu_to_be16(token);
3601 			break;
3602 
3603 		case SRP_OPT_SERVICE_ID:
3604 			p = match_strdup(args);
3605 			if (!p) {
3606 				ret = -ENOMEM;
3607 				goto out;
3608 			}
3609 			ret = kstrtoull(p, 16, &ull);
3610 			if (ret) {
3611 				pr_warn("bad service_id parameter '%s'\n", p);
3612 				kfree(p);
3613 				goto out;
3614 			}
3615 			target->ib_cm.service_id = cpu_to_be64(ull);
3616 			kfree(p);
3617 			break;
3618 
3619 		case SRP_OPT_IP_SRC:
3620 			p = match_strdup(args);
3621 			if (!p) {
3622 				ret = -ENOMEM;
3623 				goto out;
3624 			}
3625 			ret = srp_parse_in(net, &target->rdma_cm.src.ss, p);
3626 			if (ret < 0) {
3627 				pr_warn("bad source parameter '%s'\n", p);
3628 				kfree(p);
3629 				goto out;
3630 			}
3631 			target->rdma_cm.src_specified = true;
3632 			kfree(p);
3633 			break;
3634 
3635 		case SRP_OPT_IP_DEST:
3636 			p = match_strdup(args);
3637 			if (!p) {
3638 				ret = -ENOMEM;
3639 				goto out;
3640 			}
3641 			ret = srp_parse_in(net, &target->rdma_cm.dst.ss, p);
3642 			if (ret < 0) {
3643 				pr_warn("bad dest parameter '%s'\n", p);
3644 				kfree(p);
3645 				goto out;
3646 			}
3647 			target->using_rdma_cm = true;
3648 			kfree(p);
3649 			break;
3650 
3651 		case SRP_OPT_MAX_SECT:
3652 			if (match_int(args, &token)) {
3653 				pr_warn("bad max sect parameter '%s'\n", p);
3654 				goto out;
3655 			}
3656 			target->scsi_host->max_sectors = token;
3657 			break;
3658 
3659 		case SRP_OPT_QUEUE_SIZE:
3660 			if (match_int(args, &token) || token < 1) {
3661 				pr_warn("bad queue_size parameter '%s'\n", p);
3662 				goto out;
3663 			}
3664 			target->scsi_host->can_queue = token;
3665 			target->queue_size = token + SRP_RSP_SQ_SIZE +
3666 					     SRP_TSK_MGMT_SQ_SIZE;
3667 			if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3668 				target->scsi_host->cmd_per_lun = token;
3669 			break;
3670 
3671 		case SRP_OPT_MAX_CMD_PER_LUN:
3672 			if (match_int(args, &token) || token < 1) {
3673 				pr_warn("bad max cmd_per_lun parameter '%s'\n",
3674 					p);
3675 				goto out;
3676 			}
3677 			target->scsi_host->cmd_per_lun = token;
3678 			break;
3679 
3680 		case SRP_OPT_TARGET_CAN_QUEUE:
3681 			if (match_int(args, &token) || token < 1) {
3682 				pr_warn("bad max target_can_queue parameter '%s'\n",
3683 					p);
3684 				goto out;
3685 			}
3686 			target->target_can_queue = token;
3687 			break;
3688 
3689 		case SRP_OPT_IO_CLASS:
3690 			if (match_hex(args, &token)) {
3691 				pr_warn("bad IO class parameter '%s'\n", p);
3692 				goto out;
3693 			}
3694 			if (token != SRP_REV10_IB_IO_CLASS &&
3695 			    token != SRP_REV16A_IB_IO_CLASS) {
3696 				pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3697 					token, SRP_REV10_IB_IO_CLASS,
3698 					SRP_REV16A_IB_IO_CLASS);
3699 				goto out;
3700 			}
3701 			target->io_class = token;
3702 			break;
3703 
3704 		case SRP_OPT_INITIATOR_EXT:
3705 			p = match_strdup(args);
3706 			if (!p) {
3707 				ret = -ENOMEM;
3708 				goto out;
3709 			}
3710 			ret = kstrtoull(p, 16, &ull);
3711 			if (ret) {
3712 				pr_warn("bad initiator_ext value '%s'\n", p);
3713 				kfree(p);
3714 				goto out;
3715 			}
3716 			target->initiator_ext = cpu_to_be64(ull);
3717 			kfree(p);
3718 			break;
3719 
3720 		case SRP_OPT_CMD_SG_ENTRIES:
3721 			if (match_int(args, &token) || token < 1 || token > 255) {
3722 				pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3723 					p);
3724 				goto out;
3725 			}
3726 			target->cmd_sg_cnt = token;
3727 			break;
3728 
3729 		case SRP_OPT_ALLOW_EXT_SG:
3730 			if (match_int(args, &token)) {
3731 				pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3732 				goto out;
3733 			}
3734 			target->allow_ext_sg = !!token;
3735 			break;
3736 
3737 		case SRP_OPT_SG_TABLESIZE:
3738 			if (match_int(args, &token) || token < 1 ||
3739 					token > SG_MAX_SEGMENTS) {
3740 				pr_warn("bad max sg_tablesize parameter '%s'\n",
3741 					p);
3742 				goto out;
3743 			}
3744 			target->sg_tablesize = token;
3745 			break;
3746 
3747 		case SRP_OPT_COMP_VECTOR:
3748 			if (match_int(args, &token) || token < 0) {
3749 				pr_warn("bad comp_vector parameter '%s'\n", p);
3750 				goto out;
3751 			}
3752 			target->comp_vector = token;
3753 			break;
3754 
3755 		case SRP_OPT_TL_RETRY_COUNT:
3756 			if (match_int(args, &token) || token < 2 || token > 7) {
3757 				pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3758 					p);
3759 				goto out;
3760 			}
3761 			target->tl_retry_count = token;
3762 			break;
3763 
3764 		default:
3765 			pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3766 				p);
3767 			goto out;
3768 		}
3769 	}
3770 
	/* Fail unless at least one complete set of mandatory options was given. */
	ret = -EINVAL;
	for (i = 0; i < ARRAY_SIZE(srp_opt_mandatory); i++) {
3772 		if ((opt_mask & srp_opt_mandatory[i]) == srp_opt_mandatory[i]) {
3773 			ret = 0;
3774 			break;
3775 		}
3776 	}
3777 	if (ret)
3778 		pr_warn("target creation request is missing one or more parameters\n");
3779 
3780 	if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3781 	    && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3782 		pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3783 			target->scsi_host->cmd_per_lun,
3784 			target->scsi_host->can_queue);
3785 
3786 out:
3787 	kfree(options);
3788 	return ret;
3789 }
3790 
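/*
 * "add_target" sysfs store method: parse the option string in @buf, create a
 * new SRP target port on this host and log in to it.  Returns @count on
 * success or a negative errno on failure.  An illustrative IB login string
 * has the form
 * "id_ext=<hex>,ioc_guid=<hex>,dgid=<32 hex chars>,pkey=ffff,service_id=<hex>".
 */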
3791 static ssize_t srp_create_target(struct device *dev,
3792 				 struct device_attribute *attr,
3793 				 const char *buf, size_t count)
3794 {
3795 	struct srp_host *host =
3796 		container_of(dev, struct srp_host, dev);
3797 	struct Scsi_Host *target_host;
3798 	struct srp_target_port *target;
3799 	struct srp_rdma_ch *ch;
3800 	struct srp_device *srp_dev = host->srp_dev;
3801 	struct ib_device *ibdev = srp_dev->dev;
3802 	int ret, node_idx, node, cpu, i;
3803 	unsigned int max_sectors_per_mr, mr_per_cmd = 0;
3804 	bool multich = false;
3805 	uint32_t max_iu_len;
3806 
3807 	target_host = scsi_host_alloc(&srp_template,
3808 				      sizeof (struct srp_target_port));
3809 	if (!target_host)
3810 		return -ENOMEM;
3811 
3812 	target_host->transportt  = ib_srp_transport_template;
3813 	target_host->max_channel = 0;
3814 	target_host->max_id      = 1;
3815 	target_host->max_lun     = -1LL;
3816 	target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3817 
3818 	target = host_to_target(target_host);
3819 
3820 	target->net		= kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
3821 	target->io_class	= SRP_REV16A_IB_IO_CLASS;
3822 	target->scsi_host	= target_host;
3823 	target->srp_host	= host;
3824 	target->lkey		= host->srp_dev->pd->local_dma_lkey;
3825 	target->global_rkey	= host->srp_dev->global_rkey;
3826 	target->cmd_sg_cnt	= cmd_sg_entries;
3827 	target->sg_tablesize	= indirect_sg_entries ? : cmd_sg_entries;
3828 	target->allow_ext_sg	= allow_ext_sg;
3829 	target->tl_retry_count	= 7;
3830 	target->queue_size	= SRP_DEFAULT_QUEUE_SIZE;
3831 
3832 	/*
	 * Prevent the SCSI host from being removed by srp_remove_target()
3834 	 * before this function returns.
3835 	 */
3836 	scsi_host_get(target->scsi_host);
3837 
3838 	ret = mutex_lock_interruptible(&host->add_target_mutex);
3839 	if (ret < 0)
3840 		goto put;
3841 
3842 	ret = srp_parse_options(target->net, buf, target);
3843 	if (ret)
3844 		goto out;
3845 
3846 	target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3847 
3848 	if (!srp_conn_unique(target->srp_host, target)) {
3849 		if (target->using_rdma_cm) {
3850 			shost_printk(KERN_INFO, target->scsi_host,
3851 				     PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;dest=%pIS\n",
3852 				     be64_to_cpu(target->id_ext),
3853 				     be64_to_cpu(target->ioc_guid),
3854 				     &target->rdma_cm.dst);
3855 		} else {
3856 			shost_printk(KERN_INFO, target->scsi_host,
3857 				     PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3858 				     be64_to_cpu(target->id_ext),
3859 				     be64_to_cpu(target->ioc_guid),
3860 				     be64_to_cpu(target->initiator_ext));
3861 		}
3862 		ret = -EEXIST;
3863 		goto out;
3864 	}
3865 
3866 	if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3867 	    target->cmd_sg_cnt < target->sg_tablesize) {
3868 		pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3869 		target->sg_tablesize = target->cmd_sg_cnt;
3870 	}
3871 
3872 	if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
3873 		bool gaps_reg = (ibdev->attrs.device_cap_flags &
3874 				 IB_DEVICE_SG_GAPS_REG);
3875 
3876 		max_sectors_per_mr = srp_dev->max_pages_per_mr <<
3877 				  (ilog2(srp_dev->mr_page_size) - 9);
3878 		if (!gaps_reg) {
3879 			/*
3880 			 * FR and FMR can only map one HCA page per entry. If
			 * the start address is not aligned on an HCA page
			 * boundary, two entries are used for the head and the
			 * tail even though these two entries combined contain
			 * at most one HCA page of data. Hence the "+ 1" in
			 * the calculation below.
3886 			 *
3887 			 * The indirect data buffer descriptor is contiguous
3888 			 * so the memory for that buffer will only be
3889 			 * registered if register_always is true. Hence add
3890 			 * one to mr_per_cmd if register_always has been set.
3891 			 */
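			/*
			 * Worked example (illustrative numbers only): with
			 * max_pages_per_mr = 256 and a 4 KiB mr_page_size,
			 * max_sectors_per_mr = 256 << 3 = 2048 sectors.  For
			 * max_sectors = 1024 this gives (1024 + 1 + 2047) /
			 * 2048 = 1 data MR per command, plus one more MR for
			 * the indirect descriptor when register_always is
			 * set, i.e. mr_per_cmd = 2.
			 */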
3892 			mr_per_cmd = register_always +
3893 				(target->scsi_host->max_sectors + 1 +
3894 				 max_sectors_per_mr - 1) / max_sectors_per_mr;
3895 		} else {
3896 			mr_per_cmd = register_always +
3897 				(target->sg_tablesize +
3898 				 srp_dev->max_pages_per_mr - 1) /
3899 				srp_dev->max_pages_per_mr;
3900 		}
3901 		pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
3902 			 target->scsi_host->max_sectors, srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
3903 			 max_sectors_per_mr, mr_per_cmd);
3904 	}
3905 
3906 	target_host->sg_tablesize = target->sg_tablesize;
3907 	target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
3908 	target->mr_per_cmd = mr_per_cmd;
3909 	target->indirect_size = target->sg_tablesize *
3910 				sizeof (struct srp_direct_buf);
3911 	max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, srp_use_imm_data);
3912 
3913 	INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3914 	INIT_WORK(&target->remove_work, srp_remove_work);
3915 	spin_lock_init(&target->lock);
3916 	ret = rdma_query_gid(ibdev, host->port, 0, &target->sgid);
3917 	if (ret)
3918 		goto out;
3919 
3920 	ret = -ENOMEM;
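	/*
	 * Create at least one RDMA channel per NUMA node.  Beyond that the
	 * channel count is limited to the number of online CPUs and, unless
	 * overridden by the ch_count module parameter, to the smaller of
	 * four channels per node and the number of completion vectors
	 * supported by the HCA.
	 */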
3921 	target->ch_count = max_t(unsigned, num_online_nodes(),
3922 				 min(ch_count ? :
3923 				     min(4 * num_online_nodes(),
3924 					 ibdev->num_comp_vectors),
3925 				     num_online_cpus()));
3926 	target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3927 			     GFP_KERNEL);
3928 	if (!target->ch)
3929 		goto out;
3930 
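	/*
	 * Distribute the channels evenly over the online NUMA nodes: each
	 * node sets up its share [ch_start, ch_end) of channels, one per
	 * online CPU of that node, and the HCA completion vectors are
	 * partitioned across the nodes in the same proportion.
	 */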
3931 	node_idx = 0;
3932 	for_each_online_node(node) {
3933 		const int ch_start = (node_idx * target->ch_count /
3934 				      num_online_nodes());
3935 		const int ch_end = ((node_idx + 1) * target->ch_count /
3936 				    num_online_nodes());
3937 		const int cv_start = node_idx * ibdev->num_comp_vectors /
3938 				     num_online_nodes();
3939 		const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors /
3940 				   num_online_nodes();
3941 		int cpu_idx = 0;
3942 
3943 		for_each_online_cpu(cpu) {
3944 			if (cpu_to_node(cpu) != node)
3945 				continue;
3946 			if (ch_start + cpu_idx >= ch_end)
3947 				continue;
3948 			ch = &target->ch[ch_start + cpu_idx];
3949 			ch->target = target;
3950 			ch->comp_vector = cv_start == cv_end ? cv_start :
3951 				cv_start + cpu_idx % (cv_end - cv_start);
3952 			spin_lock_init(&ch->lock);
3953 			INIT_LIST_HEAD(&ch->free_tx);
3954 			ret = srp_new_cm_id(ch);
3955 			if (ret)
3956 				goto err_disconnect;
3957 
3958 			ret = srp_create_ch_ib(ch);
3959 			if (ret)
3960 				goto err_disconnect;
3961 
3962 			ret = srp_alloc_req_data(ch);
3963 			if (ret)
3964 				goto err_disconnect;
3965 
3966 			ret = srp_connect_ch(ch, max_iu_len, multich);
3967 			if (ret) {
3968 				char dst[64];
3969 
3970 				if (target->using_rdma_cm)
3971 					snprintf(dst, sizeof(dst), "%pIS",
3972 						 &target->rdma_cm.dst);
3973 				else
3974 					snprintf(dst, sizeof(dst), "%pI6",
3975 						 target->ib_cm.orig_dgid.raw);
3976 				shost_printk(KERN_ERR, target->scsi_host,
3977 					     PFX "Connection %d/%d to %s failed\n",
3978 					     ch_start + cpu_idx,
3979 					     target->ch_count, dst);
3980 				if (node_idx == 0 && cpu_idx == 0) {
3981 					goto free_ch;
3982 				} else {
3983 					srp_free_ch_ib(target, ch);
3984 					srp_free_req_data(target, ch);
3985 					target->ch_count = ch - target->ch;
3986 					goto connected;
3987 				}
3988 			}
3989 
3990 			multich = true;
3991 			cpu_idx++;
3992 		}
3993 		node_idx++;
3994 	}
3995 
3996 connected:
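	/* Expose one SCSI hardware queue per established RDMA channel. */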
3997 	target->scsi_host->nr_hw_queues = target->ch_count;
3998 
3999 	ret = srp_add_target(host, target);
4000 	if (ret)
4001 		goto err_disconnect;
4002 
4003 	if (target->state != SRP_TARGET_REMOVED) {
4004 		if (target->using_rdma_cm) {
4005 			shost_printk(KERN_DEBUG, target->scsi_host, PFX
4006 				     "new target: id_ext %016llx ioc_guid %016llx sgid %pI6 dest %pIS\n",
4007 				     be64_to_cpu(target->id_ext),
4008 				     be64_to_cpu(target->ioc_guid),
4009 				     target->sgid.raw, &target->rdma_cm.dst);
4010 		} else {
4011 			shost_printk(KERN_DEBUG, target->scsi_host, PFX
4012 				     "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
4013 				     be64_to_cpu(target->id_ext),
4014 				     be64_to_cpu(target->ioc_guid),
4015 				     be16_to_cpu(target->ib_cm.pkey),
4016 				     be64_to_cpu(target->ib_cm.service_id),
4017 				     target->sgid.raw,
4018 				     target->ib_cm.orig_dgid.raw);
4019 		}
4020 	}
4021 
4022 	ret = count;
4023 
4024 out:
4025 	mutex_unlock(&host->add_target_mutex);
4026 
4027 put:
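	/* Drop the reference obtained by scsi_host_get() above. */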
4028 	scsi_host_put(target->scsi_host);
4029 	if (ret < 0) {
4030 		/*
4031 		 * If a call to srp_remove_target() has not been scheduled,
		 * drop the network namespace reference that was obtained
4033 		 * earlier in this function.
4034 		 */
4035 		if (target->state != SRP_TARGET_REMOVED)
4036 			kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net);
4037 		scsi_host_put(target->scsi_host);
4038 	}
4039 
4040 	return ret;
4041 
4042 err_disconnect:
4043 	srp_disconnect_target(target);
4044 
4045 free_ch:
4046 	for (i = 0; i < target->ch_count; i++) {
4047 		ch = &target->ch[i];
4048 		srp_free_ch_ib(target, ch);
4049 		srp_free_req_data(target, ch);
4050 	}
4051 
4052 	kfree(target->ch);
4053 	goto out;
4054 }
4055 
4056 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
4057 
4058 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
4059 			  char *buf)
4060 {
4061 	struct srp_host *host = container_of(dev, struct srp_host, dev);
4062 
4063 	return sprintf(buf, "%s\n", dev_name(&host->srp_dev->dev->dev));
4064 }
4065 
4066 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
4067 
4068 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
4069 			 char *buf)
4070 {
4071 	struct srp_host *host = container_of(dev, struct srp_host, dev);
4072 
4073 	return sprintf(buf, "%d\n", host->port);
4074 }
4075 
4076 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
4077 
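/*
 * Allocate and register a struct srp_host for HCA port @port of @device and
 * create its "add_target", "ibdev" and "port" sysfs attributes.  Returns the
 * new host or NULL on failure.
 */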
4078 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
4079 {
4080 	struct srp_host *host;
4081 
4082 	host = kzalloc(sizeof *host, GFP_KERNEL);
4083 	if (!host)
4084 		return NULL;
4085 
4086 	INIT_LIST_HEAD(&host->target_list);
4087 	spin_lock_init(&host->target_lock);
4088 	init_completion(&host->released);
4089 	mutex_init(&host->add_target_mutex);
4090 	host->srp_dev = device;
4091 	host->port = port;
4092 
4093 	host->dev.class = &srp_class;
4094 	host->dev.parent = device->dev->dev.parent;
4095 	dev_set_name(&host->dev, "srp-%s-%d", dev_name(&device->dev->dev),
4096 		     port);
4097 
4098 	if (device_register(&host->dev))
4099 		goto free_host;
4100 	if (device_create_file(&host->dev, &dev_attr_add_target))
4101 		goto err_class;
4102 	if (device_create_file(&host->dev, &dev_attr_ibdev))
4103 		goto err_class;
4104 	if (device_create_file(&host->dev, &dev_attr_port))
4105 		goto err_class;
4106 
4107 	return host;
4108 
4109 err_class:
4110 	device_unregister(&host->dev);
4111 
4112 free_host:
4113 	kfree(host);
4114 
4115 	return NULL;
4116 }
4117 
4118 static void srp_add_one(struct ib_device *device)
4119 {
4120 	struct srp_device *srp_dev;
4121 	struct ib_device_attr *attr = &device->attrs;
4122 	struct srp_host *host;
4123 	int mr_page_shift, p;
4124 	u64 max_pages_per_mr;
4125 	unsigned int flags = 0;
4126 
4127 	srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
4128 	if (!srp_dev)
4129 		return;
4130 
4131 	/*
4132 	 * Use the smallest page size supported by the HCA, down to a
4133 	 * minimum of 4096 bytes. We're unlikely to build large sglists
4134 	 * out of smaller entries.
4135 	 */
4136 	mr_page_shift		= max(12, ffs(attr->page_size_cap) - 1);
4137 	srp_dev->mr_page_size	= 1 << mr_page_shift;
4138 	srp_dev->mr_page_mask	= ~((u64) srp_dev->mr_page_size - 1);
4139 	max_pages_per_mr	= attr->max_mr_size;
4140 	do_div(max_pages_per_mr, srp_dev->mr_page_size);
4141 	pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
4142 		 attr->max_mr_size, srp_dev->mr_page_size,
4143 		 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
4144 	srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
4145 					  max_pages_per_mr);
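	/*
	 * Example (illustrative values): an HCA reporting a smallest page
	 * size of 4 KiB and max_mr_size = 2 GiB yields mr_page_shift = 12
	 * and 524288 pages per MR, which min_t() above then limits to at
	 * most SRP_MAX_PAGES_PER_MR.
	 */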
4146 
4147 	srp_dev->has_fmr = (device->ops.alloc_fmr &&
4148 			    device->ops.dealloc_fmr &&
4149 			    device->ops.map_phys_fmr &&
4150 			    device->ops.unmap_fmr);
4151 	srp_dev->has_fr = (attr->device_cap_flags &
4152 			   IB_DEVICE_MEM_MGT_EXTENSIONS);
4153 	if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
4154 		dev_warn(&device->dev, "neither FMR nor FR is supported\n");
4155 	} else if (!never_register &&
4156 		   attr->max_mr_size >= 2 * srp_dev->mr_page_size) {
4157 		srp_dev->use_fast_reg = (srp_dev->has_fr &&
4158 					 (!srp_dev->has_fmr || prefer_fr));
4159 		srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
4160 	}
4161 
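	/*
	 * Request the unsafe PD-wide global rkey whenever it may be needed:
	 * when registration is disabled (never_register), when it is not
	 * performed for every request (!register_always), or when the HCA
	 * supports neither FMR nor FR.
	 */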
4162 	if (never_register || !register_always ||
4163 	    (!srp_dev->has_fmr && !srp_dev->has_fr))
4164 		flags |= IB_PD_UNSAFE_GLOBAL_RKEY;
4165 
4166 	if (srp_dev->use_fast_reg) {
4167 		srp_dev->max_pages_per_mr =
4168 			min_t(u32, srp_dev->max_pages_per_mr,
4169 			      attr->max_fast_reg_page_list_len);
4170 	}
4171 	srp_dev->mr_max_size	= srp_dev->mr_page_size *
4172 				   srp_dev->max_pages_per_mr;
4173 	pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
4174 		 dev_name(&device->dev), mr_page_shift, attr->max_mr_size,
4175 		 attr->max_fast_reg_page_list_len,
4176 		 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
4177 
4178 	INIT_LIST_HEAD(&srp_dev->dev_list);
4179 
4180 	srp_dev->dev = device;
4181 	srp_dev->pd  = ib_alloc_pd(device, flags);
4182 	if (IS_ERR(srp_dev->pd))
4183 		goto free_dev;
4184 
4185 	if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
4186 		srp_dev->global_rkey = srp_dev->pd->unsafe_global_rkey;
4187 		WARN_ON_ONCE(srp_dev->global_rkey == 0);
4188 	}
4189 
4190 	for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
4191 		host = srp_add_port(srp_dev, p);
4192 		if (host)
4193 			list_add_tail(&host->list, &srp_dev->dev_list);
4194 	}
4195 
4196 	ib_set_client_data(device, &srp_client, srp_dev);
4197 	return;
4198 
4199 free_dev:
4200 	kfree(srp_dev);
4201 }
4202 
4203 static void srp_remove_one(struct ib_device *device, void *client_data)
4204 {
4205 	struct srp_device *srp_dev;
4206 	struct srp_host *host, *tmp_host;
4207 	struct srp_target_port *target;
4208 
4209 	srp_dev = client_data;
4210 	if (!srp_dev)
4211 		return;
4212 
4213 	list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
4214 		device_unregister(&host->dev);
4215 		/*
4216 		 * Wait for the sysfs entry to go away, so that no new
4217 		 * target ports can be created.
4218 		 */
4219 		wait_for_completion(&host->released);
4220 
4221 		/*
4222 		 * Remove all target ports.
4223 		 */
4224 		spin_lock(&host->target_lock);
4225 		list_for_each_entry(target, &host->target_list, list)
4226 			srp_queue_remove_work(target);
4227 		spin_unlock(&host->target_lock);
4228 
4229 		/*
4230 		 * Wait for tl_err and target port removal tasks.
4231 		 */
4232 		flush_workqueue(system_long_wq);
4233 		flush_workqueue(srp_remove_wq);
4234 
4235 		kfree(host);
4236 	}
4237 
4238 	ib_dealloc_pd(srp_dev->pd);
4239 
4240 	kfree(srp_dev);
4241 }
4242 
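/*
 * Callbacks and tunables registered with the SCSI SRP transport class
 * (scsi_transport_srp), which uses them to drive reconnect, fast_io_fail
 * and dev_loss handling for the rports created by this driver.
 */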
4243 static struct srp_function_template ib_srp_transport_functions = {
4244 	.has_rport_state	 = true,
4245 	.reset_timer_if_blocked	 = true,
4246 	.reconnect_delay	 = &srp_reconnect_delay,
4247 	.fast_io_fail_tmo	 = &srp_fast_io_fail_tmo,
4248 	.dev_loss_tmo		 = &srp_dev_loss_tmo,
4249 	.reconnect		 = srp_rport_reconnect,
4250 	.rport_delete		 = srp_rport_delete,
4251 	.terminate_rport_io	 = srp_terminate_io,
4252 };
4253 
4254 static int __init srp_init_module(void)
4255 {
4256 	int ret;
4257 
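	/*
	 * Catch at compile time any change that would alter the on-the-wire
	 * size of the SRP information units used by this driver.
	 */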
4258 	BUILD_BUG_ON(sizeof(struct srp_imm_buf) != 4);
4259 	BUILD_BUG_ON(sizeof(struct srp_login_req) != 64);
4260 	BUILD_BUG_ON(sizeof(struct srp_login_req_rdma) != 56);
4261 	BUILD_BUG_ON(sizeof(struct srp_cmd) != 48);
4262 
4263 	if (srp_sg_tablesize) {
4264 		pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
4265 		if (!cmd_sg_entries)
4266 			cmd_sg_entries = srp_sg_tablesize;
4267 	}
4268 
4269 	if (!cmd_sg_entries)
4270 		cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
4271 
4272 	if (cmd_sg_entries > 255) {
4273 		pr_warn("Clamping cmd_sg_entries to 255\n");
4274 		cmd_sg_entries = 255;
4275 	}
4276 
4277 	if (!indirect_sg_entries)
4278 		indirect_sg_entries = cmd_sg_entries;
4279 	else if (indirect_sg_entries < cmd_sg_entries) {
4280 		pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
4281 			cmd_sg_entries);
4282 		indirect_sg_entries = cmd_sg_entries;
4283 	}
4284 
4285 	if (indirect_sg_entries > SG_MAX_SEGMENTS) {
4286 		pr_warn("Clamping indirect_sg_entries to %u\n",
4287 			SG_MAX_SEGMENTS);
4288 		indirect_sg_entries = SG_MAX_SEGMENTS;
4289 	}
4290 
4291 	srp_remove_wq = create_workqueue("srp_remove");
4292 	if (!srp_remove_wq) {
4293 		ret = -ENOMEM;
4294 		goto out;
4295 	}
4296 
4297 	ret = -ENOMEM;
4298 	ib_srp_transport_template =
4299 		srp_attach_transport(&ib_srp_transport_functions);
4300 	if (!ib_srp_transport_template)
4301 		goto destroy_wq;
4302 
4303 	ret = class_register(&srp_class);
4304 	if (ret) {
4305 		pr_err("couldn't register class infiniband_srp\n");
4306 		goto release_tr;
4307 	}
4308 
4309 	ib_sa_register_client(&srp_sa_client);
4310 
4311 	ret = ib_register_client(&srp_client);
4312 	if (ret) {
4313 		pr_err("couldn't register IB client\n");
4314 		goto unreg_sa;
4315 	}
4316 
4317 out:
4318 	return ret;
4319 
4320 unreg_sa:
4321 	ib_sa_unregister_client(&srp_sa_client);
4322 	class_unregister(&srp_class);
4323 
4324 release_tr:
4325 	srp_release_transport(ib_srp_transport_template);
4326 
4327 destroy_wq:
4328 	destroy_workqueue(srp_remove_wq);
4329 	goto out;
4330 }
4331 
4332 static void __exit srp_cleanup_module(void)
4333 {
4334 	ib_unregister_client(&srp_client);
4335 	ib_sa_unregister_client(&srp_sa_client);
4336 	class_unregister(&srp_class);
4337 	srp_release_transport(ib_srp_transport_template);
4338 	destroy_workqueue(srp_remove_wq);
4339 }
4340 
4341 module_init(srp_init_module);
4342 module_exit(srp_cleanup_module);
4343