1 /*
2  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
34 
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/err.h>
39 #include <linux/string.h>
40 #include <linux/parser.h>
41 #include <linux/random.h>
42 #include <linux/jiffies.h>
43 #include <linux/lockdep.h>
44 #include <rdma/ib_cache.h>
45 
46 #include <linux/atomic.h>
47 
48 #include <scsi/scsi.h>
49 #include <scsi/scsi_device.h>
50 #include <scsi/scsi_dbg.h>
51 #include <scsi/scsi_tcq.h>
52 #include <scsi/srp.h>
53 #include <scsi/scsi_transport_srp.h>
54 
55 #include "ib_srp.h"
56 
57 #define DRV_NAME	"ib_srp"
58 #define PFX		DRV_NAME ": "
59 #define DRV_VERSION	"2.0"
60 #define DRV_RELDATE	"July 26, 2015"
61 
62 MODULE_AUTHOR("Roland Dreier");
63 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
64 MODULE_LICENSE("Dual BSD/GPL");
65 MODULE_INFO(release_date, DRV_RELDATE);
66 
67 #if !defined(CONFIG_DYNAMIC_DEBUG)
68 #define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
69 #define DYNAMIC_DEBUG_BRANCH(descriptor) false
70 #endif
71 
72 static unsigned int srp_sg_tablesize;
73 static unsigned int cmd_sg_entries;
74 static unsigned int indirect_sg_entries;
75 static bool allow_ext_sg;
76 static bool prefer_fr = true;
77 static bool register_always = true;
78 static bool never_register;
79 static int topspin_workarounds = 1;
80 
81 module_param(srp_sg_tablesize, uint, 0444);
82 MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
83 
84 module_param(cmd_sg_entries, uint, 0444);
85 MODULE_PARM_DESC(cmd_sg_entries,
86 		 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
87 
88 module_param(indirect_sg_entries, uint, 0444);
89 MODULE_PARM_DESC(indirect_sg_entries,
90 		 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");
91 
92 module_param(allow_ext_sg, bool, 0444);
93 MODULE_PARM_DESC(allow_ext_sg,
94 		  "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
95 
96 module_param(topspin_workarounds, int, 0444);
97 MODULE_PARM_DESC(topspin_workarounds,
98 		 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
99 
100 module_param(prefer_fr, bool, 0444);
MODULE_PARM_DESC(prefer_fr,
		 "Whether to use fast registration if both FMR and fast registration are supported");
103 
104 module_param(register_always, bool, 0444);
105 MODULE_PARM_DESC(register_always,
106 		 "Use memory registration even for contiguous memory regions");
107 
108 module_param(never_register, bool, 0444);
109 MODULE_PARM_DESC(never_register, "Never register memory");
110 
111 static const struct kernel_param_ops srp_tmo_ops;
112 
113 static int srp_reconnect_delay = 10;
114 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
115 		S_IRUGO | S_IWUSR);
116 MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
117 
118 static int srp_fast_io_fail_tmo = 15;
119 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
120 		S_IRUGO | S_IWUSR);
121 MODULE_PARM_DESC(fast_io_fail_tmo,
122 		 "Number of seconds between the observation of a transport"
123 		 " layer error and failing all I/O. \"off\" means that this"
124 		 " functionality is disabled.");
125 
126 static int srp_dev_loss_tmo = 600;
127 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
128 		S_IRUGO | S_IWUSR);
129 MODULE_PARM_DESC(dev_loss_tmo,
130 		 "Maximum number of seconds that the SRP transport should"
131 		 " insulate transport layer errors. After this time has been"
132 		 " exceeded the SCSI host is removed. Should be"
133 		 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
134 		 " if fast_io_fail_tmo has not been set. \"off\" means that"
135 		 " this functionality is disabled.");
136 
137 static unsigned ch_count;
138 module_param(ch_count, uint, 0444);
139 MODULE_PARM_DESC(ch_count,
140 		 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
141 
142 static void srp_add_one(struct ib_device *device);
143 static void srp_remove_one(struct ib_device *device, void *client_data);
144 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
145 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
146 		const char *opname);
147 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
148 
149 static struct scsi_transport_template *ib_srp_transport_template;
150 static struct workqueue_struct *srp_remove_wq;
151 
152 static struct ib_client srp_client = {
153 	.name   = "srp",
154 	.add    = srp_add_one,
155 	.remove = srp_remove_one
156 };
157 
158 static struct ib_sa_client srp_sa_client;
159 
160 static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
161 {
162 	int tmo = *(int *)kp->arg;
163 
164 	if (tmo >= 0)
165 		return sprintf(buffer, "%d", tmo);
166 	else
167 		return sprintf(buffer, "off");
168 }
169 
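/*
 * Validate the new timeout against the other two SRP transport layer
 * timeouts (reconnect_delay, fast_io_fail_tmo and dev_loss_tmo) before
 * storing it. These parameters can be changed at runtime via sysfs,
 * e.g. (illustrative values):
 *
 *   echo 20  > /sys/module/ib_srp/parameters/reconnect_delay
 *   echo off > /sys/module/ib_srp/parameters/fast_io_fail_tmo
 */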
170 static int srp_tmo_set(const char *val, const struct kernel_param *kp)
171 {
172 	int tmo, res;
173 
174 	res = srp_parse_tmo(&tmo, val);
175 	if (res)
176 		goto out;
177 
178 	if (kp->arg == &srp_reconnect_delay)
179 		res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
180 				    srp_dev_loss_tmo);
181 	else if (kp->arg == &srp_fast_io_fail_tmo)
182 		res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
183 	else
184 		res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
185 				    tmo);
186 	if (res)
187 		goto out;
188 	*(int *)kp->arg = tmo;
189 
190 out:
191 	return res;
192 }
193 
194 static const struct kernel_param_ops srp_tmo_ops = {
195 	.get = srp_tmo_get,
196 	.set = srp_tmo_set,
197 };
198 
199 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
200 {
201 	return (struct srp_target_port *) host->hostdata;
202 }
203 
204 static const char *srp_target_info(struct Scsi_Host *host)
205 {
206 	return host_to_target(host)->target_name;
207 }
208 
209 static int srp_target_is_topspin(struct srp_target_port *target)
210 {
211 	static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
212 	static const u8 cisco_oui[3]   = { 0x00, 0x1b, 0x0d };
213 
214 	return topspin_workarounds &&
215 		(!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
216 		 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
217 }
218 
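/*
 * Allocate an information unit (IU) together with its data buffer and map
 * the buffer for DMA in the requested direction. Returns NULL on failure.
 */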
219 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
220 				   gfp_t gfp_mask,
221 				   enum dma_data_direction direction)
222 {
223 	struct srp_iu *iu;
224 
225 	iu = kmalloc(sizeof *iu, gfp_mask);
226 	if (!iu)
227 		goto out;
228 
229 	iu->buf = kzalloc(size, gfp_mask);
230 	if (!iu->buf)
231 		goto out_free_iu;
232 
233 	iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
234 				    direction);
235 	if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
236 		goto out_free_buf;
237 
238 	iu->size      = size;
239 	iu->direction = direction;
240 
241 	return iu;
242 
243 out_free_buf:
244 	kfree(iu->buf);
245 out_free_iu:
246 	kfree(iu);
247 out:
248 	return NULL;
249 }
250 
251 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
252 {
253 	if (!iu)
254 		return;
255 
256 	ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
257 			    iu->direction);
258 	kfree(iu->buf);
259 	kfree(iu);
260 }
261 
262 static void srp_qp_event(struct ib_event *event, void *context)
263 {
264 	pr_debug("QP event %s (%d)\n",
265 		 ib_event_msg(event->event), event->event);
266 }
267 
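/*
 * Transition a newly created queue pair to the INIT state, setting its
 * P_Key index, port number and remote access flags.
 */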
268 static int srp_init_qp(struct srp_target_port *target,
269 		       struct ib_qp *qp)
270 {
271 	struct ib_qp_attr *attr;
272 	int ret;
273 
274 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
275 	if (!attr)
276 		return -ENOMEM;
277 
278 	ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
279 				  target->srp_host->port,
280 				  be16_to_cpu(target->pkey),
281 				  &attr->pkey_index);
282 	if (ret)
283 		goto out;
284 
285 	attr->qp_state        = IB_QPS_INIT;
286 	attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
287 				    IB_ACCESS_REMOTE_WRITE);
288 	attr->port_num        = target->srp_host->port;
289 
290 	ret = ib_modify_qp(qp, attr,
291 			   IB_QP_STATE		|
292 			   IB_QP_PKEY_INDEX	|
293 			   IB_QP_ACCESS_FLAGS	|
294 			   IB_QP_PORT);
295 
296 out:
297 	kfree(attr);
298 	return ret;
299 }
300 
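/*
 * Allocate a new IB CM ID for a channel, replacing any previous CM ID, and
 * reinitialize the path record used to connect to the target.
 */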
301 static int srp_new_cm_id(struct srp_rdma_ch *ch)
302 {
303 	struct srp_target_port *target = ch->target;
304 	struct ib_cm_id *new_cm_id;
305 
306 	new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
307 				    srp_cm_handler, ch);
308 	if (IS_ERR(new_cm_id))
309 		return PTR_ERR(new_cm_id);
310 
311 	if (ch->cm_id)
312 		ib_destroy_cm_id(ch->cm_id);
313 	ch->cm_id = new_cm_id;
314 	if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev,
315 			    target->srp_host->port))
316 		ch->path.rec_type = SA_PATH_REC_TYPE_OPA;
317 	else
318 		ch->path.rec_type = SA_PATH_REC_TYPE_IB;
319 	ch->path.sgid = target->sgid;
320 	ch->path.dgid = target->orig_dgid;
321 	ch->path.pkey = target->pkey;
322 	ch->path.service_id = target->service_id;
323 
324 	return 0;
325 }
326 
327 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
328 {
329 	struct srp_device *dev = target->srp_host->srp_dev;
330 	struct ib_fmr_pool_param fmr_param;
331 
332 	memset(&fmr_param, 0, sizeof(fmr_param));
333 	fmr_param.pool_size	    = target->mr_pool_size;
334 	fmr_param.dirty_watermark   = fmr_param.pool_size / 4;
335 	fmr_param.cache		    = 1;
336 	fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
337 	fmr_param.page_shift	    = ilog2(dev->mr_page_size);
338 	fmr_param.access	    = (IB_ACCESS_LOCAL_WRITE |
339 				       IB_ACCESS_REMOTE_WRITE |
340 				       IB_ACCESS_REMOTE_READ);
341 
342 	return ib_create_fmr_pool(dev->pd, &fmr_param);
343 }
344 
345 /**
346  * srp_destroy_fr_pool() - free the resources owned by a pool
347  * @pool: Fast registration pool to be destroyed.
348  */
349 static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
350 {
351 	int i;
352 	struct srp_fr_desc *d;
353 
354 	if (!pool)
355 		return;
356 
357 	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
358 		if (d->mr)
359 			ib_dereg_mr(d->mr);
360 	}
361 	kfree(pool);
362 }
363 
364 /**
365  * srp_create_fr_pool() - allocate and initialize a pool for fast registration
366  * @device:            IB device to allocate fast registration descriptors for.
367  * @pd:                Protection domain associated with the FR descriptors.
368  * @pool_size:         Number of descriptors to allocate.
369  * @max_page_list_len: Maximum fast registration work request page list length.
370  */
371 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
372 					      struct ib_pd *pd, int pool_size,
373 					      int max_page_list_len)
374 {
375 	struct srp_fr_pool *pool;
376 	struct srp_fr_desc *d;
377 	struct ib_mr *mr;
378 	int i, ret = -EINVAL;
379 
380 	if (pool_size <= 0)
381 		goto err;
382 	ret = -ENOMEM;
383 	pool = kzalloc(sizeof(struct srp_fr_pool) +
384 		       pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
385 	if (!pool)
386 		goto err;
387 	pool->size = pool_size;
388 	pool->max_page_list_len = max_page_list_len;
389 	spin_lock_init(&pool->lock);
390 	INIT_LIST_HEAD(&pool->free_list);
391 
392 	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
393 		mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
394 				 max_page_list_len);
395 		if (IS_ERR(mr)) {
396 			ret = PTR_ERR(mr);
397 			if (ret == -ENOMEM)
398 				pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n",
399 					dev_name(&device->dev));
400 			goto destroy_pool;
401 		}
402 		d->mr = mr;
403 		list_add_tail(&d->entry, &pool->free_list);
404 	}
405 
406 out:
407 	return pool;
408 
409 destroy_pool:
410 	srp_destroy_fr_pool(pool);
411 
412 err:
413 	pool = ERR_PTR(ret);
414 	goto out;
415 }
416 
417 /**
418  * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
419  * @pool: Pool to obtain descriptor from.
420  */
421 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
422 {
423 	struct srp_fr_desc *d = NULL;
424 	unsigned long flags;
425 
426 	spin_lock_irqsave(&pool->lock, flags);
427 	if (!list_empty(&pool->free_list)) {
428 		d = list_first_entry(&pool->free_list, typeof(*d), entry);
429 		list_del(&d->entry);
430 	}
431 	spin_unlock_irqrestore(&pool->lock, flags);
432 
433 	return d;
434 }
435 
436 /**
437  * srp_fr_pool_put() - put an FR descriptor back in the free list
438  * @pool: Pool the descriptor was allocated from.
439  * @desc: Pointer to an array of fast registration descriptor pointers.
440  * @n:    Number of descriptors to put back.
441  *
442  * Note: The caller must already have queued an invalidation request for
443  * desc->mr->rkey before calling this function.
444  */
445 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
446 			    int n)
447 {
448 	unsigned long flags;
449 	int i;
450 
451 	spin_lock_irqsave(&pool->lock, flags);
452 	for (i = 0; i < n; i++)
453 		list_add(&desc[i]->entry, &pool->free_list);
454 	spin_unlock_irqrestore(&pool->lock, flags);
455 }
456 
457 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
458 {
459 	struct srp_device *dev = target->srp_host->srp_dev;
460 
461 	return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
462 				  dev->max_pages_per_mr);
463 }
464 
465 /**
466  * srp_destroy_qp() - destroy an RDMA queue pair
467  * @ch: SRP RDMA channel.
468  *
 * Drain the QP before destroying it. This prevents the receive
 * completion handler from accessing the queue pair while it is
 * being destroyed.
472  */
473 static void srp_destroy_qp(struct srp_rdma_ch *ch)
474 {
475 	spin_lock_irq(&ch->lock);
476 	ib_process_cq_direct(ch->send_cq, -1);
477 	spin_unlock_irq(&ch->lock);
478 
479 	ib_drain_qp(ch->qp);
480 	ib_destroy_qp(ch->qp);
481 }
482 
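/*
 * Allocate the completion queues, the RC queue pair and the FR or FMR pool
 * for an RDMA channel, releasing any resources the channel already owned.
 */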
483 static int srp_create_ch_ib(struct srp_rdma_ch *ch)
484 {
485 	struct srp_target_port *target = ch->target;
486 	struct srp_device *dev = target->srp_host->srp_dev;
487 	struct ib_qp_init_attr *init_attr;
488 	struct ib_cq *recv_cq, *send_cq;
489 	struct ib_qp *qp;
490 	struct ib_fmr_pool *fmr_pool = NULL;
491 	struct srp_fr_pool *fr_pool = NULL;
492 	const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
493 	int ret;
494 
495 	init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
496 	if (!init_attr)
497 		return -ENOMEM;
498 
499 	/* queue_size + 1 for ib_drain_rq() */
500 	recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
501 				ch->comp_vector, IB_POLL_SOFTIRQ);
502 	if (IS_ERR(recv_cq)) {
503 		ret = PTR_ERR(recv_cq);
504 		goto err;
505 	}
506 
507 	send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
508 				ch->comp_vector, IB_POLL_DIRECT);
509 	if (IS_ERR(send_cq)) {
510 		ret = PTR_ERR(send_cq);
511 		goto err_recv_cq;
512 	}
513 
514 	init_attr->event_handler       = srp_qp_event;
515 	init_attr->cap.max_send_wr     = m * target->queue_size;
516 	init_attr->cap.max_recv_wr     = target->queue_size + 1;
517 	init_attr->cap.max_recv_sge    = 1;
518 	init_attr->cap.max_send_sge    = 1;
519 	init_attr->sq_sig_type         = IB_SIGNAL_REQ_WR;
520 	init_attr->qp_type             = IB_QPT_RC;
521 	init_attr->send_cq             = send_cq;
522 	init_attr->recv_cq             = recv_cq;
523 
524 	qp = ib_create_qp(dev->pd, init_attr);
525 	if (IS_ERR(qp)) {
526 		ret = PTR_ERR(qp);
527 		goto err_send_cq;
528 	}
529 
530 	ret = srp_init_qp(target, qp);
531 	if (ret)
532 		goto err_qp;
533 
534 	if (dev->use_fast_reg) {
535 		fr_pool = srp_alloc_fr_pool(target);
536 		if (IS_ERR(fr_pool)) {
537 			ret = PTR_ERR(fr_pool);
538 			shost_printk(KERN_WARNING, target->scsi_host, PFX
539 				     "FR pool allocation failed (%d)\n", ret);
540 			goto err_qp;
541 		}
542 	} else if (dev->use_fmr) {
543 		fmr_pool = srp_alloc_fmr_pool(target);
544 		if (IS_ERR(fmr_pool)) {
545 			ret = PTR_ERR(fmr_pool);
546 			shost_printk(KERN_WARNING, target->scsi_host, PFX
547 				     "FMR pool allocation failed (%d)\n", ret);
548 			goto err_qp;
549 		}
550 	}
551 
552 	if (ch->qp)
553 		srp_destroy_qp(ch);
554 	if (ch->recv_cq)
555 		ib_free_cq(ch->recv_cq);
556 	if (ch->send_cq)
557 		ib_free_cq(ch->send_cq);
558 
559 	ch->qp = qp;
560 	ch->recv_cq = recv_cq;
561 	ch->send_cq = send_cq;
562 
563 	if (dev->use_fast_reg) {
564 		if (ch->fr_pool)
565 			srp_destroy_fr_pool(ch->fr_pool);
566 		ch->fr_pool = fr_pool;
567 	} else if (dev->use_fmr) {
568 		if (ch->fmr_pool)
569 			ib_destroy_fmr_pool(ch->fmr_pool);
570 		ch->fmr_pool = fmr_pool;
571 	}
572 
573 	kfree(init_attr);
574 	return 0;
575 
576 err_qp:
577 	ib_destroy_qp(qp);
578 
579 err_send_cq:
580 	ib_free_cq(send_cq);
581 
582 err_recv_cq:
583 	ib_free_cq(recv_cq);
584 
585 err:
586 	kfree(init_attr);
587 	return ret;
588 }
589 
590 /*
591  * Note: this function may be called without srp_alloc_iu_bufs() having been
592  * invoked. Hence the ch->[rt]x_ring checks.
593  */
594 static void srp_free_ch_ib(struct srp_target_port *target,
595 			   struct srp_rdma_ch *ch)
596 {
597 	struct srp_device *dev = target->srp_host->srp_dev;
598 	int i;
599 
600 	if (!ch->target)
601 		return;
602 
603 	if (ch->cm_id) {
604 		ib_destroy_cm_id(ch->cm_id);
605 		ch->cm_id = NULL;
606 	}
607 
	/* If srp_new_cm_id() succeeded but srp_create_ch_ib() failed, return. */
609 	if (!ch->qp)
610 		return;
611 
612 	if (dev->use_fast_reg) {
613 		if (ch->fr_pool)
614 			srp_destroy_fr_pool(ch->fr_pool);
615 	} else if (dev->use_fmr) {
616 		if (ch->fmr_pool)
617 			ib_destroy_fmr_pool(ch->fmr_pool);
618 	}
619 
620 	srp_destroy_qp(ch);
621 	ib_free_cq(ch->send_cq);
622 	ib_free_cq(ch->recv_cq);
623 
	/*
	 * Prevent the SCSI error handler from using this channel after it
	 * has been freed. The SCSI error handler may keep trying to perform
	 * recovery actions after scsi_remove_host() has returned.
	 */
630 	ch->target = NULL;
631 
632 	ch->qp = NULL;
633 	ch->send_cq = ch->recv_cq = NULL;
634 
635 	if (ch->rx_ring) {
636 		for (i = 0; i < target->queue_size; ++i)
637 			srp_free_iu(target->srp_host, ch->rx_ring[i]);
638 		kfree(ch->rx_ring);
639 		ch->rx_ring = NULL;
640 	}
641 	if (ch->tx_ring) {
642 		for (i = 0; i < target->queue_size; ++i)
643 			srp_free_iu(target->srp_host, ch->tx_ring[i]);
644 		kfree(ch->tx_ring);
645 		ch->tx_ring = NULL;
646 	}
647 }
648 
649 static void srp_path_rec_completion(int status,
650 				    struct sa_path_rec *pathrec,
651 				    void *ch_ptr)
652 {
653 	struct srp_rdma_ch *ch = ch_ptr;
654 	struct srp_target_port *target = ch->target;
655 
656 	ch->status = status;
657 	if (status)
658 		shost_printk(KERN_ERR, target->scsi_host,
659 			     PFX "Got failed path rec status %d\n", status);
660 	else
661 		ch->path = *pathrec;
662 	complete(&ch->done);
663 }
664 
665 static int srp_lookup_path(struct srp_rdma_ch *ch)
666 {
667 	struct srp_target_port *target = ch->target;
668 	int ret = -ENODEV;
669 
670 	ch->path.numb_path = 1;
671 
672 	init_completion(&ch->done);
673 
	/*
	 * Prevent the SCSI host from being removed by srp_remove_target()
	 * before srp_path_rec_completion() is called.
	 */
678 	if (!scsi_host_get(target->scsi_host))
679 		goto out;
680 
681 	ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
682 					       target->srp_host->srp_dev->dev,
683 					       target->srp_host->port,
684 					       &ch->path,
685 					       IB_SA_PATH_REC_SERVICE_ID |
686 					       IB_SA_PATH_REC_DGID	 |
687 					       IB_SA_PATH_REC_SGID	 |
688 					       IB_SA_PATH_REC_NUMB_PATH	 |
689 					       IB_SA_PATH_REC_PKEY,
690 					       SRP_PATH_REC_TIMEOUT_MS,
691 					       GFP_KERNEL,
692 					       srp_path_rec_completion,
693 					       ch, &ch->path_query);
694 	ret = ch->path_query_id;
695 	if (ret < 0)
696 		goto put;
697 
698 	ret = wait_for_completion_interruptible(&ch->done);
699 	if (ret < 0)
700 		goto put;
701 
702 	ret = ch->status;
703 	if (ret < 0)
704 		shost_printk(KERN_WARNING, target->scsi_host,
705 			     PFX "Path record query failed\n");
706 
707 put:
708 	scsi_host_put(target->scsi_host);
709 
710 out:
711 	return ret;
712 }
713 
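/*
 * Return the subnet timeout of the local port, falling back to a default of
 * 18 if the port query fails. Used to derive the CM response timeouts for
 * the login request.
 */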
714 static u8 srp_get_subnet_timeout(struct srp_host *host)
715 {
716 	struct ib_port_attr attr;
717 	int ret;
718 	u8 subnet_timeout = 18;
719 
720 	ret = ib_query_port(host->srp_dev->dev, host->port, &attr);
721 	if (ret == 0)
722 		subnet_timeout = attr.subnet_timeout;
723 
724 	if (unlikely(subnet_timeout < 15))
725 		pr_warn("%s: subnet timeout %d may cause SRP login to fail.\n",
726 			dev_name(&host->srp_dev->dev->dev), subnet_timeout);
727 
728 	return subnet_timeout;
729 }
730 
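/* Build an SRP_LOGIN_REQ information unit and send it as an IB CM REQ. */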
731 static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
732 {
733 	struct srp_target_port *target = ch->target;
734 	struct {
735 		struct ib_cm_req_param param;
736 		struct srp_login_req   priv;
737 	} *req = NULL;
738 	int status;
739 	u8 subnet_timeout;
740 
741 	subnet_timeout = srp_get_subnet_timeout(target->srp_host);
742 
743 	req = kzalloc(sizeof *req, GFP_KERNEL);
744 	if (!req)
745 		return -ENOMEM;
746 
747 	req->param.primary_path		      = &ch->path;
748 	req->param.alternate_path 	      = NULL;
749 	req->param.service_id 		      = target->service_id;
750 	req->param.qp_num		      = ch->qp->qp_num;
751 	req->param.qp_type		      = ch->qp->qp_type;
752 	req->param.private_data 	      = &req->priv;
753 	req->param.private_data_len 	      = sizeof req->priv;
754 	req->param.flow_control 	      = 1;
755 
756 	get_random_bytes(&req->param.starting_psn, 4);
757 	req->param.starting_psn 	     &= 0xffffff;
758 
759 	/*
760 	 * Pick some arbitrary defaults here; we could make these
761 	 * module parameters if anyone cared about setting them.
762 	 */
763 	req->param.responder_resources	      = 4;
764 	req->param.remote_cm_response_timeout = subnet_timeout + 2;
765 	req->param.local_cm_response_timeout  = subnet_timeout + 2;
766 	req->param.retry_count                = target->tl_retry_count;
767 	req->param.rnr_retry_count 	      = 7;
768 	req->param.max_cm_retries 	      = 15;
769 
770 	req->priv.opcode     	= SRP_LOGIN_REQ;
771 	req->priv.tag        	= 0;
772 	req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
773 	req->priv.req_buf_fmt 	= cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
774 					      SRP_BUF_FORMAT_INDIRECT);
775 	req->priv.req_flags	= (multich ? SRP_MULTICHAN_MULTI :
776 				   SRP_MULTICHAN_SINGLE);
777 	/*
778 	 * In the published SRP specification (draft rev. 16a), the
779 	 * port identifier format is 8 bytes of ID extension followed
780 	 * by 8 bytes of GUID.  Older drafts put the two halves in the
781 	 * opposite order, so that the GUID comes first.
782 	 *
783 	 * Targets conforming to these obsolete drafts can be
784 	 * recognized by the I/O Class they report.
785 	 */
786 	if (target->io_class == SRP_REV10_IB_IO_CLASS) {
787 		memcpy(req->priv.initiator_port_id,
788 		       &target->sgid.global.interface_id, 8);
789 		memcpy(req->priv.initiator_port_id + 8,
790 		       &target->initiator_ext, 8);
791 		memcpy(req->priv.target_port_id,     &target->ioc_guid, 8);
792 		memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
793 	} else {
794 		memcpy(req->priv.initiator_port_id,
795 		       &target->initiator_ext, 8);
796 		memcpy(req->priv.initiator_port_id + 8,
797 		       &target->sgid.global.interface_id, 8);
798 		memcpy(req->priv.target_port_id,     &target->id_ext, 8);
799 		memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
800 	}
801 
802 	/*
803 	 * Topspin/Cisco SRP targets will reject our login unless we
804 	 * zero out the first 8 bytes of our initiator port ID and set
805 	 * the second 8 bytes to the local node GUID.
806 	 */
807 	if (srp_target_is_topspin(target)) {
808 		shost_printk(KERN_DEBUG, target->scsi_host,
809 			     PFX "Topspin/Cisco initiator port ID workaround "
810 			     "activated for target GUID %016llx\n",
811 			     be64_to_cpu(target->ioc_guid));
812 		memset(req->priv.initiator_port_id, 0, 8);
813 		memcpy(req->priv.initiator_port_id + 8,
814 		       &target->srp_host->srp_dev->dev->node_guid, 8);
815 	}
816 
817 	status = ib_send_cm_req(ch->cm_id, &req->param);
818 
819 	kfree(req);
820 
821 	return status;
822 }
823 
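/*
 * Mark a target as removed and, if its state actually changed, queue the
 * work that tears down the target. Returns whether the state changed.
 */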
824 static bool srp_queue_remove_work(struct srp_target_port *target)
825 {
826 	bool changed = false;
827 
828 	spin_lock_irq(&target->lock);
829 	if (target->state != SRP_TARGET_REMOVED) {
830 		target->state = SRP_TARGET_REMOVED;
831 		changed = true;
832 	}
833 	spin_unlock_irq(&target->lock);
834 
835 	if (changed)
836 		queue_work(srp_remove_wq, &target->remove_work);
837 
838 	return changed;
839 }
840 
841 static void srp_disconnect_target(struct srp_target_port *target)
842 {
843 	struct srp_rdma_ch *ch;
844 	int i;
845 
846 	/* XXX should send SRP_I_LOGOUT request */
847 
848 	for (i = 0; i < target->ch_count; i++) {
849 		ch = &target->ch[i];
850 		ch->connected = false;
851 		if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
852 			shost_printk(KERN_DEBUG, target->scsi_host,
853 				     PFX "Sending CM DREQ failed\n");
854 		}
855 	}
856 }
857 
858 static void srp_free_req_data(struct srp_target_port *target,
859 			      struct srp_rdma_ch *ch)
860 {
861 	struct srp_device *dev = target->srp_host->srp_dev;
862 	struct ib_device *ibdev = dev->dev;
863 	struct srp_request *req;
864 	int i;
865 
866 	if (!ch->req_ring)
867 		return;
868 
869 	for (i = 0; i < target->req_ring_size; ++i) {
870 		req = &ch->req_ring[i];
871 		if (dev->use_fast_reg) {
872 			kfree(req->fr_list);
873 		} else {
874 			kfree(req->fmr_list);
875 			kfree(req->map_page);
876 		}
877 		if (req->indirect_dma_addr) {
878 			ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
879 					    target->indirect_size,
880 					    DMA_TO_DEVICE);
881 		}
882 		kfree(req->indirect_desc);
883 	}
884 
885 	kfree(ch->req_ring);
886 	ch->req_ring = NULL;
887 }
888 
889 static int srp_alloc_req_data(struct srp_rdma_ch *ch)
890 {
891 	struct srp_target_port *target = ch->target;
892 	struct srp_device *srp_dev = target->srp_host->srp_dev;
893 	struct ib_device *ibdev = srp_dev->dev;
894 	struct srp_request *req;
895 	void *mr_list;
896 	dma_addr_t dma_addr;
897 	int i, ret = -ENOMEM;
898 
899 	ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
900 			       GFP_KERNEL);
901 	if (!ch->req_ring)
902 		goto out;
903 
904 	for (i = 0; i < target->req_ring_size; ++i) {
905 		req = &ch->req_ring[i];
906 		mr_list = kmalloc(target->mr_per_cmd * sizeof(void *),
907 				  GFP_KERNEL);
908 		if (!mr_list)
909 			goto out;
910 		if (srp_dev->use_fast_reg) {
911 			req->fr_list = mr_list;
912 		} else {
913 			req->fmr_list = mr_list;
914 			req->map_page = kmalloc(srp_dev->max_pages_per_mr *
915 						sizeof(void *), GFP_KERNEL);
916 			if (!req->map_page)
917 				goto out;
918 		}
919 		req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
920 		if (!req->indirect_desc)
921 			goto out;
922 
923 		dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
924 					     target->indirect_size,
925 					     DMA_TO_DEVICE);
926 		if (ib_dma_mapping_error(ibdev, dma_addr))
927 			goto out;
928 
929 		req->indirect_dma_addr = dma_addr;
930 	}
931 	ret = 0;
932 
933 out:
934 	return ret;
935 }
936 
937 /**
938  * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
939  * @shost: SCSI host whose attributes to remove from sysfs.
940  *
 * Note: Any attributes defined in the host template that did not exist
 * before this function was invoked will be ignored.
943  */
944 static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
945 {
946 	struct device_attribute **attr;
947 
948 	for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
949 		device_remove_file(&shost->shost_dev, *attr);
950 }
951 
952 static void srp_remove_target(struct srp_target_port *target)
953 {
954 	struct srp_rdma_ch *ch;
955 	int i;
956 
957 	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
958 
959 	srp_del_scsi_host_attr(target->scsi_host);
960 	srp_rport_get(target->rport);
961 	srp_remove_host(target->scsi_host);
962 	scsi_remove_host(target->scsi_host);
963 	srp_stop_rport_timers(target->rport);
964 	srp_disconnect_target(target);
965 	for (i = 0; i < target->ch_count; i++) {
966 		ch = &target->ch[i];
967 		srp_free_ch_ib(target, ch);
968 	}
969 	cancel_work_sync(&target->tl_err_work);
970 	srp_rport_put(target->rport);
971 	for (i = 0; i < target->ch_count; i++) {
972 		ch = &target->ch[i];
973 		srp_free_req_data(target, ch);
974 	}
975 	kfree(target->ch);
976 	target->ch = NULL;
977 
978 	spin_lock(&target->srp_host->target_lock);
979 	list_del(&target->list);
980 	spin_unlock(&target->srp_host->target_lock);
981 
982 	scsi_host_put(target->scsi_host);
983 }
984 
985 static void srp_remove_work(struct work_struct *work)
986 {
987 	struct srp_target_port *target =
988 		container_of(work, struct srp_target_port, remove_work);
989 
990 	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
991 
992 	srp_remove_target(target);
993 }
994 
995 static void srp_rport_delete(struct srp_rport *rport)
996 {
997 	struct srp_target_port *target = rport->lld_data;
998 
999 	srp_queue_remove_work(target);
1000 }
1001 
1002 /**
1003  * srp_connected_ch() - number of connected channels
1004  * @target: SRP target port.
1005  */
1006 static int srp_connected_ch(struct srp_target_port *target)
1007 {
1008 	int i, c = 0;
1009 
1010 	for (i = 0; i < target->ch_count; i++)
1011 		c += target->ch[i].connected;
1012 
1013 	return c;
1014 }
1015 
1016 static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
1017 {
1018 	struct srp_target_port *target = ch->target;
1019 	int ret;
1020 
1021 	WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
1022 
1023 	ret = srp_lookup_path(ch);
1024 	if (ret)
1025 		goto out;
1026 
1027 	while (1) {
1028 		init_completion(&ch->done);
1029 		ret = srp_send_req(ch, multich);
1030 		if (ret)
1031 			goto out;
1032 		ret = wait_for_completion_interruptible(&ch->done);
1033 		if (ret < 0)
1034 			goto out;
1035 
1036 		/*
1037 		 * The CM event handling code will set status to
1038 		 * SRP_PORT_REDIRECT if we get a port redirect REJ
1039 		 * back, or SRP_DLID_REDIRECT if we get a lid/qp
1040 		 * redirect REJ back.
1041 		 */
1042 		ret = ch->status;
1043 		switch (ret) {
1044 		case 0:
1045 			ch->connected = true;
1046 			goto out;
1047 
1048 		case SRP_PORT_REDIRECT:
1049 			ret = srp_lookup_path(ch);
1050 			if (ret)
1051 				goto out;
1052 			break;
1053 
1054 		case SRP_DLID_REDIRECT:
1055 			break;
1056 
1057 		case SRP_STALE_CONN:
1058 			shost_printk(KERN_ERR, target->scsi_host, PFX
1059 				     "giving up on stale connection\n");
1060 			ret = -ECONNRESET;
1061 			goto out;
1062 
1063 		default:
1064 			goto out;
1065 		}
1066 	}
1067 
1068 out:
1069 	return ret <= 0 ? ret : -ENODEV;
1070 }
1071 
1072 static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
1073 {
1074 	srp_handle_qp_err(cq, wc, "INV RKEY");
1075 }
1076 
1077 static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
1078 		u32 rkey)
1079 {
1080 	struct ib_send_wr *bad_wr;
1081 	struct ib_send_wr wr = {
1082 		.opcode		    = IB_WR_LOCAL_INV,
1083 		.next		    = NULL,
1084 		.num_sge	    = 0,
1085 		.send_flags	    = 0,
1086 		.ex.invalidate_rkey = rkey,
1087 	};
1088 
1089 	wr.wr_cqe = &req->reg_cqe;
1090 	req->reg_cqe.done = srp_inv_rkey_err_done;
1091 	return ib_post_send(ch->qp, &wr, &bad_wr);
1092 }
1093 
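/*
 * Invalidate the memory registrations used for a request and unmap the
 * scatterlist of the associated SCSI command.
 */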
1094 static void srp_unmap_data(struct scsi_cmnd *scmnd,
1095 			   struct srp_rdma_ch *ch,
1096 			   struct srp_request *req)
1097 {
1098 	struct srp_target_port *target = ch->target;
1099 	struct srp_device *dev = target->srp_host->srp_dev;
1100 	struct ib_device *ibdev = dev->dev;
1101 	int i, res;
1102 
1103 	if (!scsi_sglist(scmnd) ||
1104 	    (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1105 	     scmnd->sc_data_direction != DMA_FROM_DEVICE))
1106 		return;
1107 
1108 	if (dev->use_fast_reg) {
1109 		struct srp_fr_desc **pfr;
1110 
1111 		for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1112 			res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
1113 			if (res < 0) {
1114 				shost_printk(KERN_ERR, target->scsi_host, PFX
1115 				  "Queueing INV WR for rkey %#x failed (%d)\n",
1116 				  (*pfr)->mr->rkey, res);
1117 				queue_work(system_long_wq,
1118 					   &target->tl_err_work);
1119 			}
1120 		}
1121 		if (req->nmdesc)
1122 			srp_fr_pool_put(ch->fr_pool, req->fr_list,
1123 					req->nmdesc);
1124 	} else if (dev->use_fmr) {
1125 		struct ib_pool_fmr **pfmr;
1126 
1127 		for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
1128 			ib_fmr_pool_unmap(*pfmr);
1129 	}
1130 
1131 	ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1132 			scmnd->sc_data_direction);
1133 }
1134 
1135 /**
 * srp_claim_req() - Take ownership of the scmnd associated with a request.
 * @ch: SRP RDMA channel.
 * @req: SRP request.
 * @sdev: If not NULL, only take ownership of @req->scmnd if it belongs to
 *        this SCSI device.
 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
 *         ownership of @req->scmnd if it equals @scmnd.
1142  *
1143  * Return value:
1144  * Either NULL or a pointer to the SCSI command the caller became owner of.
1145  */
1146 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1147 				       struct srp_request *req,
1148 				       struct scsi_device *sdev,
1149 				       struct scsi_cmnd *scmnd)
1150 {
1151 	unsigned long flags;
1152 
1153 	spin_lock_irqsave(&ch->lock, flags);
1154 	if (req->scmnd &&
1155 	    (!sdev || req->scmnd->device == sdev) &&
1156 	    (!scmnd || req->scmnd == scmnd)) {
1157 		scmnd = req->scmnd;
1158 		req->scmnd = NULL;
1159 	} else {
1160 		scmnd = NULL;
1161 	}
1162 	spin_unlock_irqrestore(&ch->lock, flags);
1163 
1164 	return scmnd;
1165 }
1166 
1167 /**
1168  * srp_free_req() - Unmap data and adjust ch->req_lim.
1169  * @ch:     SRP RDMA channel.
1170  * @req:    Request to be freed.
1171  * @scmnd:  SCSI command associated with @req.
 * @req_lim_delta: Amount to be added to @ch->req_lim.
1173  */
1174 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1175 			 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1176 {
1177 	unsigned long flags;
1178 
1179 	srp_unmap_data(scmnd, ch, req);
1180 
1181 	spin_lock_irqsave(&ch->lock, flags);
1182 	ch->req_lim += req_lim_delta;
1183 	spin_unlock_irqrestore(&ch->lock, flags);
1184 }
1185 
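/*
 * If @req still owns a SCSI command (optionally restricted to @sdev),
 * unmap it and complete it with @result.
 */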
1186 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1187 			   struct scsi_device *sdev, int result)
1188 {
1189 	struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1190 
1191 	if (scmnd) {
1192 		srp_free_req(ch, req, scmnd, 0);
1193 		scmnd->result = result;
1194 		scmnd->scsi_done(scmnd);
1195 	}
1196 }
1197 
1198 static void srp_terminate_io(struct srp_rport *rport)
1199 {
1200 	struct srp_target_port *target = rport->lld_data;
1201 	struct srp_rdma_ch *ch;
1202 	struct Scsi_Host *shost = target->scsi_host;
1203 	struct scsi_device *sdev;
1204 	int i, j;
1205 
1206 	/*
1207 	 * Invoking srp_terminate_io() while srp_queuecommand() is running
1208 	 * is not safe. Hence the warning statement below.
1209 	 */
1210 	shost_for_each_device(sdev, shost)
1211 		WARN_ON_ONCE(sdev->request_queue->request_fn_active);
1212 
1213 	for (i = 0; i < target->ch_count; i++) {
1214 		ch = &target->ch[i];
1215 
1216 		for (j = 0; j < target->req_ring_size; ++j) {
1217 			struct srp_request *req = &ch->req_ring[j];
1218 
1219 			srp_finish_req(ch, req, NULL,
1220 				       DID_TRANSPORT_FAILFAST << 16);
1221 		}
1222 	}
1223 }
1224 
1225 /*
1226  * It is up to the caller to ensure that srp_rport_reconnect() calls are
1227  * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1228  * srp_reset_device() or srp_reset_host() calls will occur while this function
 * is in progress. One way to achieve this is not to call this function
 * directly, but to call srp_reconnect_rport() instead: that function
 * serializes calls of this function via rport->mutex and also blocks
 * srp_queuecommand() calls before invoking this function.
1233  */
1234 static int srp_rport_reconnect(struct srp_rport *rport)
1235 {
1236 	struct srp_target_port *target = rport->lld_data;
1237 	struct srp_rdma_ch *ch;
1238 	int i, j, ret = 0;
1239 	bool multich = false;
1240 
1241 	srp_disconnect_target(target);
1242 
1243 	if (target->state == SRP_TARGET_SCANNING)
1244 		return -ENODEV;
1245 
1246 	/*
1247 	 * Now get a new local CM ID so that we avoid confusing the target in
1248 	 * case things are really fouled up. Doing so also ensures that all CM
1249 	 * callbacks will have finished before a new QP is allocated.
1250 	 */
1251 	for (i = 0; i < target->ch_count; i++) {
1252 		ch = &target->ch[i];
1253 		ret += srp_new_cm_id(ch);
1254 	}
1255 	for (i = 0; i < target->ch_count; i++) {
1256 		ch = &target->ch[i];
1257 		for (j = 0; j < target->req_ring_size; ++j) {
1258 			struct srp_request *req = &ch->req_ring[j];
1259 
1260 			srp_finish_req(ch, req, NULL, DID_RESET << 16);
1261 		}
1262 	}
1263 	for (i = 0; i < target->ch_count; i++) {
1264 		ch = &target->ch[i];
1265 		/*
1266 		 * Whether or not creating a new CM ID succeeded, create a new
1267 		 * QP. This guarantees that all completion callback function
1268 		 * invocations have finished before request resetting starts.
1269 		 */
1270 		ret += srp_create_ch_ib(ch);
1271 
1272 		INIT_LIST_HEAD(&ch->free_tx);
1273 		for (j = 0; j < target->queue_size; ++j)
1274 			list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1275 	}
1276 
1277 	target->qp_in_error = false;
1278 
1279 	for (i = 0; i < target->ch_count; i++) {
1280 		ch = &target->ch[i];
1281 		if (ret)
1282 			break;
1283 		ret = srp_connect_ch(ch, multich);
1284 		multich = true;
1285 	}
1286 
1287 	if (ret == 0)
1288 		shost_printk(KERN_INFO, target->scsi_host,
1289 			     PFX "reconnect succeeded\n");
1290 
1291 	return ret;
1292 }
1293 
1294 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1295 			 unsigned int dma_len, u32 rkey)
1296 {
1297 	struct srp_direct_buf *desc = state->desc;
1298 
1299 	WARN_ON_ONCE(!dma_len);
1300 
1301 	desc->va = cpu_to_be64(dma_addr);
1302 	desc->key = cpu_to_be32(rkey);
1303 	desc->len = cpu_to_be32(dma_len);
1304 
1305 	state->total_len += dma_len;
1306 	state->desc++;
1307 	state->ndesc++;
1308 }
1309 
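/*
 * Map the pages accumulated in @state, either directly via the global rkey
 * for a single page or through the FMR pool, and append the corresponding
 * memory descriptor.
 */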
1310 static int srp_map_finish_fmr(struct srp_map_state *state,
1311 			      struct srp_rdma_ch *ch)
1312 {
1313 	struct srp_target_port *target = ch->target;
1314 	struct srp_device *dev = target->srp_host->srp_dev;
1315 	struct ib_pool_fmr *fmr;
1316 	u64 io_addr = 0;
1317 
1318 	if (state->fmr.next >= state->fmr.end) {
1319 		shost_printk(KERN_ERR, ch->target->scsi_host,
1320 			     PFX "Out of MRs (mr_per_cmd = %d)\n",
1321 			     ch->target->mr_per_cmd);
1322 		return -ENOMEM;
1323 	}
1324 
1325 	WARN_ON_ONCE(!dev->use_fmr);
1326 
1327 	if (state->npages == 0)
1328 		return 0;
1329 
1330 	if (state->npages == 1 && target->global_rkey) {
1331 		srp_map_desc(state, state->base_dma_addr, state->dma_len,
1332 			     target->global_rkey);
1333 		goto reset_state;
1334 	}
1335 
1336 	fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1337 				   state->npages, io_addr);
1338 	if (IS_ERR(fmr))
1339 		return PTR_ERR(fmr);
1340 
1341 	*state->fmr.next++ = fmr;
1342 	state->nmdesc++;
1343 
1344 	srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
1345 		     state->dma_len, fmr->fmr->rkey);
1346 
1347 reset_state:
1348 	state->npages = 0;
1349 	state->dma_len = 0;
1350 
1351 	return 0;
1352 }
1353 
1354 static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
1355 {
1356 	srp_handle_qp_err(cq, wc, "FAST REG");
1357 }
1358 
1359 /*
1360  * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset
1361  * where to start in the first element. If sg_offset_p != NULL then
1362  * *sg_offset_p is updated to the offset in state->sg[retval] of the first
1363  * byte that has not yet been mapped.
1364  */
1365 static int srp_map_finish_fr(struct srp_map_state *state,
1366 			     struct srp_request *req,
1367 			     struct srp_rdma_ch *ch, int sg_nents,
1368 			     unsigned int *sg_offset_p)
1369 {
1370 	struct srp_target_port *target = ch->target;
1371 	struct srp_device *dev = target->srp_host->srp_dev;
1372 	struct ib_send_wr *bad_wr;
1373 	struct ib_reg_wr wr;
1374 	struct srp_fr_desc *desc;
1375 	u32 rkey;
1376 	int n, err;
1377 
1378 	if (state->fr.next >= state->fr.end) {
1379 		shost_printk(KERN_ERR, ch->target->scsi_host,
1380 			     PFX "Out of MRs (mr_per_cmd = %d)\n",
1381 			     ch->target->mr_per_cmd);
1382 		return -ENOMEM;
1383 	}
1384 
1385 	WARN_ON_ONCE(!dev->use_fast_reg);
1386 
1387 	if (sg_nents == 1 && target->global_rkey) {
1388 		unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1389 
1390 		srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
1391 			     sg_dma_len(state->sg) - sg_offset,
1392 			     target->global_rkey);
1393 		if (sg_offset_p)
1394 			*sg_offset_p = 0;
1395 		return 1;
1396 	}
1397 
1398 	desc = srp_fr_pool_get(ch->fr_pool);
1399 	if (!desc)
1400 		return -ENOMEM;
1401 
1402 	rkey = ib_inc_rkey(desc->mr->rkey);
1403 	ib_update_fast_reg_key(desc->mr, rkey);
1404 
1405 	n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
1406 			 dev->mr_page_size);
1407 	if (unlikely(n < 0)) {
1408 		srp_fr_pool_put(ch->fr_pool, &desc, 1);
1409 		pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
1410 			 dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
1411 			 sg_offset_p ? *sg_offset_p : -1, n);
1412 		return n;
1413 	}
1414 
1415 	WARN_ON_ONCE(desc->mr->length == 0);
1416 
1417 	req->reg_cqe.done = srp_reg_mr_err_done;
1418 
1419 	wr.wr.next = NULL;
1420 	wr.wr.opcode = IB_WR_REG_MR;
1421 	wr.wr.wr_cqe = &req->reg_cqe;
1422 	wr.wr.num_sge = 0;
1423 	wr.wr.send_flags = 0;
1424 	wr.mr = desc->mr;
1425 	wr.key = desc->mr->rkey;
1426 	wr.access = (IB_ACCESS_LOCAL_WRITE |
1427 		     IB_ACCESS_REMOTE_READ |
1428 		     IB_ACCESS_REMOTE_WRITE);
1429 
1430 	*state->fr.next++ = desc;
1431 	state->nmdesc++;
1432 
1433 	srp_map_desc(state, desc->mr->iova,
1434 		     desc->mr->length, desc->mr->rkey);
1435 
1436 	err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
1437 	if (unlikely(err)) {
1438 		WARN_ON_ONCE(err == -ENOMEM);
1439 		return err;
1440 	}
1441 
1442 	return n;
1443 }
1444 
1445 static int srp_map_sg_entry(struct srp_map_state *state,
1446 			    struct srp_rdma_ch *ch,
1447 			    struct scatterlist *sg)
1448 {
1449 	struct srp_target_port *target = ch->target;
1450 	struct srp_device *dev = target->srp_host->srp_dev;
1451 	struct ib_device *ibdev = dev->dev;
1452 	dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
1453 	unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
1454 	unsigned int len = 0;
1455 	int ret;
1456 
1457 	WARN_ON_ONCE(!dma_len);
1458 
1459 	while (dma_len) {
1460 		unsigned offset = dma_addr & ~dev->mr_page_mask;
1461 
1462 		if (state->npages == dev->max_pages_per_mr ||
1463 		    (state->npages > 0 && offset != 0)) {
1464 			ret = srp_map_finish_fmr(state, ch);
1465 			if (ret)
1466 				return ret;
1467 		}
1468 
1469 		len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1470 
1471 		if (!state->npages)
1472 			state->base_dma_addr = dma_addr;
1473 		state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1474 		state->dma_len += len;
1475 		dma_addr += len;
1476 		dma_len -= len;
1477 	}
1478 
1479 	/*
1480 	 * If the end of the MR is not on a page boundary then we need to
1481 	 * close it out and start a new one -- we can only merge at page
1482 	 * boundaries.
1483 	 */
1484 	ret = 0;
1485 	if ((dma_addr & ~dev->mr_page_mask) != 0)
1486 		ret = srp_map_finish_fmr(state, ch);
1487 	return ret;
1488 }
1489 
1490 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1491 			  struct srp_request *req, struct scatterlist *scat,
1492 			  int count)
1493 {
1494 	struct scatterlist *sg;
1495 	int i, ret;
1496 
1497 	state->pages = req->map_page;
1498 	state->fmr.next = req->fmr_list;
1499 	state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;
1500 
1501 	for_each_sg(scat, sg, count, i) {
1502 		ret = srp_map_sg_entry(state, ch, sg);
1503 		if (ret)
1504 			return ret;
1505 	}
1506 
1507 	ret = srp_map_finish_fmr(state, ch);
1508 	if (ret)
1509 		return ret;
1510 
1511 	return 0;
1512 }
1513 
1514 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1515 			 struct srp_request *req, struct scatterlist *scat,
1516 			 int count)
1517 {
1518 	unsigned int sg_offset = 0;
1519 
1520 	state->fr.next = req->fr_list;
1521 	state->fr.end = req->fr_list + ch->target->mr_per_cmd;
1522 	state->sg = scat;
1523 
1524 	if (count == 0)
1525 		return 0;
1526 
1527 	while (count) {
1528 		int i, n;
1529 
1530 		n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
1531 		if (unlikely(n < 0))
1532 			return n;
1533 
1534 		count -= n;
1535 		for (i = 0; i < n; i++)
1536 			state->sg = sg_next(state->sg);
1537 	}
1538 
1539 	return 0;
1540 }
1541 
1542 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1543 			  struct srp_request *req, struct scatterlist *scat,
1544 			  int count)
1545 {
1546 	struct srp_target_port *target = ch->target;
1547 	struct srp_device *dev = target->srp_host->srp_dev;
1548 	struct scatterlist *sg;
1549 	int i;
1550 
1551 	for_each_sg(scat, sg, count, i) {
1552 		srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
1553 			     ib_sg_dma_len(dev->dev, sg),
1554 			     target->global_rkey);
1555 	}
1556 
1557 	return 0;
1558 }
1559 
1560 /*
1561  * Register the indirect data buffer descriptor with the HCA.
1562  *
1563  * Note: since the indirect data buffer descriptor has been allocated with
1564  * kmalloc() it is guaranteed that this buffer is a physically contiguous
1565  * memory buffer.
1566  */
1567 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1568 		       void **next_mr, void **end_mr, u32 idb_len,
1569 		       __be32 *idb_rkey)
1570 {
1571 	struct srp_target_port *target = ch->target;
1572 	struct srp_device *dev = target->srp_host->srp_dev;
1573 	struct srp_map_state state;
1574 	struct srp_direct_buf idb_desc;
1575 	u64 idb_pages[1];
1576 	struct scatterlist idb_sg[1];
1577 	int ret;
1578 
1579 	memset(&state, 0, sizeof(state));
1580 	memset(&idb_desc, 0, sizeof(idb_desc));
1581 	state.gen.next = next_mr;
1582 	state.gen.end = end_mr;
1583 	state.desc = &idb_desc;
1584 	state.base_dma_addr = req->indirect_dma_addr;
1585 	state.dma_len = idb_len;
1586 
1587 	if (dev->use_fast_reg) {
1588 		state.sg = idb_sg;
1589 		sg_init_one(idb_sg, req->indirect_desc, idb_len);
1590 		idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1591 #ifdef CONFIG_NEED_SG_DMA_LENGTH
1592 		idb_sg->dma_length = idb_sg->length;	      /* hack^2 */
1593 #endif
1594 		ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
1595 		if (ret < 0)
1596 			return ret;
1597 		WARN_ON_ONCE(ret < 1);
1598 	} else if (dev->use_fmr) {
1599 		state.pages = idb_pages;
1600 		state.pages[0] = (req->indirect_dma_addr &
1601 				  dev->mr_page_mask);
1602 		state.npages = 1;
1603 		ret = srp_map_finish_fmr(&state, ch);
1604 		if (ret < 0)
1605 			return ret;
1606 	} else {
1607 		return -EINVAL;
1608 	}
1609 
1610 	*idb_rkey = idb_desc.key;
1611 
1612 	return 0;
1613 }
1614 
1615 static void srp_check_mapping(struct srp_map_state *state,
1616 			      struct srp_rdma_ch *ch, struct srp_request *req,
1617 			      struct scatterlist *scat, int count)
1618 {
1619 	struct srp_device *dev = ch->target->srp_host->srp_dev;
1620 	struct srp_fr_desc **pfr;
1621 	u64 desc_len = 0, mr_len = 0;
1622 	int i;
1623 
1624 	for (i = 0; i < state->ndesc; i++)
1625 		desc_len += be32_to_cpu(req->indirect_desc[i].len);
1626 	if (dev->use_fast_reg)
1627 		for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
1628 			mr_len += (*pfr)->mr->length;
1629 	else if (dev->use_fmr)
1630 		for (i = 0; i < state->nmdesc; i++)
1631 			mr_len += be32_to_cpu(req->indirect_desc[i].len);
1632 	if (desc_len != scsi_bufflen(req->scmnd) ||
1633 	    mr_len > scsi_bufflen(req->scmnd))
1634 		pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
1635 		       scsi_bufflen(req->scmnd), desc_len, mr_len,
1636 		       state->ndesc, state->nmdesc);
1637 }
1638 
1639 /**
1640  * srp_map_data() - map SCSI data buffer onto an SRP request
1641  * @scmnd: SCSI command to map
1642  * @ch: SRP RDMA channel
1643  * @req: SRP request
1644  *
1645  * Returns the length in bytes of the SRP_CMD IU or a negative value if
1646  * mapping failed.
1647  */
1648 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1649 			struct srp_request *req)
1650 {
1651 	struct srp_target_port *target = ch->target;
1652 	struct scatterlist *scat;
1653 	struct srp_cmd *cmd = req->cmd->buf;
1654 	int len, nents, count, ret;
1655 	struct srp_device *dev;
1656 	struct ib_device *ibdev;
1657 	struct srp_map_state state;
1658 	struct srp_indirect_buf *indirect_hdr;
1659 	u32 idb_len, table_len;
1660 	__be32 idb_rkey;
1661 	u8 fmt;
1662 
1663 	if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1664 		return sizeof (struct srp_cmd);
1665 
1666 	if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1667 	    scmnd->sc_data_direction != DMA_TO_DEVICE) {
1668 		shost_printk(KERN_WARNING, target->scsi_host,
1669 			     PFX "Unhandled data direction %d\n",
1670 			     scmnd->sc_data_direction);
1671 		return -EINVAL;
1672 	}
1673 
1674 	nents = scsi_sg_count(scmnd);
1675 	scat  = scsi_sglist(scmnd);
1676 
1677 	dev = target->srp_host->srp_dev;
1678 	ibdev = dev->dev;
1679 
1680 	count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1681 	if (unlikely(count == 0))
1682 		return -EIO;
1683 
1684 	fmt = SRP_DATA_DESC_DIRECT;
1685 	len = sizeof (struct srp_cmd) +	sizeof (struct srp_direct_buf);
1686 
1687 	if (count == 1 && target->global_rkey) {
1688 		/*
1689 		 * The midlayer only generated a single gather/scatter
1690 		 * entry, or DMA mapping coalesced everything to a
1691 		 * single entry.  So a direct descriptor along with
1692 		 * the DMA MR suffices.
1693 		 */
1694 		struct srp_direct_buf *buf = (void *) cmd->add_data;
1695 
1696 		buf->va  = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1697 		buf->key = cpu_to_be32(target->global_rkey);
1698 		buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
1699 
1700 		req->nmdesc = 0;
1701 		goto map_complete;
1702 	}
1703 
1704 	/*
1705 	 * We have more than one scatter/gather entry, so build our indirect
1706 	 * descriptor table, trying to merge as many entries as we can.
1707 	 */
1708 	indirect_hdr = (void *) cmd->add_data;
1709 
1710 	ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1711 				   target->indirect_size, DMA_TO_DEVICE);
1712 
1713 	memset(&state, 0, sizeof(state));
1714 	state.desc = req->indirect_desc;
1715 	if (dev->use_fast_reg)
1716 		ret = srp_map_sg_fr(&state, ch, req, scat, count);
1717 	else if (dev->use_fmr)
1718 		ret = srp_map_sg_fmr(&state, ch, req, scat, count);
1719 	else
1720 		ret = srp_map_sg_dma(&state, ch, req, scat, count);
1721 	req->nmdesc = state.nmdesc;
1722 	if (ret < 0)
1723 		goto unmap;
1724 
1725 	{
1726 		DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
1727 			"Memory mapping consistency check");
1728 		if (DYNAMIC_DEBUG_BRANCH(ddm))
1729 			srp_check_mapping(&state, ch, req, scat, count);
1730 	}
1731 
1732 	/* We've mapped the request, now pull as much of the indirect
1733 	 * descriptor table as we can into the command buffer. If this
1734 	 * target is not using an external indirect table, we are
1735 	 * guaranteed to fit into the command, as the SCSI layer won't
1736 	 * give us more S/G entries than we allow.
1737 	 */
1738 	if (state.ndesc == 1) {
1739 		/*
1740 		 * Memory registration collapsed the sg-list into one entry,
1741 		 * so use a direct descriptor.
1742 		 */
1743 		struct srp_direct_buf *buf = (void *) cmd->add_data;
1744 
1745 		*buf = req->indirect_desc[0];
1746 		goto map_complete;
1747 	}
1748 
1749 	if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1750 						!target->allow_ext_sg)) {
1751 		shost_printk(KERN_ERR, target->scsi_host,
1752 			     "Could not fit S/G list into SRP_CMD\n");
1753 		ret = -EIO;
1754 		goto unmap;
1755 	}
1756 
1757 	count = min(state.ndesc, target->cmd_sg_cnt);
1758 	table_len = state.ndesc * sizeof (struct srp_direct_buf);
1759 	idb_len = sizeof(struct srp_indirect_buf) + table_len;
1760 
1761 	fmt = SRP_DATA_DESC_INDIRECT;
1762 	len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
1763 	len += count * sizeof (struct srp_direct_buf);
1764 
1765 	memcpy(indirect_hdr->desc_list, req->indirect_desc,
1766 	       count * sizeof (struct srp_direct_buf));
1767 
1768 	if (!target->global_rkey) {
1769 		ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
1770 				  idb_len, &idb_rkey);
1771 		if (ret < 0)
1772 			goto unmap;
1773 		req->nmdesc++;
1774 	} else {
1775 		idb_rkey = cpu_to_be32(target->global_rkey);
1776 	}
1777 
1778 	indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1779 	indirect_hdr->table_desc.key = idb_rkey;
1780 	indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1781 	indirect_hdr->len = cpu_to_be32(state.total_len);
1782 
1783 	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1784 		cmd->data_out_desc_cnt = count;
1785 	else
1786 		cmd->data_in_desc_cnt = count;
1787 
1788 	ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1789 				      DMA_TO_DEVICE);
1790 
1791 map_complete:
1792 	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1793 		cmd->buf_fmt = fmt << 4;
1794 	else
1795 		cmd->buf_fmt = fmt;
1796 
1797 	return len;
1798 
1799 unmap:
1800 	srp_unmap_data(scmnd, ch, req);
1801 	if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
1802 		ret = -E2BIG;
1803 	return ret;
1804 }
1805 
1806 /*
1807  * Return an IU and possible credit to the free pool
1808  */
1809 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1810 			  enum srp_iu_type iu_type)
1811 {
1812 	unsigned long flags;
1813 
1814 	spin_lock_irqsave(&ch->lock, flags);
1815 	list_add(&iu->list, &ch->free_tx);
1816 	if (iu_type != SRP_IU_RSP)
1817 		++ch->req_lim;
1818 	spin_unlock_irqrestore(&ch->lock, flags);
1819 }
1820 
1821 /*
1822  * Must be called with ch->lock held to protect req_lim and free_tx.
1823  * If IU is not sent, it must be returned using srp_put_tx_iu().
1824  *
1825  * Note:
1826  * An upper limit for the number of allocated information units for each
1827  * request type is:
1828  * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1829  *   more than Scsi_Host.can_queue requests.
1830  * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1831  * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1832  *   one unanswered SRP request to an initiator.
1833  */
1834 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1835 				      enum srp_iu_type iu_type)
1836 {
1837 	struct srp_target_port *target = ch->target;
1838 	s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1839 	struct srp_iu *iu;
1840 
1841 	lockdep_assert_held(&ch->lock);
1842 
1843 	ib_process_cq_direct(ch->send_cq, -1);
1844 
1845 	if (list_empty(&ch->free_tx))
1846 		return NULL;
1847 
1848 	/* Initiator responses to target requests do not consume credits */
1849 	if (iu_type != SRP_IU_RSP) {
1850 		if (ch->req_lim <= rsv) {
1851 			++target->zero_req_lim;
1852 			return NULL;
1853 		}
1854 
1855 		--ch->req_lim;
1856 	}
1857 
1858 	iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1859 	list_del(&iu->list);
1860 	return iu;
1861 }
1862 
1863 /*
1864  * Note: if this function is called from inside ib_drain_sq() then it will
1865  * be called without ch->lock being held. If ib_drain_sq() dequeues a WQE
1866  * with status IB_WC_SUCCESS then that's a bug.
1867  */
1868 static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
1869 {
1870 	struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
1871 	struct srp_rdma_ch *ch = cq->cq_context;
1872 
1873 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
1874 		srp_handle_qp_err(cq, wc, "SEND");
1875 		return;
1876 	}
1877 
1878 	lockdep_assert_held(&ch->lock);
1879 
1880 	list_add(&iu->list, &ch->free_tx);
1881 }
1882 
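/*
 * Post a single signaled SEND work request for @iu. The IU must have been
 * obtained via __srp_get_tx_iu() and, if posting fails, must be returned
 * with srp_put_tx_iu().
 */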
1883 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1884 {
1885 	struct srp_target_port *target = ch->target;
1886 	struct ib_sge list;
1887 	struct ib_send_wr wr, *bad_wr;
1888 
1889 	list.addr   = iu->dma;
1890 	list.length = len;
1891 	list.lkey   = target->lkey;
1892 
1893 	iu->cqe.done = srp_send_done;
1894 
1895 	wr.next       = NULL;
1896 	wr.wr_cqe     = &iu->cqe;
1897 	wr.sg_list    = &list;
1898 	wr.num_sge    = 1;
1899 	wr.opcode     = IB_WR_SEND;
1900 	wr.send_flags = IB_SEND_SIGNALED;
1901 
1902 	return ib_post_send(ch->qp, &wr, &bad_wr);
1903 }
1904 
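/*
 * Post a receive work request for @iu so that the target can send another
 * information unit to the initiator.
 */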
1905 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1906 {
1907 	struct srp_target_port *target = ch->target;
1908 	struct ib_recv_wr wr, *bad_wr;
1909 	struct ib_sge list;
1910 
1911 	list.addr   = iu->dma;
1912 	list.length = iu->size;
1913 	list.lkey   = target->lkey;
1914 
1915 	iu->cqe.done = srp_recv_done;
1916 
1917 	wr.next     = NULL;
1918 	wr.wr_cqe   = &iu->cqe;
1919 	wr.sg_list  = &list;
1920 	wr.num_sge  = 1;
1921 
1922 	return ib_post_recv(ch->qp, &wr, &bad_wr);
1923 }
1924 
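/*
 * Process an SRP_RSP information unit. Task management responses complete
 * ch->tsk_mgmt_done; command responses are matched to the SCSI command by
 * tag, after which sense data and residual counts are copied and the SCSI
 * command is completed.
 */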
1925 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
1926 {
1927 	struct srp_target_port *target = ch->target;
1928 	struct srp_request *req;
1929 	struct scsi_cmnd *scmnd;
1930 	unsigned long flags;
1931 
1932 	if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
1933 		spin_lock_irqsave(&ch->lock, flags);
1934 		ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1935 		if (rsp->tag == ch->tsk_mgmt_tag) {
1936 			ch->tsk_mgmt_status = -1;
1937 			if (be32_to_cpu(rsp->resp_data_len) >= 4)
1938 				ch->tsk_mgmt_status = rsp->data[3];
1939 			complete(&ch->tsk_mgmt_done);
1940 		} else {
1941 			shost_printk(KERN_ERR, target->scsi_host,
1942 				     "Received tsk mgmt response too late for tag %#llx\n",
1943 				     rsp->tag);
1944 		}
1945 		spin_unlock_irqrestore(&ch->lock, flags);
1946 	} else {
1947 		scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
1948 		if (scmnd && scmnd->host_scribble) {
1949 			req = (void *)scmnd->host_scribble;
1950 			scmnd = srp_claim_req(ch, req, NULL, scmnd);
1951 		} else {
1952 			scmnd = NULL;
1953 		}
1954 		if (!scmnd) {
1955 			shost_printk(KERN_ERR, target->scsi_host,
1956 				     "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
1957 				     rsp->tag, ch - target->ch, ch->qp->qp_num);
1958 
1959 			spin_lock_irqsave(&ch->lock, flags);
1960 			ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1961 			spin_unlock_irqrestore(&ch->lock, flags);
1962 
1963 			return;
1964 		}
1965 		scmnd->result = rsp->status;
1966 
1967 		if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
1968 			memcpy(scmnd->sense_buffer, rsp->data +
1969 			       be32_to_cpu(rsp->resp_data_len),
1970 			       min_t(int, be32_to_cpu(rsp->sense_data_len),
1971 				     SCSI_SENSE_BUFFERSIZE));
1972 		}
1973 
1974 		if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
1975 			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1976 		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
1977 			scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
1978 		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
1979 			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1980 		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
1981 			scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
1982 
1983 		srp_free_req(ch, req, scmnd,
1984 			     be32_to_cpu(rsp->req_lim_delta));
1985 
1986 		scmnd->host_scribble = NULL;
1987 		scmnd->scsi_done(scmnd);
1988 	}
1989 }
1990 
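/*
 * Send a response back to the target: apply the request limit delta,
 * allocate an SRP_IU_RSP information unit, copy @rsp into it and post it.
 * Returns zero upon success and a nonzero value upon failure.
 */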
1991 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
1992 			       void *rsp, int len)
1993 {
1994 	struct srp_target_port *target = ch->target;
1995 	struct ib_device *dev = target->srp_host->srp_dev->dev;
1996 	unsigned long flags;
1997 	struct srp_iu *iu;
1998 	int err;
1999 
2000 	spin_lock_irqsave(&ch->lock, flags);
2001 	ch->req_lim += req_delta;
2002 	iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
2003 	spin_unlock_irqrestore(&ch->lock, flags);
2004 
2005 	if (!iu) {
2006 		shost_printk(KERN_ERR, target->scsi_host, PFX
2007 			     "no IU available to send response\n");
2008 		return 1;
2009 	}
2010 
2011 	ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
2012 	memcpy(iu->buf, rsp, len);
2013 	ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
2014 
2015 	err = srp_post_send(ch, iu, len);
2016 	if (err) {
2017 		shost_printk(KERN_ERR, target->scsi_host, PFX
2018 			     "unable to post response: %d\n", err);
2019 		srp_put_tx_iu(ch, iu, SRP_IU_RSP);
2020 	}
2021 
2022 	return err;
2023 }
2024 
2025 static void srp_process_cred_req(struct srp_rdma_ch *ch,
2026 				 struct srp_cred_req *req)
2027 {
2028 	struct srp_cred_rsp rsp = {
2029 		.opcode = SRP_CRED_RSP,
2030 		.tag = req->tag,
2031 	};
2032 	s32 delta = be32_to_cpu(req->req_lim_delta);
2033 
2034 	if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2035 		shost_printk(KERN_ERR, ch->target->scsi_host, PFX
2036 			     "problems processing SRP_CRED_REQ\n");
2037 }
2038 
2039 static void srp_process_aer_req(struct srp_rdma_ch *ch,
2040 				struct srp_aer_req *req)
2041 {
2042 	struct srp_target_port *target = ch->target;
2043 	struct srp_aer_rsp rsp = {
2044 		.opcode = SRP_AER_RSP,
2045 		.tag = req->tag,
2046 	};
2047 	s32 delta = be32_to_cpu(req->req_lim_delta);
2048 
2049 	shost_printk(KERN_ERR, target->scsi_host, PFX
2050 		     "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
2051 
2052 	if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2053 		shost_printk(KERN_ERR, target->scsi_host, PFX
2054 			     "problems processing SRP_AER_REQ\n");
2055 }
2056 
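/*
 * Receive completion handler: make the IU visible to the CPU, dispatch on
 * the SRP opcode and repost the receive buffer afterwards.
 */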
2057 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
2058 {
2059 	struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
2060 	struct srp_rdma_ch *ch = cq->cq_context;
2061 	struct srp_target_port *target = ch->target;
2062 	struct ib_device *dev = target->srp_host->srp_dev->dev;
2063 	int res;
2064 	u8 opcode;
2065 
2066 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
2067 		srp_handle_qp_err(cq, wc, "RECV");
2068 		return;
2069 	}
2070 
2071 	ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
2072 				   DMA_FROM_DEVICE);
2073 
2074 	opcode = *(u8 *) iu->buf;
2075 
2076 	if (0) {
2077 		shost_printk(KERN_ERR, target->scsi_host,
2078 			     PFX "recv completion, opcode 0x%02x\n", opcode);
2079 		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
2080 			       iu->buf, wc->byte_len, true);
2081 	}
2082 
2083 	switch (opcode) {
2084 	case SRP_RSP:
2085 		srp_process_rsp(ch, iu->buf);
2086 		break;
2087 
2088 	case SRP_CRED_REQ:
2089 		srp_process_cred_req(ch, iu->buf);
2090 		break;
2091 
2092 	case SRP_AER_REQ:
2093 		srp_process_aer_req(ch, iu->buf);
2094 		break;
2095 
2096 	case SRP_T_LOGOUT:
2097 		/* XXX Handle target logout */
2098 		shost_printk(KERN_WARNING, target->scsi_host,
2099 			     PFX "Got target logout request\n");
2100 		break;
2101 
2102 	default:
2103 		shost_printk(KERN_WARNING, target->scsi_host,
2104 			     PFX "Unhandled SRP opcode 0x%02x\n", opcode);
2105 		break;
2106 	}
2107 
2108 	ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
2109 				      DMA_FROM_DEVICE);
2110 
2111 	res = srp_post_recv(ch, iu);
2112 	if (res != 0)
2113 		shost_printk(KERN_ERR, target->scsi_host,
2114 			     PFX "Recv failed with error code %d\n", res);
2115 }
2116 
2117 /**
2118  * srp_tl_err_work() - handle a transport layer error
2119  * @work: Work structure embedded in an SRP target port.
2120  *
2121  * Note: This function may get invoked before the rport has been created,
2122  * hence the target->rport test.
2123  */
2124 static void srp_tl_err_work(struct work_struct *work)
2125 {
2126 	struct srp_target_port *target;
2127 
2128 	target = container_of(work, struct srp_target_port, tl_err_work);
2129 	if (target->rport)
2130 		srp_start_tl_fail_timers(target->rport);
2131 }
2132 
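/*
 * Report a work request failure and schedule transport layer error handling.
 * Only the first failure on a connected channel is reported; subsequent
 * failures are suppressed through target->qp_in_error.
 */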
2133 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
2134 		const char *opname)
2135 {
2136 	struct srp_rdma_ch *ch = cq->cq_context;
2137 	struct srp_target_port *target = ch->target;
2138 
2139 	if (ch->connected && !target->qp_in_error) {
2140 		shost_printk(KERN_ERR, target->scsi_host,
2141 			     PFX "failed %s status %s (%d) for CQE %p\n",
2142 			     opname, ib_wc_status_msg(wc->status), wc->status,
2143 			     wc->wr_cqe);
2144 		queue_work(system_long_wq, &target->tl_err_work);
2145 	}
2146 	target->qp_in_error = true;
2147 }
2148 
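/*
 * SCSI queuecommand callback: select the RDMA channel from the block layer
 * tag, build an SRP_CMD information unit, map the data buffer and post the
 * send work request.
 */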
2149 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2150 {
2151 	struct srp_target_port *target = host_to_target(shost);
2152 	struct srp_rport *rport = target->rport;
2153 	struct srp_rdma_ch *ch;
2154 	struct srp_request *req;
2155 	struct srp_iu *iu;
2156 	struct srp_cmd *cmd;
2157 	struct ib_device *dev;
2158 	unsigned long flags;
2159 	u32 tag;
2160 	u16 idx;
2161 	int len, ret;
2162 	const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
2163 
2164 	/*
2165 	 * The SCSI EH thread is the only context from which srp_queuecommand()
2166 	 * can get invoked for blocked devices (SDEV_BLOCK /
2167 	 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
2168 	 * locking the rport mutex if invoked from inside the SCSI EH.
2169 	 */
2170 	if (in_scsi_eh)
2171 		mutex_lock(&rport->mutex);
2172 
2173 	scmnd->result = srp_chkready(target->rport);
2174 	if (unlikely(scmnd->result))
2175 		goto err;
2176 
2177 	WARN_ON_ONCE(scmnd->request->tag < 0);
2178 	tag = blk_mq_unique_tag(scmnd->request);
2179 	ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2180 	idx = blk_mq_unique_tag_to_tag(tag);
2181 	WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2182 		  dev_name(&shost->shost_gendev), tag, idx,
2183 		  target->req_ring_size);
2184 
2185 	spin_lock_irqsave(&ch->lock, flags);
2186 	iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2187 	spin_unlock_irqrestore(&ch->lock, flags);
2188 
2189 	if (!iu)
2190 		goto err;
2191 
2192 	req = &ch->req_ring[idx];
2193 	dev = target->srp_host->srp_dev->dev;
2194 	ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
2195 				   DMA_TO_DEVICE);
2196 
2197 	scmnd->host_scribble = (void *) req;
2198 
2199 	cmd = iu->buf;
2200 	memset(cmd, 0, sizeof *cmd);
2201 
2202 	cmd->opcode = SRP_CMD;
2203 	int_to_scsilun(scmnd->device->lun, &cmd->lun);
2204 	cmd->tag    = tag;
2205 	memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2206 
2207 	req->scmnd    = scmnd;
2208 	req->cmd      = iu;
2209 
2210 	len = srp_map_data(scmnd, ch, req);
2211 	if (len < 0) {
2212 		shost_printk(KERN_ERR, target->scsi_host,
2213 			     PFX "Failed to map data (%d)\n", len);
2214 		/*
2215 		 * If we ran out of memory descriptors (-ENOMEM) because an
2216 		 * application is queuing many requests with more than
2217 		 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2218 		 * to reduce queue depth temporarily.
2219 		 */
2220 		scmnd->result = len == -ENOMEM ?
2221 			DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2222 		goto err_iu;
2223 	}
2224 
2225 	ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2226 				      DMA_TO_DEVICE);
2227 
2228 	if (srp_post_send(ch, iu, len)) {
2229 		shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2230 		goto err_unmap;
2231 	}
2232 
2233 	ret = 0;
2234 
2235 unlock_rport:
2236 	if (in_scsi_eh)
2237 		mutex_unlock(&rport->mutex);
2238 
2239 	return ret;
2240 
2241 err_unmap:
2242 	srp_unmap_data(scmnd, ch, req);
2243 
2244 err_iu:
2245 	srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2246 
2247 	/*
2248 	 * Avoid that the loops that iterate over the request ring can
2249 	 * encounter a dangling SCSI command pointer.
2250 	 */
2251 	req->scmnd = NULL;
2252 
2253 err:
2254 	if (scmnd->result) {
2255 		scmnd->scsi_done(scmnd);
2256 		ret = 0;
2257 	} else {
2258 		ret = SCSI_MLQUEUE_HOST_BUSY;
2259 	}
2260 
2261 	goto unlock_rport;
2262 }
2263 
2264 /*
2265  * Note: the resources allocated in this function are freed in
2266  * srp_free_ch_ib().
2267  */
2268 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2269 {
2270 	struct srp_target_port *target = ch->target;
2271 	int i;
2272 
2273 	ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2274 			      GFP_KERNEL);
2275 	if (!ch->rx_ring)
2276 		goto err_no_ring;
2277 	ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2278 			      GFP_KERNEL);
2279 	if (!ch->tx_ring)
2280 		goto err_no_ring;
2281 
2282 	for (i = 0; i < target->queue_size; ++i) {
2283 		ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2284 					      ch->max_ti_iu_len,
2285 					      GFP_KERNEL, DMA_FROM_DEVICE);
2286 		if (!ch->rx_ring[i])
2287 			goto err;
2288 	}
2289 
2290 	for (i = 0; i < target->queue_size; ++i) {
2291 		ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2292 					      target->max_iu_len,
2293 					      GFP_KERNEL, DMA_TO_DEVICE);
2294 		if (!ch->tx_ring[i])
2295 			goto err;
2296 
2297 		list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2298 	}
2299 
2300 	return 0;
2301 
2302 err:
2303 	for (i = 0; i < target->queue_size; ++i) {
2304 		srp_free_iu(target->srp_host, ch->rx_ring[i]);
2305 		srp_free_iu(target->srp_host, ch->tx_ring[i]);
2306 	}
2307 
2308 
2309 err_no_ring:
2310 	kfree(ch->tx_ring);
2311 	ch->tx_ring = NULL;
2312 	kfree(ch->rx_ring);
2313 	ch->rx_ring = NULL;
2314 
2315 	return -ENOMEM;
2316 }
2317 
2318 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2319 {
2320 	uint64_t T_tr_ns, max_compl_time_ms;
2321 	uint32_t rq_tmo_jiffies;
2322 
2323 	/*
2324 	 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2325 	 * table 91), both the QP timeout and the retry count have to be set
2326 	 * for RC QP's during the RTR to RTS transition.
2327 	 */
2328 	WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2329 		     (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2330 
2331 	/*
2332 	 * Set target->rq_tmo_jiffies to one second more than the largest time
2333 	 * it can take before an error completion is generated. See also
2334 	 * C9-140..142 in the IBTA spec for more information about how to
2335 	 * convert the QP Local ACK Timeout value to nanoseconds.
2336 	 */
2337 	T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2338 	max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2339 	do_div(max_compl_time_ms, NSEC_PER_MSEC);
2340 	rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2341 
2342 	return rq_tmo_jiffies;
2343 }
2344 
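/*
 * Handle a connection manager REP: record the login parameters, allocate
 * the RX/TX rings on the first login, transition the QP to RTR, post the
 * initial receive work requests, transition the QP to RTS and send an RTU.
 */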
2345 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2346 			       const struct srp_login_rsp *lrsp,
2347 			       struct srp_rdma_ch *ch)
2348 {
2349 	struct srp_target_port *target = ch->target;
2350 	struct ib_qp_attr *qp_attr = NULL;
2351 	int attr_mask = 0;
2352 	int ret;
2353 	int i;
2354 
2355 	if (lrsp->opcode == SRP_LOGIN_RSP) {
2356 		ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2357 		ch->req_lim       = be32_to_cpu(lrsp->req_lim_delta);
2358 
2359 		/*
2360 		 * Reserve credits for task management so we don't
2361 		 * bounce requests back to the SCSI mid-layer.
2362 		 */
2363 		target->scsi_host->can_queue
2364 			= min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2365 			      target->scsi_host->can_queue);
2366 		target->scsi_host->cmd_per_lun
2367 			= min_t(int, target->scsi_host->can_queue,
2368 				target->scsi_host->cmd_per_lun);
2369 	} else {
2370 		shost_printk(KERN_WARNING, target->scsi_host,
2371 			     PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2372 		ret = -ECONNRESET;
2373 		goto error;
2374 	}
2375 
2376 	if (!ch->rx_ring) {
2377 		ret = srp_alloc_iu_bufs(ch);
2378 		if (ret)
2379 			goto error;
2380 	}
2381 
2382 	ret = -ENOMEM;
2383 	qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
2384 	if (!qp_attr)
2385 		goto error;
2386 
2387 	qp_attr->qp_state = IB_QPS_RTR;
2388 	ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2389 	if (ret)
2390 		goto error_free;
2391 
2392 	ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2393 	if (ret)
2394 		goto error_free;
2395 
2396 	for (i = 0; i < target->queue_size; i++) {
2397 		struct srp_iu *iu = ch->rx_ring[i];
2398 
2399 		ret = srp_post_recv(ch, iu);
2400 		if (ret)
2401 			goto error_free;
2402 	}
2403 
2404 	qp_attr->qp_state = IB_QPS_RTS;
2405 	ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2406 	if (ret)
2407 		goto error_free;
2408 
2409 	target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2410 
2411 	ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2412 	if (ret)
2413 		goto error_free;
2414 
2415 	ret = ib_send_cm_rtu(cm_id, NULL, 0);
2416 
2417 error_free:
2418 	kfree(qp_attr);
2419 
2420 error:
2421 	ch->status = ret;
2422 }
2423 
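/*
 * Translate a connection manager REJ into a ch->status value. Handles CM
 * and port redirects, the Topspin/Cisco reject code workaround, SRP login
 * rejects and stale connections.
 */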
2424 static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
2425 			       struct ib_cm_event *event,
2426 			       struct srp_rdma_ch *ch)
2427 {
2428 	struct srp_target_port *target = ch->target;
2429 	struct Scsi_Host *shost = target->scsi_host;
2430 	struct ib_class_port_info *cpi;
2431 	int opcode;
2432 
2433 	switch (event->param.rej_rcvd.reason) {
2434 	case IB_CM_REJ_PORT_CM_REDIRECT:
2435 		cpi = event->param.rej_rcvd.ari;
2436 		sa_path_set_dlid(&ch->path, ntohs(cpi->redirect_lid));
2437 		ch->path.pkey = cpi->redirect_pkey;
2438 		cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2439 		memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
2440 
2441 		ch->status = sa_path_get_dlid(&ch->path) ?
2442 			SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2443 		break;
2444 
2445 	case IB_CM_REJ_PORT_REDIRECT:
2446 		if (srp_target_is_topspin(target)) {
2447 			/*
2448 			 * Topspin/Cisco SRP gateways incorrectly send
2449 			 * reject reason code 25 when they mean 24
2450 			 * (port redirect).
2451 			 */
2452 			memcpy(ch->path.dgid.raw,
2453 			       event->param.rej_rcvd.ari, 16);
2454 
2455 			shost_printk(KERN_DEBUG, shost,
2456 				     PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2457 				     be64_to_cpu(ch->path.dgid.global.subnet_prefix),
2458 				     be64_to_cpu(ch->path.dgid.global.interface_id));
2459 
2460 			ch->status = SRP_PORT_REDIRECT;
2461 		} else {
2462 			shost_printk(KERN_WARNING, shost,
2463 				     "  REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2464 			ch->status = -ECONNRESET;
2465 		}
2466 		break;
2467 
2468 	case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2469 		shost_printk(KERN_WARNING, shost,
2470 			    "  REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2471 		ch->status = -ECONNRESET;
2472 		break;
2473 
2474 	case IB_CM_REJ_CONSUMER_DEFINED:
2475 		opcode = *(u8 *) event->private_data;
2476 		if (opcode == SRP_LOGIN_REJ) {
2477 			struct srp_login_rej *rej = event->private_data;
2478 			u32 reason = be32_to_cpu(rej->reason);
2479 
2480 			if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2481 				shost_printk(KERN_WARNING, shost,
2482 					     PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2483 			else
2484 				shost_printk(KERN_WARNING, shost, PFX
2485 					     "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2486 					     target->sgid.raw,
2487 					     target->orig_dgid.raw, reason);
		} else {
			shost_printk(KERN_WARNING, shost,
				     "  REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n",
				     opcode);
		}
2492 		ch->status = -ECONNRESET;
2493 		break;
2494 
2495 	case IB_CM_REJ_STALE_CONN:
2496 		shost_printk(KERN_WARNING, shost, "  REJ reason: stale connection\n");
2497 		ch->status = SRP_STALE_CONN;
2498 		break;
2499 
2500 	default:
2501 		shost_printk(KERN_WARNING, shost, "  REJ reason 0x%x\n",
2502 			     event->param.rej_rcvd.reason);
2503 		ch->status = -ECONNRESET;
2504 	}
2505 }
2506 
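/*
 * Connection manager event handler. For the events the login code waits on,
 * ch->status is updated and ch->done is completed.
 */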
2507 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2508 {
2509 	struct srp_rdma_ch *ch = cm_id->context;
2510 	struct srp_target_port *target = ch->target;
2511 	int comp = 0;
2512 
2513 	switch (event->event) {
2514 	case IB_CM_REQ_ERROR:
2515 		shost_printk(KERN_DEBUG, target->scsi_host,
2516 			     PFX "Sending CM REQ failed\n");
2517 		comp = 1;
2518 		ch->status = -ECONNRESET;
2519 		break;
2520 
2521 	case IB_CM_REP_RECEIVED:
2522 		comp = 1;
2523 		srp_cm_rep_handler(cm_id, event->private_data, ch);
2524 		break;
2525 
2526 	case IB_CM_REJ_RECEIVED:
2527 		shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2528 		comp = 1;
2529 
2530 		srp_cm_rej_handler(cm_id, event, ch);
2531 		break;
2532 
2533 	case IB_CM_DREQ_RECEIVED:
2534 		shost_printk(KERN_WARNING, target->scsi_host,
2535 			     PFX "DREQ received - connection closed\n");
2536 		ch->connected = false;
2537 		if (ib_send_cm_drep(cm_id, NULL, 0))
2538 			shost_printk(KERN_ERR, target->scsi_host,
2539 				     PFX "Sending CM DREP failed\n");
2540 		queue_work(system_long_wq, &target->tl_err_work);
2541 		break;
2542 
2543 	case IB_CM_TIMEWAIT_EXIT:
2544 		shost_printk(KERN_ERR, target->scsi_host,
2545 			     PFX "connection closed\n");
2546 		comp = 1;
2547 
2548 		ch->status = 0;
2549 		break;
2550 
2551 	case IB_CM_MRA_RECEIVED:
2552 	case IB_CM_DREQ_ERROR:
2553 	case IB_CM_DREP_RECEIVED:
2554 		break;
2555 
2556 	default:
2557 		shost_printk(KERN_WARNING, target->scsi_host,
2558 			     PFX "Unhandled CM event %d\n", event->event);
2559 		break;
2560 	}
2561 
2562 	if (comp)
2563 		complete(&ch->done);
2564 
2565 	return 0;
2566 }
2567 
2568 /**
 * srp_change_queue_depth() - set the queue depth of a SCSI device
 * @sdev: SCSI device.
 * @qdepth: Requested queue depth.
 *
 * Returns the new queue depth.
2574  */
2575 static int
2576 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2577 {
2578 	if (!sdev->tagged_supported)
2579 		qdepth = 1;
2580 	return scsi_change_queue_depth(sdev, qdepth);
2581 }
2582 
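/*
 * Send an SRP task management information unit and wait up to
 * SRP_ABORT_TIMEOUT_MS for the response. The rport mutex serializes this
 * function against channel re-creation. Returns 0 upon success and -1 upon
 * failure or timeout; *status is only set if a response has been received.
 */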
2583 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2584 			     u8 func, u8 *status)
2585 {
2586 	struct srp_target_port *target = ch->target;
2587 	struct srp_rport *rport = target->rport;
2588 	struct ib_device *dev = target->srp_host->srp_dev->dev;
2589 	struct srp_iu *iu;
2590 	struct srp_tsk_mgmt *tsk_mgmt;
2591 	int res;
2592 
2593 	if (!ch->connected || target->qp_in_error)
2594 		return -1;
2595 
2596 	/*
2597 	 * Lock the rport mutex to avoid that srp_create_ch_ib() is
2598 	 * invoked while a task management function is being sent.
2599 	 */
2600 	mutex_lock(&rport->mutex);
2601 	spin_lock_irq(&ch->lock);
2602 	iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2603 	spin_unlock_irq(&ch->lock);
2604 
2605 	if (!iu) {
2606 		mutex_unlock(&rport->mutex);
2607 
2608 		return -1;
2609 	}
2610 
2611 	ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2612 				   DMA_TO_DEVICE);
2613 	tsk_mgmt = iu->buf;
2614 	memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2615 
2616 	tsk_mgmt->opcode 	= SRP_TSK_MGMT;
2617 	int_to_scsilun(lun, &tsk_mgmt->lun);
2618 	tsk_mgmt->tsk_mgmt_func = func;
2619 	tsk_mgmt->task_tag	= req_tag;
2620 
2621 	spin_lock_irq(&ch->lock);
2622 	ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
2623 	tsk_mgmt->tag = ch->tsk_mgmt_tag;
2624 	spin_unlock_irq(&ch->lock);
2625 
2626 	init_completion(&ch->tsk_mgmt_done);
2627 
2628 	ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2629 				      DMA_TO_DEVICE);
2630 	if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2631 		srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2632 		mutex_unlock(&rport->mutex);
2633 
2634 		return -1;
2635 	}
2636 	res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
2637 					msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
2638 	if (res > 0 && status)
2639 		*status = ch->tsk_mgmt_status;
2640 	mutex_unlock(&rport->mutex);
2641 
2642 	WARN_ON_ONCE(res < 0);
2643 
2644 	return res > 0 ? 0 : -1;
2645 }
2646 
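/*
 * SCSI error handler callback for aborting a single command. If the command
 * is still owned by this initiator, send SRP_TSK_ABORT_TASK to the target
 * and complete the command with DID_ABORT.
 */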
2647 static int srp_abort(struct scsi_cmnd *scmnd)
2648 {
2649 	struct srp_target_port *target = host_to_target(scmnd->device->host);
2650 	struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2651 	u32 tag;
2652 	u16 ch_idx;
2653 	struct srp_rdma_ch *ch;
2654 	int ret;
2655 
2656 	shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2657 
2658 	if (!req)
2659 		return SUCCESS;
2660 	tag = blk_mq_unique_tag(scmnd->request);
2661 	ch_idx = blk_mq_unique_tag_to_hwq(tag);
2662 	if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2663 		return SUCCESS;
2664 	ch = &target->ch[ch_idx];
2665 	if (!srp_claim_req(ch, req, NULL, scmnd))
2666 		return SUCCESS;
2667 	shost_printk(KERN_ERR, target->scsi_host,
2668 		     "Sending SRP abort for tag %#x\n", tag);
2669 	if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2670 			      SRP_TSK_ABORT_TASK, NULL) == 0)
2671 		ret = SUCCESS;
2672 	else if (target->rport->state == SRP_RPORT_LOST)
2673 		ret = FAST_IO_FAIL;
2674 	else
2675 		ret = FAILED;
2676 	srp_free_req(ch, req, scmnd, 0);
2677 	scmnd->result = DID_ABORT << 16;
2678 	scmnd->scsi_done(scmnd);
2679 
2680 	return ret;
2681 }
2682 
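/*
 * SCSI error handler callback for a LUN reset: send SRP_TSK_LUN_RESET on the
 * first channel and, if that succeeds, finish all outstanding requests for
 * the device on every channel with DID_RESET.
 */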
2683 static int srp_reset_device(struct scsi_cmnd *scmnd)
2684 {
2685 	struct srp_target_port *target = host_to_target(scmnd->device->host);
2686 	struct srp_rdma_ch *ch;
	int i, j;
2688 	u8 status;
2689 
2690 	shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2691 
2692 	ch = &target->ch[0];
2693 	if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2694 			      SRP_TSK_LUN_RESET, &status))
2695 		return FAILED;
2696 	if (status)
2697 		return FAILED;
2698 
2699 	for (i = 0; i < target->ch_count; i++) {
2700 		ch = &target->ch[i];
		for (j = 0; j < target->req_ring_size; ++j) {
			struct srp_request *req = &ch->req_ring[j];
2703 
2704 			srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
2705 		}
2706 	}
2707 
2708 	return SUCCESS;
2709 }
2710 
2711 static int srp_reset_host(struct scsi_cmnd *scmnd)
2712 {
2713 	struct srp_target_port *target = host_to_target(scmnd->device->host);
2714 
2715 	shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2716 
2717 	return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2718 }
2719 
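/*
 * Set the virt_boundary of the request queue such that the block layer never
 * builds S/G lists with gaps that memory registration with mr_page_size
 * pages cannot map.
 */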
2720 static int srp_slave_alloc(struct scsi_device *sdev)
2721 {
2722 	struct Scsi_Host *shost = sdev->host;
2723 	struct srp_target_port *target = host_to_target(shost);
2724 	struct srp_device *srp_dev = target->srp_host->srp_dev;
2725 
	blk_queue_virt_boundary(sdev->request_queue, ~srp_dev->mr_page_mask);
2729 
2730 	return 0;
2731 }
2732 
2733 static int srp_slave_configure(struct scsi_device *sdev)
2734 {
2735 	struct Scsi_Host *shost = sdev->host;
2736 	struct srp_target_port *target = host_to_target(shost);
2737 	struct request_queue *q = sdev->request_queue;
2738 	unsigned long timeout;
2739 
2740 	if (sdev->type == TYPE_DISK) {
2741 		timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
2742 		blk_queue_rq_timeout(q, timeout);
2743 	}
2744 
2745 	return 0;
2746 }
2747 
2748 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
2749 			   char *buf)
2750 {
2751 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2752 
2753 	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
2754 }
2755 
2756 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
2757 			     char *buf)
2758 {
2759 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2760 
2761 	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
2762 }
2763 
2764 static ssize_t show_service_id(struct device *dev,
2765 			       struct device_attribute *attr, char *buf)
2766 {
2767 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2768 
2769 	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id));
2770 }
2771 
2772 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
2773 			 char *buf)
2774 {
2775 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2776 
2777 	return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey));
2778 }
2779 
2780 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2781 			 char *buf)
2782 {
2783 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2784 
2785 	return sprintf(buf, "%pI6\n", target->sgid.raw);
2786 }
2787 
2788 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
2789 			 char *buf)
2790 {
2791 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2792 	struct srp_rdma_ch *ch = &target->ch[0];
2793 
2794 	return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
2795 }
2796 
2797 static ssize_t show_orig_dgid(struct device *dev,
2798 			      struct device_attribute *attr, char *buf)
2799 {
2800 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2801 
2802 	return sprintf(buf, "%pI6\n", target->orig_dgid.raw);
2803 }
2804 
2805 static ssize_t show_req_lim(struct device *dev,
2806 			    struct device_attribute *attr, char *buf)
2807 {
2808 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2809 	struct srp_rdma_ch *ch;
2810 	int i, req_lim = INT_MAX;
2811 
2812 	for (i = 0; i < target->ch_count; i++) {
2813 		ch = &target->ch[i];
2814 		req_lim = min(req_lim, ch->req_lim);
2815 	}
2816 	return sprintf(buf, "%d\n", req_lim);
2817 }
2818 
2819 static ssize_t show_zero_req_lim(struct device *dev,
2820 				 struct device_attribute *attr, char *buf)
2821 {
2822 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2823 
2824 	return sprintf(buf, "%d\n", target->zero_req_lim);
2825 }
2826 
2827 static ssize_t show_local_ib_port(struct device *dev,
2828 				  struct device_attribute *attr, char *buf)
2829 {
2830 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2831 
2832 	return sprintf(buf, "%d\n", target->srp_host->port);
2833 }
2834 
2835 static ssize_t show_local_ib_device(struct device *dev,
2836 				    struct device_attribute *attr, char *buf)
2837 {
2838 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2839 
2840 	return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
2841 }
2842 
2843 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
2844 			     char *buf)
2845 {
2846 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2847 
2848 	return sprintf(buf, "%d\n", target->ch_count);
2849 }
2850 
2851 static ssize_t show_comp_vector(struct device *dev,
2852 				struct device_attribute *attr, char *buf)
2853 {
2854 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2855 
2856 	return sprintf(buf, "%d\n", target->comp_vector);
2857 }
2858 
2859 static ssize_t show_tl_retry_count(struct device *dev,
2860 				   struct device_attribute *attr, char *buf)
2861 {
2862 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2863 
2864 	return sprintf(buf, "%d\n", target->tl_retry_count);
2865 }
2866 
2867 static ssize_t show_cmd_sg_entries(struct device *dev,
2868 				   struct device_attribute *attr, char *buf)
2869 {
2870 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2871 
2872 	return sprintf(buf, "%u\n", target->cmd_sg_cnt);
2873 }
2874 
2875 static ssize_t show_allow_ext_sg(struct device *dev,
2876 				 struct device_attribute *attr, char *buf)
2877 {
2878 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2879 
2880 	return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
2881 }
2882 
2883 static DEVICE_ATTR(id_ext,	    S_IRUGO, show_id_ext,	   NULL);
2884 static DEVICE_ATTR(ioc_guid,	    S_IRUGO, show_ioc_guid,	   NULL);
2885 static DEVICE_ATTR(service_id,	    S_IRUGO, show_service_id,	   NULL);
2886 static DEVICE_ATTR(pkey,	    S_IRUGO, show_pkey,		   NULL);
2887 static DEVICE_ATTR(sgid,	    S_IRUGO, show_sgid,		   NULL);
2888 static DEVICE_ATTR(dgid,	    S_IRUGO, show_dgid,		   NULL);
2889 static DEVICE_ATTR(orig_dgid,	    S_IRUGO, show_orig_dgid,	   NULL);
2890 static DEVICE_ATTR(req_lim,         S_IRUGO, show_req_lim,         NULL);
2891 static DEVICE_ATTR(zero_req_lim,    S_IRUGO, show_zero_req_lim,	   NULL);
2892 static DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,   NULL);
2893 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
2894 static DEVICE_ATTR(ch_count,        S_IRUGO, show_ch_count,        NULL);
2895 static DEVICE_ATTR(comp_vector,     S_IRUGO, show_comp_vector,     NULL);
2896 static DEVICE_ATTR(tl_retry_count,  S_IRUGO, show_tl_retry_count,  NULL);
2897 static DEVICE_ATTR(cmd_sg_entries,  S_IRUGO, show_cmd_sg_entries,  NULL);
2898 static DEVICE_ATTR(allow_ext_sg,    S_IRUGO, show_allow_ext_sg,    NULL);
2899 
2900 static struct device_attribute *srp_host_attrs[] = {
2901 	&dev_attr_id_ext,
2902 	&dev_attr_ioc_guid,
2903 	&dev_attr_service_id,
2904 	&dev_attr_pkey,
2905 	&dev_attr_sgid,
2906 	&dev_attr_dgid,
2907 	&dev_attr_orig_dgid,
2908 	&dev_attr_req_lim,
2909 	&dev_attr_zero_req_lim,
2910 	&dev_attr_local_ib_port,
2911 	&dev_attr_local_ib_device,
2912 	&dev_attr_ch_count,
2913 	&dev_attr_comp_vector,
2914 	&dev_attr_tl_retry_count,
2915 	&dev_attr_cmd_sg_entries,
2916 	&dev_attr_allow_ext_sg,
2917 	NULL
2918 };
2919 
2920 static struct scsi_host_template srp_template = {
2921 	.module				= THIS_MODULE,
2922 	.name				= "InfiniBand SRP initiator",
2923 	.proc_name			= DRV_NAME,
2924 	.slave_alloc			= srp_slave_alloc,
2925 	.slave_configure		= srp_slave_configure,
2926 	.info				= srp_target_info,
2927 	.queuecommand			= srp_queuecommand,
2928 	.change_queue_depth             = srp_change_queue_depth,
2929 	.eh_timed_out			= srp_timed_out,
2930 	.eh_abort_handler		= srp_abort,
2931 	.eh_device_reset_handler	= srp_reset_device,
2932 	.eh_host_reset_handler		= srp_reset_host,
2933 	.skip_settle_delay		= true,
2934 	.sg_tablesize			= SRP_DEF_SG_TABLESIZE,
2935 	.can_queue			= SRP_DEFAULT_CMD_SQ_SIZE,
2936 	.this_id			= -1,
2937 	.cmd_per_lun			= SRP_DEFAULT_CMD_SQ_SIZE,
2938 	.use_clustering			= ENABLE_CLUSTERING,
2939 	.shost_attrs			= srp_host_attrs,
2940 	.track_queue_depth		= 1,
2941 };
2942 
2943 static int srp_sdev_count(struct Scsi_Host *host)
2944 {
2945 	struct scsi_device *sdev;
2946 	int c = 0;
2947 
2948 	shost_for_each_device(sdev, host)
2949 		c++;
2950 
2951 	return c;
2952 }
2953 
2954 /*
2955  * Return values:
2956  * < 0 upon failure. Caller is responsible for SRP target port cleanup.
2957  * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
2958  *    removal has been scheduled.
2959  * 0 and target->state != SRP_TARGET_REMOVED upon success.
2960  */
2961 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
2962 {
2963 	struct srp_rport_identifiers ids;
2964 	struct srp_rport *rport;
2965 
2966 	target->state = SRP_TARGET_SCANNING;
2967 	sprintf(target->target_name, "SRP.T10:%016llX",
2968 		be64_to_cpu(target->id_ext));
2969 
2970 	if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dev.parent))
2971 		return -ENODEV;
2972 
2973 	memcpy(ids.port_id, &target->id_ext, 8);
2974 	memcpy(ids.port_id + 8, &target->ioc_guid, 8);
2975 	ids.roles = SRP_RPORT_ROLE_TARGET;
2976 	rport = srp_rport_add(target->scsi_host, &ids);
2977 	if (IS_ERR(rport)) {
2978 		scsi_remove_host(target->scsi_host);
2979 		return PTR_ERR(rport);
2980 	}
2981 
2982 	rport->lld_data = target;
2983 	target->rport = rport;
2984 
2985 	spin_lock(&host->target_lock);
2986 	list_add_tail(&target->list, &host->target_list);
2987 	spin_unlock(&host->target_lock);
2988 
2989 	scsi_scan_target(&target->scsi_host->shost_gendev,
2990 			 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
2991 
2992 	if (srp_connected_ch(target) < target->ch_count ||
2993 	    target->qp_in_error) {
2994 		shost_printk(KERN_INFO, target->scsi_host,
2995 			     PFX "SCSI scan failed - removing SCSI host\n");
2996 		srp_queue_remove_work(target);
2997 		goto out;
2998 	}
2999 
3000 	pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
3001 		 dev_name(&target->scsi_host->shost_gendev),
3002 		 srp_sdev_count(target->scsi_host));
3003 
3004 	spin_lock_irq(&target->lock);
3005 	if (target->state == SRP_TARGET_SCANNING)
3006 		target->state = SRP_TARGET_LIVE;
3007 	spin_unlock_irq(&target->lock);
3008 
3009 out:
3010 	return 0;
3011 }
3012 
3013 static void srp_release_dev(struct device *dev)
3014 {
3015 	struct srp_host *host =
3016 		container_of(dev, struct srp_host, dev);
3017 
3018 	complete(&host->released);
3019 }
3020 
3021 static struct class srp_class = {
3022 	.name    = "infiniband_srp",
3023 	.dev_release = srp_release_dev
3024 };
3025 
3026 /**
3027  * srp_conn_unique() - check whether the connection to a target is unique
3028  * @host:   SRP host.
3029  * @target: SRP target port.
3030  */
3031 static bool srp_conn_unique(struct srp_host *host,
3032 			    struct srp_target_port *target)
3033 {
3034 	struct srp_target_port *t;
3035 	bool ret = false;
3036 
3037 	if (target->state == SRP_TARGET_REMOVED)
3038 		goto out;
3039 
3040 	ret = true;
3041 
3042 	spin_lock(&host->target_lock);
3043 	list_for_each_entry(t, &host->target_list, list) {
3044 		if (t != target &&
3045 		    target->id_ext == t->id_ext &&
3046 		    target->ioc_guid == t->ioc_guid &&
3047 		    target->initiator_ext == t->initiator_ext) {
3048 			ret = false;
3049 			break;
3050 		}
3051 	}
3052 	spin_unlock(&host->target_lock);
3053 
3054 out:
3055 	return ret;
3056 }
3057 
3058 /*
3059  * Target ports are added by writing
3060  *
3061  *     id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
3062  *     pkey=<P_Key>,service_id=<service ID>
3063  *
3064  * to the add_target sysfs attribute.
3065  */
3066 enum {
3067 	SRP_OPT_ERR		= 0,
3068 	SRP_OPT_ID_EXT		= 1 << 0,
3069 	SRP_OPT_IOC_GUID	= 1 << 1,
3070 	SRP_OPT_DGID		= 1 << 2,
3071 	SRP_OPT_PKEY		= 1 << 3,
3072 	SRP_OPT_SERVICE_ID	= 1 << 4,
3073 	SRP_OPT_MAX_SECT	= 1 << 5,
3074 	SRP_OPT_MAX_CMD_PER_LUN	= 1 << 6,
3075 	SRP_OPT_IO_CLASS	= 1 << 7,
3076 	SRP_OPT_INITIATOR_EXT	= 1 << 8,
3077 	SRP_OPT_CMD_SG_ENTRIES	= 1 << 9,
3078 	SRP_OPT_ALLOW_EXT_SG	= 1 << 10,
3079 	SRP_OPT_SG_TABLESIZE	= 1 << 11,
3080 	SRP_OPT_COMP_VECTOR	= 1 << 12,
3081 	SRP_OPT_TL_RETRY_COUNT	= 1 << 13,
3082 	SRP_OPT_QUEUE_SIZE	= 1 << 14,
3083 	SRP_OPT_ALL		= (SRP_OPT_ID_EXT	|
3084 				   SRP_OPT_IOC_GUID	|
3085 				   SRP_OPT_DGID		|
3086 				   SRP_OPT_PKEY		|
3087 				   SRP_OPT_SERVICE_ID),
3088 };
3089 
3090 static const match_table_t srp_opt_tokens = {
3091 	{ SRP_OPT_ID_EXT,		"id_ext=%s" 		},
3092 	{ SRP_OPT_IOC_GUID,		"ioc_guid=%s" 		},
3093 	{ SRP_OPT_DGID,			"dgid=%s" 		},
3094 	{ SRP_OPT_PKEY,			"pkey=%x" 		},
3095 	{ SRP_OPT_SERVICE_ID,		"service_id=%s"		},
3096 	{ SRP_OPT_MAX_SECT,		"max_sect=%d" 		},
3097 	{ SRP_OPT_MAX_CMD_PER_LUN,	"max_cmd_per_lun=%d" 	},
3098 	{ SRP_OPT_IO_CLASS,		"io_class=%x"		},
3099 	{ SRP_OPT_INITIATOR_EXT,	"initiator_ext=%s"	},
3100 	{ SRP_OPT_CMD_SG_ENTRIES,	"cmd_sg_entries=%u"	},
3101 	{ SRP_OPT_ALLOW_EXT_SG,		"allow_ext_sg=%u"	},
3102 	{ SRP_OPT_SG_TABLESIZE,		"sg_tablesize=%u"	},
3103 	{ SRP_OPT_COMP_VECTOR,		"comp_vector=%u"	},
3104 	{ SRP_OPT_TL_RETRY_COUNT,	"tl_retry_count=%u"	},
3105 	{ SRP_OPT_QUEUE_SIZE,		"queue_size=%d"		},
3106 	{ SRP_OPT_ERR,			NULL 			}
3107 };
3108 
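/*
 * Parse the comma-separated key=value list written to the add_target sysfs
 * attribute and fill in the corresponding @target members. Returns zero upon
 * success and a negative value if a parameter is invalid or if a mandatory
 * parameter is missing.
 */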
3109 static int srp_parse_options(const char *buf, struct srp_target_port *target)
3110 {
3111 	char *options, *sep_opt;
3112 	char *p;
3113 	char dgid[3];
3114 	substring_t args[MAX_OPT_ARGS];
3115 	int opt_mask = 0;
3116 	int token;
3117 	int ret = -EINVAL;
3118 	int i;
3119 
3120 	options = kstrdup(buf, GFP_KERNEL);
3121 	if (!options)
3122 		return -ENOMEM;
3123 
3124 	sep_opt = options;
3125 	while ((p = strsep(&sep_opt, ",\n")) != NULL) {
3126 		if (!*p)
3127 			continue;
3128 
3129 		token = match_token(p, srp_opt_tokens, args);
3130 		opt_mask |= token;
3131 
3132 		switch (token) {
3133 		case SRP_OPT_ID_EXT:
3134 			p = match_strdup(args);
3135 			if (!p) {
3136 				ret = -ENOMEM;
3137 				goto out;
3138 			}
3139 			target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3140 			kfree(p);
3141 			break;
3142 
3143 		case SRP_OPT_IOC_GUID:
3144 			p = match_strdup(args);
3145 			if (!p) {
3146 				ret = -ENOMEM;
3147 				goto out;
3148 			}
3149 			target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
3150 			kfree(p);
3151 			break;
3152 
3153 		case SRP_OPT_DGID:
3154 			p = match_strdup(args);
3155 			if (!p) {
3156 				ret = -ENOMEM;
3157 				goto out;
3158 			}
3159 			if (strlen(p) != 32) {
3160 				pr_warn("bad dest GID parameter '%s'\n", p);
3161 				kfree(p);
3162 				goto out;
3163 			}
3164 
3165 			for (i = 0; i < 16; ++i) {
3166 				strlcpy(dgid, p + i * 2, sizeof(dgid));
3167 				if (sscanf(dgid, "%hhx",
3168 					   &target->orig_dgid.raw[i]) < 1) {
3169 					ret = -EINVAL;
3170 					kfree(p);
3171 					goto out;
3172 				}
3173 			}
3174 			kfree(p);
3175 			break;
3176 
3177 		case SRP_OPT_PKEY:
3178 			if (match_hex(args, &token)) {
3179 				pr_warn("bad P_Key parameter '%s'\n", p);
3180 				goto out;
3181 			}
3182 			target->pkey = cpu_to_be16(token);
3183 			break;
3184 
3185 		case SRP_OPT_SERVICE_ID:
3186 			p = match_strdup(args);
3187 			if (!p) {
3188 				ret = -ENOMEM;
3189 				goto out;
3190 			}
3191 			target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
3192 			kfree(p);
3193 			break;
3194 
3195 		case SRP_OPT_MAX_SECT:
3196 			if (match_int(args, &token)) {
3197 				pr_warn("bad max sect parameter '%s'\n", p);
3198 				goto out;
3199 			}
3200 			target->scsi_host->max_sectors = token;
3201 			break;
3202 
3203 		case SRP_OPT_QUEUE_SIZE:
3204 			if (match_int(args, &token) || token < 1) {
3205 				pr_warn("bad queue_size parameter '%s'\n", p);
3206 				goto out;
3207 			}
3208 			target->scsi_host->can_queue = token;
3209 			target->queue_size = token + SRP_RSP_SQ_SIZE +
3210 					     SRP_TSK_MGMT_SQ_SIZE;
3211 			if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3212 				target->scsi_host->cmd_per_lun = token;
3213 			break;
3214 
3215 		case SRP_OPT_MAX_CMD_PER_LUN:
3216 			if (match_int(args, &token) || token < 1) {
3217 				pr_warn("bad max cmd_per_lun parameter '%s'\n",
3218 					p);
3219 				goto out;
3220 			}
3221 			target->scsi_host->cmd_per_lun = token;
3222 			break;
3223 
3224 		case SRP_OPT_IO_CLASS:
3225 			if (match_hex(args, &token)) {
3226 				pr_warn("bad IO class parameter '%s'\n", p);
3227 				goto out;
3228 			}
3229 			if (token != SRP_REV10_IB_IO_CLASS &&
3230 			    token != SRP_REV16A_IB_IO_CLASS) {
3231 				pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3232 					token, SRP_REV10_IB_IO_CLASS,
3233 					SRP_REV16A_IB_IO_CLASS);
3234 				goto out;
3235 			}
3236 			target->io_class = token;
3237 			break;
3238 
3239 		case SRP_OPT_INITIATOR_EXT:
3240 			p = match_strdup(args);
3241 			if (!p) {
3242 				ret = -ENOMEM;
3243 				goto out;
3244 			}
3245 			target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3246 			kfree(p);
3247 			break;
3248 
3249 		case SRP_OPT_CMD_SG_ENTRIES:
3250 			if (match_int(args, &token) || token < 1 || token > 255) {
3251 				pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3252 					p);
3253 				goto out;
3254 			}
3255 			target->cmd_sg_cnt = token;
3256 			break;
3257 
3258 		case SRP_OPT_ALLOW_EXT_SG:
3259 			if (match_int(args, &token)) {
3260 				pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3261 				goto out;
3262 			}
3263 			target->allow_ext_sg = !!token;
3264 			break;
3265 
3266 		case SRP_OPT_SG_TABLESIZE:
3267 			if (match_int(args, &token) || token < 1 ||
3268 					token > SG_MAX_SEGMENTS) {
3269 				pr_warn("bad max sg_tablesize parameter '%s'\n",
3270 					p);
3271 				goto out;
3272 			}
3273 			target->sg_tablesize = token;
3274 			break;
3275 
3276 		case SRP_OPT_COMP_VECTOR:
3277 			if (match_int(args, &token) || token < 0) {
3278 				pr_warn("bad comp_vector parameter '%s'\n", p);
3279 				goto out;
3280 			}
3281 			target->comp_vector = token;
3282 			break;
3283 
3284 		case SRP_OPT_TL_RETRY_COUNT:
3285 			if (match_int(args, &token) || token < 2 || token > 7) {
3286 				pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3287 					p);
3288 				goto out;
3289 			}
3290 			target->tl_retry_count = token;
3291 			break;
3292 
3293 		default:
3294 			pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3295 				p);
3296 			goto out;
3297 		}
3298 	}
3299 
3300 	if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
3301 		ret = 0;
3302 	else
3303 		for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
3304 			if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
3305 			    !(srp_opt_tokens[i].token & opt_mask))
3306 				pr_warn("target creation request is missing parameter '%s'\n",
3307 					srp_opt_tokens[i].pattern);
3308 
	if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue &&
	    (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3311 		pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3312 			target->scsi_host->cmd_per_lun,
3313 			target->scsi_host->can_queue);
3314 
3315 out:
3316 	kfree(options);
3317 	return ret;
3318 }
3319 
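/*
 * Write handler for the add_target sysfs attribute: parse the target
 * parameters, allocate a SCSI host and the RDMA channels, log in to the SRP
 * target port and scan the new SCSI host. Returns the number of bytes
 * consumed upon success and a negative value upon failure.
 */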
3320 static ssize_t srp_create_target(struct device *dev,
3321 				 struct device_attribute *attr,
3322 				 const char *buf, size_t count)
3323 {
3324 	struct srp_host *host =
3325 		container_of(dev, struct srp_host, dev);
3326 	struct Scsi_Host *target_host;
3327 	struct srp_target_port *target;
3328 	struct srp_rdma_ch *ch;
3329 	struct srp_device *srp_dev = host->srp_dev;
3330 	struct ib_device *ibdev = srp_dev->dev;
3331 	int ret, node_idx, node, cpu, i;
3332 	unsigned int max_sectors_per_mr, mr_per_cmd = 0;
3333 	bool multich = false;
3334 
3335 	target_host = scsi_host_alloc(&srp_template,
3336 				      sizeof (struct srp_target_port));
3337 	if (!target_host)
3338 		return -ENOMEM;
3339 
3340 	target_host->transportt  = ib_srp_transport_template;
3341 	target_host->max_channel = 0;
3342 	target_host->max_id      = 1;
3343 	target_host->max_lun     = -1LL;
3344 	target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3345 
3346 	target = host_to_target(target_host);
3347 
3348 	target->io_class	= SRP_REV16A_IB_IO_CLASS;
3349 	target->scsi_host	= target_host;
3350 	target->srp_host	= host;
3351 	target->lkey		= host->srp_dev->pd->local_dma_lkey;
3352 	target->global_rkey	= host->srp_dev->global_rkey;
3353 	target->cmd_sg_cnt	= cmd_sg_entries;
3354 	target->sg_tablesize	= indirect_sg_entries ? : cmd_sg_entries;
3355 	target->allow_ext_sg	= allow_ext_sg;
3356 	target->tl_retry_count	= 7;
3357 	target->queue_size	= SRP_DEFAULT_QUEUE_SIZE;
3358 
3359 	/*
3360 	 * Avoid that the SCSI host can be removed by srp_remove_target()
3361 	 * before this function returns.
3362 	 */
3363 	scsi_host_get(target->scsi_host);
3364 
3365 	ret = mutex_lock_interruptible(&host->add_target_mutex);
3366 	if (ret < 0)
3367 		goto put;
3368 
3369 	ret = srp_parse_options(buf, target);
3370 	if (ret)
3371 		goto out;
3372 
3373 	target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3374 
3375 	if (!srp_conn_unique(target->srp_host, target)) {
3376 		shost_printk(KERN_INFO, target->scsi_host,
3377 			     PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3378 			     be64_to_cpu(target->id_ext),
3379 			     be64_to_cpu(target->ioc_guid),
3380 			     be64_to_cpu(target->initiator_ext));
3381 		ret = -EEXIST;
3382 		goto out;
3383 	}
3384 
3385 	if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3386 	    target->cmd_sg_cnt < target->sg_tablesize) {
3387 		pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3388 		target->sg_tablesize = target->cmd_sg_cnt;
3389 	}
3390 
3391 	if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
3392 		/*
3393 		 * FR and FMR can only map one HCA page per entry. If the
3394 		 * start address is not aligned on a HCA page boundary two
3395 		 * entries will be used for the head and the tail although
3396 		 * these two entries combined contain at most one HCA page of
3397 		 * data. Hence the "+ 1" in the calculation below.
3398 		 *
3399 		 * The indirect data buffer descriptor is contiguous so the
3400 		 * memory for that buffer will only be registered if
3401 		 * register_always is true. Hence add one to mr_per_cmd if
3402 		 * register_always has been set.
3403 		 */
3404 		max_sectors_per_mr = srp_dev->max_pages_per_mr <<
3405 				  (ilog2(srp_dev->mr_page_size) - 9);
3406 		mr_per_cmd = register_always +
3407 			(target->scsi_host->max_sectors + 1 +
3408 			 max_sectors_per_mr - 1) / max_sectors_per_mr;
3409 		pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
3410 			 target->scsi_host->max_sectors,
3411 			 srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
3412 			 max_sectors_per_mr, mr_per_cmd);
3413 	}
3414 
3415 	target_host->sg_tablesize = target->sg_tablesize;
3416 	target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
3417 	target->mr_per_cmd = mr_per_cmd;
3418 	target->indirect_size = target->sg_tablesize *
3419 				sizeof (struct srp_direct_buf);
3420 	target->max_iu_len = sizeof (struct srp_cmd) +
3421 			     sizeof (struct srp_indirect_buf) +
3422 			     target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
3423 
3424 	INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3425 	INIT_WORK(&target->remove_work, srp_remove_work);
3426 	spin_lock_init(&target->lock);
3427 	ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
3428 	if (ret)
3429 		goto out;
3430 
3431 	ret = -ENOMEM;
3432 	target->ch_count = max_t(unsigned, num_online_nodes(),
3433 				 min(ch_count ? :
3434 				     min(4 * num_online_nodes(),
3435 					 ibdev->num_comp_vectors),
3436 				     num_online_cpus()));
3437 	target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3438 			     GFP_KERNEL);
3439 	if (!target->ch)
3440 		goto out;
3441 
3442 	node_idx = 0;
3443 	for_each_online_node(node) {
3444 		const int ch_start = (node_idx * target->ch_count /
3445 				      num_online_nodes());
3446 		const int ch_end = ((node_idx + 1) * target->ch_count /
3447 				    num_online_nodes());
3448 		const int cv_start = (node_idx * ibdev->num_comp_vectors /
3449 				      num_online_nodes() + target->comp_vector)
3450 				     % ibdev->num_comp_vectors;
3451 		const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
3452 				    num_online_nodes() + target->comp_vector)
3453 				   % ibdev->num_comp_vectors;
3454 		int cpu_idx = 0;
3455 
3456 		for_each_online_cpu(cpu) {
3457 			if (cpu_to_node(cpu) != node)
3458 				continue;
3459 			if (ch_start + cpu_idx >= ch_end)
3460 				continue;
3461 			ch = &target->ch[ch_start + cpu_idx];
3462 			ch->target = target;
3463 			ch->comp_vector = cv_start == cv_end ? cv_start :
3464 				cv_start + cpu_idx % (cv_end - cv_start);
3465 			spin_lock_init(&ch->lock);
3466 			INIT_LIST_HEAD(&ch->free_tx);
3467 			ret = srp_new_cm_id(ch);
3468 			if (ret)
3469 				goto err_disconnect;
3470 
3471 			ret = srp_create_ch_ib(ch);
3472 			if (ret)
3473 				goto err_disconnect;
3474 
3475 			ret = srp_alloc_req_data(ch);
3476 			if (ret)
3477 				goto err_disconnect;
3478 
3479 			ret = srp_connect_ch(ch, multich);
3480 			if (ret) {
3481 				shost_printk(KERN_ERR, target->scsi_host,
3482 					     PFX "Connection %d/%d to %pI6 failed\n",
3483 					     ch_start + cpu_idx,
3484 					     target->ch_count,
3485 					     ch->target->orig_dgid.raw);
3486 				if (node_idx == 0 && cpu_idx == 0) {
3487 					goto free_ch;
3488 				} else {
3489 					srp_free_ch_ib(target, ch);
3490 					srp_free_req_data(target, ch);
3491 					target->ch_count = ch - target->ch;
3492 					goto connected;
3493 				}
3494 			}
3495 
3496 			multich = true;
3497 			cpu_idx++;
3498 		}
3499 		node_idx++;
3500 	}
3501 
3502 connected:
3503 	target->scsi_host->nr_hw_queues = target->ch_count;
3504 
3505 	ret = srp_add_target(host, target);
3506 	if (ret)
3507 		goto err_disconnect;
3508 
3509 	if (target->state != SRP_TARGET_REMOVED) {
3510 		shost_printk(KERN_DEBUG, target->scsi_host, PFX
3511 			     "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3512 			     be64_to_cpu(target->id_ext),
3513 			     be64_to_cpu(target->ioc_guid),
3514 			     be16_to_cpu(target->pkey),
3515 			     be64_to_cpu(target->service_id),
3516 			     target->sgid.raw, target->orig_dgid.raw);
3517 	}
3518 
3519 	ret = count;
3520 
3521 out:
3522 	mutex_unlock(&host->add_target_mutex);
3523 
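	/*
	 * The first scsi_host_put() call below balances the scsi_host_get()
	 * call earlier in this function. The second call, made only on
	 * failure, drops the remaining reference from scsi_host_alloc() so
	 * that the SCSI host is freed.
	 */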
3524 put:
3525 	scsi_host_put(target->scsi_host);
3526 	if (ret < 0)
3527 		scsi_host_put(target->scsi_host);
3528 
3529 	return ret;
3530 
3531 err_disconnect:
3532 	srp_disconnect_target(target);
3533 
3534 free_ch:
3535 	for (i = 0; i < target->ch_count; i++) {
3536 		ch = &target->ch[i];
3537 		srp_free_ch_ib(target, ch);
3538 		srp_free_req_data(target, ch);
3539 	}
3540 
3541 	kfree(target->ch);
3542 	goto out;
3543 }
3544 
3545 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
3546 
3547 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
3548 			  char *buf)
3549 {
3550 	struct srp_host *host = container_of(dev, struct srp_host, dev);
3551 
3552 	return sprintf(buf, "%s\n", host->srp_dev->dev->name);
3553 }
3554 
3555 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
3556 
3557 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
3558 			 char *buf)
3559 {
3560 	struct srp_host *host = container_of(dev, struct srp_host, dev);
3561 
3562 	return sprintf(buf, "%d\n", host->port);
3563 }
3564 
3565 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
3566 
3567 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
3568 {
3569 	struct srp_host *host;
3570 
3571 	host = kzalloc(sizeof *host, GFP_KERNEL);
3572 	if (!host)
3573 		return NULL;
3574 
3575 	INIT_LIST_HEAD(&host->target_list);
3576 	spin_lock_init(&host->target_lock);
3577 	init_completion(&host->released);
3578 	mutex_init(&host->add_target_mutex);
3579 	host->srp_dev = device;
3580 	host->port = port;
3581 
3582 	host->dev.class = &srp_class;
3583 	host->dev.parent = device->dev->dev.parent;
3584 	dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
3585 
3586 	if (device_register(&host->dev))
3587 		goto free_host;
3588 	if (device_create_file(&host->dev, &dev_attr_add_target))
3589 		goto err_class;
3590 	if (device_create_file(&host->dev, &dev_attr_ibdev))
3591 		goto err_class;
3592 	if (device_create_file(&host->dev, &dev_attr_port))
3593 		goto err_class;
3594 
3595 	return host;
3596 
3597 err_class:
3598 	device_unregister(&host->dev);
3599 
3600 free_host:
3601 	kfree(host);
3602 
3603 	return NULL;
3604 }
3605 
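/*
 * IB client callback that is invoked once for every RDMA device: determine
 * the memory registration capabilities (FMR / fast registration), allocate a
 * protection domain and register one srp_host per port.
 */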
3606 static void srp_add_one(struct ib_device *device)
3607 {
3608 	struct srp_device *srp_dev;
3609 	struct ib_device_attr *attr = &device->attrs;
3610 	struct srp_host *host;
3611 	int mr_page_shift, p;
3612 	u64 max_pages_per_mr;
3613 	unsigned int flags = 0;
3614 
3615 	srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
3616 	if (!srp_dev)
3617 		return;
3618 
3619 	/*
3620 	 * Use the smallest page size supported by the HCA, down to a
3621 	 * minimum of 4096 bytes. We're unlikely to build large sglists
3622 	 * out of smaller entries.
3623 	 */
3624 	mr_page_shift		= max(12, ffs(attr->page_size_cap) - 1);
3625 	srp_dev->mr_page_size	= 1 << mr_page_shift;
3626 	srp_dev->mr_page_mask	= ~((u64) srp_dev->mr_page_size - 1);
3627 	max_pages_per_mr	= attr->max_mr_size;
3628 	do_div(max_pages_per_mr, srp_dev->mr_page_size);
3629 	pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
3630 		 attr->max_mr_size, srp_dev->mr_page_size,
3631 		 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
3632 	srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
3633 					  max_pages_per_mr);
3634 
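	/*
	 * Detect which registration methods the HCA supports: FMR (classic
	 * fast memory regions) and/or FR (fast registration work requests).
	 * Fast registration is used when available unless the user cleared
	 * prefer_fr.
	 */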
3635 	srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
3636 			    device->map_phys_fmr && device->unmap_fmr);
3637 	srp_dev->has_fr = (attr->device_cap_flags &
3638 			   IB_DEVICE_MEM_MGT_EXTENSIONS);
3639 	if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
3640 		dev_warn(&device->dev, "neither FMR nor FR is supported\n");
3641 	} else if (!never_register &&
3642 		   attr->max_mr_size >= 2 * srp_dev->mr_page_size) {
3643 		srp_dev->use_fast_reg = (srp_dev->has_fr &&
3644 					 (!srp_dev->has_fmr || prefer_fr));
3645 		srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
3646 	}
3647 
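	/*
	 * Fall back to the global rkey only when memory registration is
	 * disabled or unavailable; it exposes all of host memory to the
	 * remote side and is therefore marked unsafe.
	 */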
3648 	if (never_register || !register_always ||
3649 	    (!srp_dev->has_fmr && !srp_dev->has_fr))
3650 		flags |= IB_PD_UNSAFE_GLOBAL_RKEY;
3651 
3652 	if (srp_dev->use_fast_reg) {
3653 		srp_dev->max_pages_per_mr =
3654 			min_t(u32, srp_dev->max_pages_per_mr,
3655 			      attr->max_fast_reg_page_list_len);
3656 	}
3657 	srp_dev->mr_max_size	= srp_dev->mr_page_size *
3658 				   srp_dev->max_pages_per_mr;
3659 	pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
3660 		 device->name, mr_page_shift, attr->max_mr_size,
3661 		 attr->max_fast_reg_page_list_len,
3662 		 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
3663 
3664 	INIT_LIST_HEAD(&srp_dev->dev_list);
3665 
3666 	srp_dev->dev = device;
3667 	srp_dev->pd  = ib_alloc_pd(device, flags);
3668 	if (IS_ERR(srp_dev->pd))
3669 		goto free_dev;
3670 
3671 	if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
3672 		srp_dev->global_rkey = srp_dev->pd->unsafe_global_rkey;
3673 		WARN_ON_ONCE(srp_dev->global_rkey == 0);
3674 	}
3675 
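	/* Create one SRP host per port of this HCA. */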
3676 	for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
3677 		host = srp_add_port(srp_dev, p);
3678 		if (host)
3679 			list_add_tail(&host->list, &srp_dev->dev_list);
3680 	}
3681 
3682 	ib_set_client_data(device, &srp_client, srp_dev);
3683 	return;
3684 
3685 free_dev:
3686 	kfree(srp_dev);
3687 }
3688 
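/*
 * ib_client .remove callback: tear down every SRP host and target port that
 * was created for the RDMA device being removed, then release its PD.
 */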
3689 static void srp_remove_one(struct ib_device *device, void *client_data)
3690 {
3691 	struct srp_device *srp_dev;
3692 	struct srp_host *host, *tmp_host;
3693 	struct srp_target_port *target;
3694 
3695 	srp_dev = client_data;
3696 	if (!srp_dev)
3697 		return;
3698 
3699 	list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
3700 		device_unregister(&host->dev);
3701 		/*
3702 		 * Wait for the sysfs entry to go away, so that no new
3703 		 * target ports can be created.
3704 		 */
3705 		wait_for_completion(&host->released);
3706 
3707 		/*
3708 		 * Remove all target ports.
3709 		 */
3710 		spin_lock(&host->target_lock);
3711 		list_for_each_entry(target, &host->target_list, list)
3712 			srp_queue_remove_work(target);
3713 		spin_unlock(&host->target_lock);
3714 
3715 		/*
3716 		 * Wait for tl_err and target port removal tasks.
3717 		 */
3718 		flush_workqueue(system_long_wq);
3719 		flush_workqueue(srp_remove_wq);
3720 
3721 		kfree(host);
3722 	}
3723 
3724 	ib_dealloc_pd(srp_dev->pd);
3725 
3726 	kfree(srp_dev);
3727 }
3728 
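/*
 * Hooks into the SRP transport class, which drives reconnect, fast_io_fail
 * and dev_loss handling for every rport created by this driver.
 */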
3729 static struct srp_function_template ib_srp_transport_functions = {
3730 	.has_rport_state	 = true,
3731 	.reset_timer_if_blocked	 = true,
3732 	.reconnect_delay	 = &srp_reconnect_delay,
3733 	.fast_io_fail_tmo	 = &srp_fast_io_fail_tmo,
3734 	.dev_loss_tmo		 = &srp_dev_loss_tmo,
3735 	.reconnect		 = srp_rport_reconnect,
3736 	.rport_delete		 = srp_rport_delete,
3737 	.terminate_rport_io	 = srp_terminate_io,
3738 };
3739 
3740 static int __init srp_init_module(void)
3741 {
3742 	int ret;
3743 
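	/* Sanity-check and clamp the scatter/gather module parameters. */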
3744 	if (srp_sg_tablesize) {
3745 		pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
3746 		if (!cmd_sg_entries)
3747 			cmd_sg_entries = srp_sg_tablesize;
3748 	}
3749 
3750 	if (!cmd_sg_entries)
3751 		cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
3752 
3753 	if (cmd_sg_entries > 255) {
3754 		pr_warn("Clamping cmd_sg_entries to 255\n");
3755 		cmd_sg_entries = 255;
3756 	}
3757 
3758 	if (!indirect_sg_entries)
3759 		indirect_sg_entries = cmd_sg_entries;
3760 	else if (indirect_sg_entries < cmd_sg_entries) {
3761 		pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
3762 			cmd_sg_entries);
3763 		indirect_sg_entries = cmd_sg_entries;
3764 	}
3765 
3766 	if (indirect_sg_entries > SG_MAX_SEGMENTS) {
3767 		pr_warn("Clamping indirect_sg_entries to %u\n",
3768 			SG_MAX_SEGMENTS);
3769 		indirect_sg_entries = SG_MAX_SEGMENTS;
3770 	}
3771 
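	/* Workqueue used to run asynchronous target port removal. */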
3772 	srp_remove_wq = create_workqueue("srp_remove");
3773 	if (!srp_remove_wq) {
3774 		ret = -ENOMEM;
3775 		goto out;
3776 	}
3777 
3778 	ret = -ENOMEM;
3779 	ib_srp_transport_template =
3780 		srp_attach_transport(&ib_srp_transport_functions);
3781 	if (!ib_srp_transport_template)
3782 		goto destroy_wq;
3783 
3784 	ret = class_register(&srp_class);
3785 	if (ret) {
3786 		pr_err("couldn't register class infiniband_srp\n");
3787 		goto release_tr;
3788 	}
3789 
3790 	ib_sa_register_client(&srp_sa_client);
3791 
3792 	ret = ib_register_client(&srp_client);
3793 	if (ret) {
3794 		pr_err("couldn't register IB client\n");
3795 		goto unreg_sa;
3796 	}
3797 
3798 out:
3799 	return ret;
3800 
3801 unreg_sa:
3802 	ib_sa_unregister_client(&srp_sa_client);
3803 	class_unregister(&srp_class);
3804 
3805 release_tr:
3806 	srp_release_transport(ib_srp_transport_template);
3807 
3808 destroy_wq:
3809 	destroy_workqueue(srp_remove_wq);
3810 	goto out;
3811 }
3812 
3813 static void __exit srp_cleanup_module(void)
3814 {
3815 	ib_unregister_client(&srp_client);
3816 	ib_sa_unregister_client(&srp_sa_client);
3817 	class_unregister(&srp_class);
3818 	srp_release_transport(ib_srp_transport_template);
3819 	destroy_workqueue(srp_remove_wq);
3820 }
3821 
3822 module_init(srp_init_module);
3823 module_exit(srp_cleanup_module);
3824