1 /*
2  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
34 
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/err.h>
39 #include <linux/string.h>
40 #include <linux/parser.h>
41 #include <linux/random.h>
42 #include <linux/jiffies.h>
43 #include <linux/lockdep.h>
44 #include <rdma/ib_cache.h>
45 
46 #include <linux/atomic.h>
47 
48 #include <scsi/scsi.h>
49 #include <scsi/scsi_device.h>
50 #include <scsi/scsi_dbg.h>
51 #include <scsi/scsi_tcq.h>
52 #include <scsi/srp.h>
53 #include <scsi/scsi_transport_srp.h>
54 
55 #include "ib_srp.h"
56 
57 #define DRV_NAME	"ib_srp"
58 #define PFX		DRV_NAME ": "
59 #define DRV_VERSION	"2.0"
60 #define DRV_RELDATE	"July 26, 2015"
61 
62 MODULE_AUTHOR("Roland Dreier");
63 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
64 MODULE_LICENSE("Dual BSD/GPL");
65 MODULE_INFO(release_date, DRV_RELDATE);
66 
67 #if !defined(CONFIG_DYNAMIC_DEBUG)
68 #define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
69 #define DYNAMIC_DEBUG_BRANCH(descriptor) false
70 #endif
71 
72 static unsigned int srp_sg_tablesize;
73 static unsigned int cmd_sg_entries;
74 static unsigned int indirect_sg_entries;
75 static bool allow_ext_sg;
76 static bool prefer_fr = true;
77 static bool register_always = true;
78 static bool never_register;
79 static int topspin_workarounds = 1;
80 
81 module_param(srp_sg_tablesize, uint, 0444);
82 MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
83 
84 module_param(cmd_sg_entries, uint, 0444);
85 MODULE_PARM_DESC(cmd_sg_entries,
86 		 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
87 
88 module_param(indirect_sg_entries, uint, 0444);
89 MODULE_PARM_DESC(indirect_sg_entries,
90 		 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");
91 
92 module_param(allow_ext_sg, bool, 0444);
93 MODULE_PARM_DESC(allow_ext_sg,
94 		  "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
95 
96 module_param(topspin_workarounds, int, 0444);
97 MODULE_PARM_DESC(topspin_workarounds,
98 		 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
99 
100 module_param(prefer_fr, bool, 0444);
101 MODULE_PARM_DESC(prefer_fr,
102 "Whether to use fast registration if both FMR and fast registration are supported");
103 
104 module_param(register_always, bool, 0444);
105 MODULE_PARM_DESC(register_always,
106 		 "Use memory registration even for contiguous memory regions");
107 
108 module_param(never_register, bool, 0444);
109 MODULE_PARM_DESC(never_register, "Never register memory");
110 
111 static const struct kernel_param_ops srp_tmo_ops;
112 
113 static int srp_reconnect_delay = 10;
114 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
115 		S_IRUGO | S_IWUSR);
116 MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
117 
118 static int srp_fast_io_fail_tmo = 15;
119 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
120 		S_IRUGO | S_IWUSR);
121 MODULE_PARM_DESC(fast_io_fail_tmo,
122 		 "Number of seconds between the observation of a transport"
123 		 " layer error and failing all I/O. \"off\" means that this"
124 		 " functionality is disabled.");
125 
126 static int srp_dev_loss_tmo = 600;
127 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
128 		S_IRUGO | S_IWUSR);
129 MODULE_PARM_DESC(dev_loss_tmo,
130 		 "Maximum number of seconds that the SRP transport should"
131 		 " insulate transport layer errors. After this time has been"
132 		 " exceeded the SCSI host is removed. Should be"
133 		 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
134 		 " if fast_io_fail_tmo has not been set. \"off\" means that"
135 		 " this functionality is disabled.");
136 
137 static unsigned ch_count;
138 module_param(ch_count, uint, 0444);
139 MODULE_PARM_DESC(ch_count,
140 		 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
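
/*
 * The parameters above can be set at module load time, e.g. (illustrative
 * values only):
 *
 *	modprobe ib_srp cmd_sg_entries=64 ch_count=4
 *
 * reconnect_delay, fast_io_fail_tmo and dev_loss_tmo are registered with
 * S_IRUGO | S_IWUSR and can therefore also be changed at runtime through
 * sysfs, e.g.:
 *
 *	echo 5   > /sys/module/ib_srp/parameters/fast_io_fail_tmo
 *	echo off > /sys/module/ib_srp/parameters/dev_loss_tmo
 */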
141 
142 static void srp_add_one(struct ib_device *device);
143 static void srp_remove_one(struct ib_device *device, void *client_data);
144 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
145 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
146 		const char *opname);
147 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
148 
149 static struct scsi_transport_template *ib_srp_transport_template;
150 static struct workqueue_struct *srp_remove_wq;
151 
152 static struct ib_client srp_client = {
153 	.name   = "srp",
154 	.add    = srp_add_one,
155 	.remove = srp_remove_one
156 };
157 
158 static struct ib_sa_client srp_sa_client;
159 
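/**
 * srp_tmo_get() - show an SRP timeout kernel module parameter
 * @buffer: Where to write the parameter value.
 * @kp: Kernel parameter descriptor.
 *
 * Negative timeout values are reported as "off".
 */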
160 static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
161 {
162 	int tmo = *(int *)kp->arg;
163 
164 	if (tmo >= 0)
165 		return sprintf(buffer, "%d", tmo);
166 	else
167 		return sprintf(buffer, "off");
168 }
169 
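/**
 * srp_tmo_set() - validate and set an SRP timeout kernel module parameter
 * @val: New parameter value; "off" disables the corresponding timeout.
 * @kp: Kernel parameter descriptor.
 *
 * The new value is only stored if the resulting combination of
 * reconnect_delay, fast_io_fail_tmo and dev_loss_tmo is accepted by
 * srp_tmo_valid().
 */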
170 static int srp_tmo_set(const char *val, const struct kernel_param *kp)
171 {
172 	int tmo, res;
173 
174 	res = srp_parse_tmo(&tmo, val);
175 	if (res)
176 		goto out;
177 
178 	if (kp->arg == &srp_reconnect_delay)
179 		res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
180 				    srp_dev_loss_tmo);
181 	else if (kp->arg == &srp_fast_io_fail_tmo)
182 		res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
183 	else
184 		res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
185 				    tmo);
186 	if (res)
187 		goto out;
188 	*(int *)kp->arg = tmo;
189 
190 out:
191 	return res;
192 }
193 
194 static const struct kernel_param_ops srp_tmo_ops = {
195 	.get = srp_tmo_get,
196 	.set = srp_tmo_set,
197 };
198 
199 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
200 {
201 	return (struct srp_target_port *) host->hostdata;
202 }
203 
204 static const char *srp_target_info(struct Scsi_Host *host)
205 {
206 	return host_to_target(host)->target_name;
207 }
208 
209 static int srp_target_is_topspin(struct srp_target_port *target)
210 {
211 	static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
212 	static const u8 cisco_oui[3]   = { 0x00, 0x1b, 0x0d };
213 
214 	return topspin_workarounds &&
215 		(!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
216 		 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
217 }
218 
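/*
 * Allocate an information unit (IU) buffer of @size bytes and map it for DMA
 * in the given direction. Returns NULL on allocation or mapping failure.
 */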
219 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
220 				   gfp_t gfp_mask,
221 				   enum dma_data_direction direction)
222 {
223 	struct srp_iu *iu;
224 
225 	iu = kmalloc(sizeof *iu, gfp_mask);
226 	if (!iu)
227 		goto out;
228 
229 	iu->buf = kzalloc(size, gfp_mask);
230 	if (!iu->buf)
231 		goto out_free_iu;
232 
233 	iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
234 				    direction);
235 	if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
236 		goto out_free_buf;
237 
238 	iu->size      = size;
239 	iu->direction = direction;
240 
241 	return iu;
242 
243 out_free_buf:
244 	kfree(iu->buf);
245 out_free_iu:
246 	kfree(iu);
247 out:
248 	return NULL;
249 }
250 
251 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
252 {
253 	if (!iu)
254 		return;
255 
256 	ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
257 			    iu->direction);
258 	kfree(iu->buf);
259 	kfree(iu);
260 }
261 
262 static void srp_qp_event(struct ib_event *event, void *context)
263 {
264 	pr_debug("QP event %s (%d)\n",
265 		 ib_event_msg(event->event), event->event);
266 }
267 
268 static int srp_init_qp(struct srp_target_port *target,
269 		       struct ib_qp *qp)
270 {
271 	struct ib_qp_attr *attr;
272 	int ret;
273 
274 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
275 	if (!attr)
276 		return -ENOMEM;
277 
278 	ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
279 				  target->srp_host->port,
280 				  be16_to_cpu(target->pkey),
281 				  &attr->pkey_index);
282 	if (ret)
283 		goto out;
284 
285 	attr->qp_state        = IB_QPS_INIT;
286 	attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
287 				    IB_ACCESS_REMOTE_WRITE);
288 	attr->port_num        = target->srp_host->port;
289 
290 	ret = ib_modify_qp(qp, attr,
291 			   IB_QP_STATE		|
292 			   IB_QP_PKEY_INDEX	|
293 			   IB_QP_ACCESS_FLAGS	|
294 			   IB_QP_PORT);
295 
296 out:
297 	kfree(attr);
298 	return ret;
299 }
300 
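/*
 * Allocate a new IB CM ID for @ch, replacing any existing CM ID, and
 * reinitialize the path record fields from the target port parameters.
 */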
301 static int srp_new_cm_id(struct srp_rdma_ch *ch)
302 {
303 	struct srp_target_port *target = ch->target;
304 	struct ib_cm_id *new_cm_id;
305 
306 	new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
307 				    srp_cm_handler, ch);
308 	if (IS_ERR(new_cm_id))
309 		return PTR_ERR(new_cm_id);
310 
311 	if (ch->cm_id)
312 		ib_destroy_cm_id(ch->cm_id);
313 	ch->cm_id = new_cm_id;
314 	if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev,
315 			    target->srp_host->port))
316 		ch->path.rec_type = SA_PATH_REC_TYPE_OPA;
317 	else
318 		ch->path.rec_type = SA_PATH_REC_TYPE_IB;
319 	ch->path.sgid = target->sgid;
320 	ch->path.dgid = target->orig_dgid;
321 	ch->path.pkey = target->pkey;
322 	ch->path.service_id = target->service_id;
323 
324 	return 0;
325 }
326 
327 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
328 {
329 	struct srp_device *dev = target->srp_host->srp_dev;
330 	struct ib_fmr_pool_param fmr_param;
331 
332 	memset(&fmr_param, 0, sizeof(fmr_param));
333 	fmr_param.pool_size	    = target->mr_pool_size;
334 	fmr_param.dirty_watermark   = fmr_param.pool_size / 4;
335 	fmr_param.cache		    = 1;
336 	fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
337 	fmr_param.page_shift	    = ilog2(dev->mr_page_size);
338 	fmr_param.access	    = (IB_ACCESS_LOCAL_WRITE |
339 				       IB_ACCESS_REMOTE_WRITE |
340 				       IB_ACCESS_REMOTE_READ);
341 
342 	return ib_create_fmr_pool(dev->pd, &fmr_param);
343 }
344 
345 /**
346  * srp_destroy_fr_pool() - free the resources owned by a pool
347  * @pool: Fast registration pool to be destroyed.
348  */
349 static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
350 {
351 	int i;
352 	struct srp_fr_desc *d;
353 
354 	if (!pool)
355 		return;
356 
357 	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
358 		if (d->mr)
359 			ib_dereg_mr(d->mr);
360 	}
361 	kfree(pool);
362 }
363 
364 /**
365  * srp_create_fr_pool() - allocate and initialize a pool for fast registration
366  * @device:            IB device to allocate fast registration descriptors for.
367  * @pd:                Protection domain associated with the FR descriptors.
368  * @pool_size:         Number of descriptors to allocate.
369  * @max_page_list_len: Maximum fast registration work request page list length.
370  */
371 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
372 					      struct ib_pd *pd, int pool_size,
373 					      int max_page_list_len)
374 {
375 	struct srp_fr_pool *pool;
376 	struct srp_fr_desc *d;
377 	struct ib_mr *mr;
378 	int i, ret = -EINVAL;
379 
380 	if (pool_size <= 0)
381 		goto err;
382 	ret = -ENOMEM;
383 	pool = kzalloc(sizeof(struct srp_fr_pool) +
384 		       pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
385 	if (!pool)
386 		goto err;
387 	pool->size = pool_size;
388 	pool->max_page_list_len = max_page_list_len;
389 	spin_lock_init(&pool->lock);
390 	INIT_LIST_HEAD(&pool->free_list);
391 
392 	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
393 		mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
394 				 max_page_list_len);
395 		if (IS_ERR(mr)) {
396 			ret = PTR_ERR(mr);
397 			if (ret == -ENOMEM)
398 				pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n",
399 					dev_name(&device->dev));
400 			goto destroy_pool;
401 		}
402 		d->mr = mr;
403 		list_add_tail(&d->entry, &pool->free_list);
404 	}
405 
406 out:
407 	return pool;
408 
409 destroy_pool:
410 	srp_destroy_fr_pool(pool);
411 
412 err:
413 	pool = ERR_PTR(ret);
414 	goto out;
415 }
416 
417 /**
418  * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
419  * @pool: Pool to obtain descriptor from.
420  */
421 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
422 {
423 	struct srp_fr_desc *d = NULL;
424 	unsigned long flags;
425 
426 	spin_lock_irqsave(&pool->lock, flags);
427 	if (!list_empty(&pool->free_list)) {
428 		d = list_first_entry(&pool->free_list, typeof(*d), entry);
429 		list_del(&d->entry);
430 	}
431 	spin_unlock_irqrestore(&pool->lock, flags);
432 
433 	return d;
434 }
435 
436 /**
437  * srp_fr_pool_put() - put an FR descriptor back in the free list
438  * @pool: Pool the descriptor was allocated from.
439  * @desc: Pointer to an array of fast registration descriptor pointers.
440  * @n:    Number of descriptors to put back.
441  *
442  * Note: The caller must already have queued an invalidation request for
443  * desc->mr->rkey before calling this function.
444  */
445 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
446 			    int n)
447 {
448 	unsigned long flags;
449 	int i;
450 
451 	spin_lock_irqsave(&pool->lock, flags);
452 	for (i = 0; i < n; i++)
453 		list_add(&desc[i]->entry, &pool->free_list);
454 	spin_unlock_irqrestore(&pool->lock, flags);
455 }
456 
457 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
458 {
459 	struct srp_device *dev = target->srp_host->srp_dev;
460 
461 	return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
462 				  dev->max_pages_per_mr);
463 }
464 
465 /**
466  * srp_destroy_qp() - destroy an RDMA queue pair
467  * @ch: SRP RDMA channel.
468  * @qp: RDMA queue pair.
469  *
470  * Drain the qp before destroying it. This prevents the receive completion
471  * handler from accessing the queue pair while it is being destroyed.
472  */
473 static void srp_destroy_qp(struct srp_rdma_ch *ch, struct ib_qp *qp)
474 {
475 	spin_lock_irq(&ch->lock);
476 	ib_process_cq_direct(ch->send_cq, -1);
477 	spin_unlock_irq(&ch->lock);
478 
479 	ib_drain_qp(qp);
480 	ib_destroy_qp(qp);
481 }
482 
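/*
 * Allocate the completion queues, queue pair and memory registration pool of
 * an RDMA channel. Any previously allocated IB resources of @ch are only torn
 * down after their replacements have been created successfully.
 */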
483 static int srp_create_ch_ib(struct srp_rdma_ch *ch)
484 {
485 	struct srp_target_port *target = ch->target;
486 	struct srp_device *dev = target->srp_host->srp_dev;
487 	struct ib_qp_init_attr *init_attr;
488 	struct ib_cq *recv_cq, *send_cq;
489 	struct ib_qp *qp;
490 	struct ib_fmr_pool *fmr_pool = NULL;
491 	struct srp_fr_pool *fr_pool = NULL;
492 	const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
493 	int ret;
494 
495 	init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
496 	if (!init_attr)
497 		return -ENOMEM;
498 
499 	/* queue_size + 1 for ib_drain_rq() */
500 	recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
501 				ch->comp_vector, IB_POLL_SOFTIRQ);
502 	if (IS_ERR(recv_cq)) {
503 		ret = PTR_ERR(recv_cq);
504 		goto err;
505 	}
506 
507 	send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
508 				ch->comp_vector, IB_POLL_DIRECT);
509 	if (IS_ERR(send_cq)) {
510 		ret = PTR_ERR(send_cq);
511 		goto err_recv_cq;
512 	}
513 
514 	init_attr->event_handler       = srp_qp_event;
515 	init_attr->cap.max_send_wr     = m * target->queue_size;
516 	init_attr->cap.max_recv_wr     = target->queue_size + 1;
517 	init_attr->cap.max_recv_sge    = 1;
518 	init_attr->cap.max_send_sge    = 1;
519 	init_attr->sq_sig_type         = IB_SIGNAL_REQ_WR;
520 	init_attr->qp_type             = IB_QPT_RC;
521 	init_attr->send_cq             = send_cq;
522 	init_attr->recv_cq             = recv_cq;
523 
524 	qp = ib_create_qp(dev->pd, init_attr);
525 	if (IS_ERR(qp)) {
526 		ret = PTR_ERR(qp);
527 		goto err_send_cq;
528 	}
529 
530 	ret = srp_init_qp(target, qp);
531 	if (ret)
532 		goto err_qp;
533 
534 	if (dev->use_fast_reg) {
535 		fr_pool = srp_alloc_fr_pool(target);
536 		if (IS_ERR(fr_pool)) {
537 			ret = PTR_ERR(fr_pool);
538 			shost_printk(KERN_WARNING, target->scsi_host, PFX
539 				     "FR pool allocation failed (%d)\n", ret);
540 			goto err_qp;
541 		}
542 	} else if (dev->use_fmr) {
543 		fmr_pool = srp_alloc_fmr_pool(target);
544 		if (IS_ERR(fmr_pool)) {
545 			ret = PTR_ERR(fmr_pool);
546 			shost_printk(KERN_WARNING, target->scsi_host, PFX
547 				     "FMR pool allocation failed (%d)\n", ret);
548 			goto err_qp;
549 		}
550 	}
551 
552 	if (ch->qp)
553 		srp_destroy_qp(ch, ch->qp);
554 	if (ch->recv_cq)
555 		ib_free_cq(ch->recv_cq);
556 	if (ch->send_cq)
557 		ib_free_cq(ch->send_cq);
558 
559 	ch->qp = qp;
560 	ch->recv_cq = recv_cq;
561 	ch->send_cq = send_cq;
562 
563 	if (dev->use_fast_reg) {
564 		if (ch->fr_pool)
565 			srp_destroy_fr_pool(ch->fr_pool);
566 		ch->fr_pool = fr_pool;
567 	} else if (dev->use_fmr) {
568 		if (ch->fmr_pool)
569 			ib_destroy_fmr_pool(ch->fmr_pool);
570 		ch->fmr_pool = fmr_pool;
571 	}
572 
573 	kfree(init_attr);
574 	return 0;
575 
576 err_qp:
577 	ib_destroy_qp(qp);
578 
579 err_send_cq:
580 	ib_free_cq(send_cq);
581 
582 err_recv_cq:
583 	ib_free_cq(recv_cq);
584 
585 err:
586 	kfree(init_attr);
587 	return ret;
588 }
589 
590 /*
591  * Note: this function may be called without srp_alloc_iu_bufs() having been
592  * invoked. Hence the ch->[rt]x_ring checks.
593  */
594 static void srp_free_ch_ib(struct srp_target_port *target,
595 			   struct srp_rdma_ch *ch)
596 {
597 	struct srp_device *dev = target->srp_host->srp_dev;
598 	int i;
599 
600 	if (!ch->target)
601 		return;
602 
603 	if (ch->cm_id) {
604 		ib_destroy_cm_id(ch->cm_id);
605 		ch->cm_id = NULL;
606 	}
607 
608 	/* If srp_new_cm_id() succeeded but srp_create_ch_ib() did not, return. */
609 	if (!ch->qp)
610 		return;
611 
612 	if (dev->use_fast_reg) {
613 		if (ch->fr_pool)
614 			srp_destroy_fr_pool(ch->fr_pool);
615 	} else if (dev->use_fmr) {
616 		if (ch->fmr_pool)
617 			ib_destroy_fmr_pool(ch->fmr_pool);
618 	}
619 
620 	srp_destroy_qp(ch, ch->qp);
621 	ib_free_cq(ch->send_cq);
622 	ib_free_cq(ch->recv_cq);
623 
624 	/*
625 	 * Prevent the SCSI error handler from using this channel after it
626 	 * has been freed. The SCSI error handler may keep trying to perform
627 	 * recovery actions after scsi_remove_host() has returned.
629 	 */
630 	ch->target = NULL;
631 
632 	ch->qp = NULL;
633 	ch->send_cq = ch->recv_cq = NULL;
634 
635 	if (ch->rx_ring) {
636 		for (i = 0; i < target->queue_size; ++i)
637 			srp_free_iu(target->srp_host, ch->rx_ring[i]);
638 		kfree(ch->rx_ring);
639 		ch->rx_ring = NULL;
640 	}
641 	if (ch->tx_ring) {
642 		for (i = 0; i < target->queue_size; ++i)
643 			srp_free_iu(target->srp_host, ch->tx_ring[i]);
644 		kfree(ch->tx_ring);
645 		ch->tx_ring = NULL;
646 	}
647 }
648 
649 static void srp_path_rec_completion(int status,
650 				    struct sa_path_rec *pathrec,
651 				    void *ch_ptr)
652 {
653 	struct srp_rdma_ch *ch = ch_ptr;
654 	struct srp_target_port *target = ch->target;
655 
656 	ch->status = status;
657 	if (status)
658 		shost_printk(KERN_ERR, target->scsi_host,
659 			     PFX "Got failed path rec status %d\n", status);
660 	else
661 		ch->path = *pathrec;
662 	complete(&ch->done);
663 }
664 
665 static int srp_lookup_path(struct srp_rdma_ch *ch)
666 {
667 	struct srp_target_port *target = ch->target;
668 	int ret;
669 
670 	ch->path.numb_path = 1;
671 
672 	init_completion(&ch->done);
673 
674 	ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
675 					       target->srp_host->srp_dev->dev,
676 					       target->srp_host->port,
677 					       &ch->path,
678 					       IB_SA_PATH_REC_SERVICE_ID |
679 					       IB_SA_PATH_REC_DGID	 |
680 					       IB_SA_PATH_REC_SGID	 |
681 					       IB_SA_PATH_REC_NUMB_PATH	 |
682 					       IB_SA_PATH_REC_PKEY,
683 					       SRP_PATH_REC_TIMEOUT_MS,
684 					       GFP_KERNEL,
685 					       srp_path_rec_completion,
686 					       ch, &ch->path_query);
687 	if (ch->path_query_id < 0)
688 		return ch->path_query_id;
689 
690 	ret = wait_for_completion_interruptible(&ch->done);
691 	if (ret < 0)
692 		return ret;
693 
694 	if (ch->status < 0)
695 		shost_printk(KERN_WARNING, target->scsi_host,
696 			     PFX "Path record query failed\n");
697 
698 	return ch->status;
699 }
700 
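/*
 * Build an SRP_LOGIN_REQ and send it through the IB CM. @multich tells the
 * target whether this login adds another RDMA channel to an existing
 * initiator/target nexus (SRP_MULTICHAN_MULTI) instead of creating the first
 * channel.
 */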
701 static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
702 {
703 	struct srp_target_port *target = ch->target;
704 	struct {
705 		struct ib_cm_req_param param;
706 		struct srp_login_req   priv;
707 	} *req = NULL;
708 	int status;
709 
710 	req = kzalloc(sizeof *req, GFP_KERNEL);
711 	if (!req)
712 		return -ENOMEM;
713 
714 	req->param.primary_path		      = &ch->path;
715 	req->param.alternate_path 	      = NULL;
716 	req->param.service_id 		      = target->service_id;
717 	req->param.qp_num		      = ch->qp->qp_num;
718 	req->param.qp_type		      = ch->qp->qp_type;
719 	req->param.private_data 	      = &req->priv;
720 	req->param.private_data_len 	      = sizeof req->priv;
721 	req->param.flow_control 	      = 1;
722 
723 	get_random_bytes(&req->param.starting_psn, 4);
724 	req->param.starting_psn 	     &= 0xffffff;
725 
726 	/*
727 	 * Pick some arbitrary defaults here; we could make these
728 	 * module parameters if anyone cared about setting them.
729 	 */
730 	req->param.responder_resources	      = 4;
731 	req->param.remote_cm_response_timeout = 20;
732 	req->param.local_cm_response_timeout  = 20;
733 	req->param.retry_count                = target->tl_retry_count;
734 	req->param.rnr_retry_count 	      = 7;
735 	req->param.max_cm_retries 	      = 15;
736 
737 	req->priv.opcode     	= SRP_LOGIN_REQ;
738 	req->priv.tag        	= 0;
739 	req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
740 	req->priv.req_buf_fmt 	= cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
741 					      SRP_BUF_FORMAT_INDIRECT);
742 	req->priv.req_flags	= (multich ? SRP_MULTICHAN_MULTI :
743 				   SRP_MULTICHAN_SINGLE);
744 	/*
745 	 * In the published SRP specification (draft rev. 16a), the
746 	 * port identifier format is 8 bytes of ID extension followed
747 	 * by 8 bytes of GUID.  Older drafts put the two halves in the
748 	 * opposite order, so that the GUID comes first.
749 	 *
750 	 * Targets conforming to these obsolete drafts can be
751 	 * recognized by the I/O Class they report.
752 	 */
753 	if (target->io_class == SRP_REV10_IB_IO_CLASS) {
754 		memcpy(req->priv.initiator_port_id,
755 		       &target->sgid.global.interface_id, 8);
756 		memcpy(req->priv.initiator_port_id + 8,
757 		       &target->initiator_ext, 8);
758 		memcpy(req->priv.target_port_id,     &target->ioc_guid, 8);
759 		memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
760 	} else {
761 		memcpy(req->priv.initiator_port_id,
762 		       &target->initiator_ext, 8);
763 		memcpy(req->priv.initiator_port_id + 8,
764 		       &target->sgid.global.interface_id, 8);
765 		memcpy(req->priv.target_port_id,     &target->id_ext, 8);
766 		memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
767 	}
768 
769 	/*
770 	 * Topspin/Cisco SRP targets will reject our login unless we
771 	 * zero out the first 8 bytes of our initiator port ID and set
772 	 * the second 8 bytes to the local node GUID.
773 	 */
774 	if (srp_target_is_topspin(target)) {
775 		shost_printk(KERN_DEBUG, target->scsi_host,
776 			     PFX "Topspin/Cisco initiator port ID workaround "
777 			     "activated for target GUID %016llx\n",
778 			     be64_to_cpu(target->ioc_guid));
779 		memset(req->priv.initiator_port_id, 0, 8);
780 		memcpy(req->priv.initiator_port_id + 8,
781 		       &target->srp_host->srp_dev->dev->node_guid, 8);
782 	}
783 
784 	status = ib_send_cm_req(ch->cm_id, &req->param);
785 
786 	kfree(req);
787 
788 	return status;
789 }
790 
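/*
 * Transition @target to the SRP_TARGET_REMOVED state and queue the removal
 * work. Returns false if the target was already in that state.
 */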
791 static bool srp_queue_remove_work(struct srp_target_port *target)
792 {
793 	bool changed = false;
794 
795 	spin_lock_irq(&target->lock);
796 	if (target->state != SRP_TARGET_REMOVED) {
797 		target->state = SRP_TARGET_REMOVED;
798 		changed = true;
799 	}
800 	spin_unlock_irq(&target->lock);
801 
802 	if (changed)
803 		queue_work(srp_remove_wq, &target->remove_work);
804 
805 	return changed;
806 }
807 
808 static void srp_disconnect_target(struct srp_target_port *target)
809 {
810 	struct srp_rdma_ch *ch;
811 	int i;
812 
813 	/* XXX should send SRP_I_LOGOUT request */
814 
815 	for (i = 0; i < target->ch_count; i++) {
816 		ch = &target->ch[i];
817 		ch->connected = false;
818 		if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
819 			shost_printk(KERN_DEBUG, target->scsi_host,
820 				     PFX "Sending CM DREQ failed\n");
821 		}
822 	}
823 }
824 
825 static void srp_free_req_data(struct srp_target_port *target,
826 			      struct srp_rdma_ch *ch)
827 {
828 	struct srp_device *dev = target->srp_host->srp_dev;
829 	struct ib_device *ibdev = dev->dev;
830 	struct srp_request *req;
831 	int i;
832 
833 	if (!ch->req_ring)
834 		return;
835 
836 	for (i = 0; i < target->req_ring_size; ++i) {
837 		req = &ch->req_ring[i];
838 		if (dev->use_fast_reg) {
839 			kfree(req->fr_list);
840 		} else {
841 			kfree(req->fmr_list);
842 			kfree(req->map_page);
843 		}
844 		if (req->indirect_dma_addr) {
845 			ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
846 					    target->indirect_size,
847 					    DMA_TO_DEVICE);
848 		}
849 		kfree(req->indirect_desc);
850 	}
851 
852 	kfree(ch->req_ring);
853 	ch->req_ring = NULL;
854 }
855 
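/*
 * Allocate the request ring of @ch, including the per-request memory
 * registration lists and DMA-mapped indirect descriptor buffers. On failure,
 * partially allocated resources are left for srp_free_req_data() to release.
 */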
856 static int srp_alloc_req_data(struct srp_rdma_ch *ch)
857 {
858 	struct srp_target_port *target = ch->target;
859 	struct srp_device *srp_dev = target->srp_host->srp_dev;
860 	struct ib_device *ibdev = srp_dev->dev;
861 	struct srp_request *req;
862 	void *mr_list;
863 	dma_addr_t dma_addr;
864 	int i, ret = -ENOMEM;
865 
866 	ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
867 			       GFP_KERNEL);
868 	if (!ch->req_ring)
869 		goto out;
870 
871 	for (i = 0; i < target->req_ring_size; ++i) {
872 		req = &ch->req_ring[i];
873 		mr_list = kmalloc(target->mr_per_cmd * sizeof(void *),
874 				  GFP_KERNEL);
875 		if (!mr_list)
876 			goto out;
877 		if (srp_dev->use_fast_reg) {
878 			req->fr_list = mr_list;
879 		} else {
880 			req->fmr_list = mr_list;
881 			req->map_page = kmalloc(srp_dev->max_pages_per_mr *
882 						sizeof(void *), GFP_KERNEL);
883 			if (!req->map_page)
884 				goto out;
885 		}
886 		req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
887 		if (!req->indirect_desc)
888 			goto out;
889 
890 		dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
891 					     target->indirect_size,
892 					     DMA_TO_DEVICE);
893 		if (ib_dma_mapping_error(ibdev, dma_addr))
894 			goto out;
895 
896 		req->indirect_dma_addr = dma_addr;
897 	}
898 	ret = 0;
899 
900 out:
901 	return ret;
902 }
903 
904 /**
905  * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
906  * @shost: SCSI host whose attributes to remove from sysfs.
907  *
908  * Note: any attributes defined in the host template that do not exist in
909  * sysfs when this function is invoked are silently ignored.
910  */
911 static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
912 {
913 	struct device_attribute **attr;
914 
915 	for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
916 		device_remove_file(&shost->shost_dev, *attr);
917 }
918 
919 static void srp_remove_target(struct srp_target_port *target)
920 {
921 	struct srp_rdma_ch *ch;
922 	int i;
923 
924 	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
925 
926 	srp_del_scsi_host_attr(target->scsi_host);
927 	srp_rport_get(target->rport);
928 	srp_remove_host(target->scsi_host);
929 	scsi_remove_host(target->scsi_host);
930 	srp_stop_rport_timers(target->rport);
931 	srp_disconnect_target(target);
932 	for (i = 0; i < target->ch_count; i++) {
933 		ch = &target->ch[i];
934 		srp_free_ch_ib(target, ch);
935 	}
936 	cancel_work_sync(&target->tl_err_work);
937 	srp_rport_put(target->rport);
938 	for (i = 0; i < target->ch_count; i++) {
939 		ch = &target->ch[i];
940 		srp_free_req_data(target, ch);
941 	}
942 	kfree(target->ch);
943 	target->ch = NULL;
944 
945 	spin_lock(&target->srp_host->target_lock);
946 	list_del(&target->list);
947 	spin_unlock(&target->srp_host->target_lock);
948 
949 	scsi_host_put(target->scsi_host);
950 }
951 
952 static void srp_remove_work(struct work_struct *work)
953 {
954 	struct srp_target_port *target =
955 		container_of(work, struct srp_target_port, remove_work);
956 
957 	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
958 
959 	srp_remove_target(target);
960 }
961 
962 static void srp_rport_delete(struct srp_rport *rport)
963 {
964 	struct srp_target_port *target = rport->lld_data;
965 
966 	srp_queue_remove_work(target);
967 }
968 
969 /**
970  * srp_connected_ch() - number of connected channels
971  * @target: SRP target port.
972  */
973 static int srp_connected_ch(struct srp_target_port *target)
974 {
975 	int i, c = 0;
976 
977 	for (i = 0; i < target->ch_count; i++)
978 		c += target->ch[i].connected;
979 
980 	return c;
981 }
982 
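/*
 * Connect a single RDMA channel: look up the path record, send the login
 * request and retry on port or DLID redirect rejections. Returns zero on
 * success or a negative error code.
 */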
983 static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
984 {
985 	struct srp_target_port *target = ch->target;
986 	int ret;
987 
988 	WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
989 
990 	ret = srp_lookup_path(ch);
991 	if (ret)
992 		goto out;
993 
994 	while (1) {
995 		init_completion(&ch->done);
996 		ret = srp_send_req(ch, multich);
997 		if (ret)
998 			goto out;
999 		ret = wait_for_completion_interruptible(&ch->done);
1000 		if (ret < 0)
1001 			goto out;
1002 
1003 		/*
1004 		 * The CM event handling code will set status to
1005 		 * SRP_PORT_REDIRECT if we get a port redirect REJ
1006 		 * back, or SRP_DLID_REDIRECT if we get a lid/qp
1007 		 * redirect REJ back.
1008 		 */
1009 		ret = ch->status;
1010 		switch (ret) {
1011 		case 0:
1012 			ch->connected = true;
1013 			goto out;
1014 
1015 		case SRP_PORT_REDIRECT:
1016 			ret = srp_lookup_path(ch);
1017 			if (ret)
1018 				goto out;
1019 			break;
1020 
1021 		case SRP_DLID_REDIRECT:
1022 			break;
1023 
1024 		case SRP_STALE_CONN:
1025 			shost_printk(KERN_ERR, target->scsi_host, PFX
1026 				     "giving up on stale connection\n");
1027 			ret = -ECONNRESET;
1028 			goto out;
1029 
1030 		default:
1031 			goto out;
1032 		}
1033 	}
1034 
1035 out:
1036 	return ret <= 0 ? ret : -ENODEV;
1037 }
1038 
1039 static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
1040 {
1041 	srp_handle_qp_err(cq, wc, "INV RKEY");
1042 }
1043 
1044 static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
1045 		u32 rkey)
1046 {
1047 	struct ib_send_wr *bad_wr;
1048 	struct ib_send_wr wr = {
1049 		.opcode		    = IB_WR_LOCAL_INV,
1050 		.next		    = NULL,
1051 		.num_sge	    = 0,
1052 		.send_flags	    = 0,
1053 		.ex.invalidate_rkey = rkey,
1054 	};
1055 
1056 	wr.wr_cqe = &req->reg_cqe;
1057 	req->reg_cqe.done = srp_inv_rkey_err_done;
1058 	return ib_post_send(ch->qp, &wr, &bad_wr);
1059 }
1060 
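/*
 * Undo the memory registrations of @req and unmap its scatterlist. For fast
 * registration, local invalidation work requests are queued before the
 * descriptors are returned to the FR pool.
 */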
1061 static void srp_unmap_data(struct scsi_cmnd *scmnd,
1062 			   struct srp_rdma_ch *ch,
1063 			   struct srp_request *req)
1064 {
1065 	struct srp_target_port *target = ch->target;
1066 	struct srp_device *dev = target->srp_host->srp_dev;
1067 	struct ib_device *ibdev = dev->dev;
1068 	int i, res;
1069 
1070 	if (!scsi_sglist(scmnd) ||
1071 	    (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1072 	     scmnd->sc_data_direction != DMA_FROM_DEVICE))
1073 		return;
1074 
1075 	if (dev->use_fast_reg) {
1076 		struct srp_fr_desc **pfr;
1077 
1078 		for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1079 			res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
1080 			if (res < 0) {
1081 				shost_printk(KERN_ERR, target->scsi_host, PFX
1082 				  "Queueing INV WR for rkey %#x failed (%d)\n",
1083 				  (*pfr)->mr->rkey, res);
1084 				queue_work(system_long_wq,
1085 					   &target->tl_err_work);
1086 			}
1087 		}
1088 		if (req->nmdesc)
1089 			srp_fr_pool_put(ch->fr_pool, req->fr_list,
1090 					req->nmdesc);
1091 	} else if (dev->use_fmr) {
1092 		struct ib_pool_fmr **pfmr;
1093 
1094 		for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
1095 			ib_fmr_pool_unmap(*pfmr);
1096 	}
1097 
1098 	ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1099 			scmnd->sc_data_direction);
1100 }
1101 
1102 /**
1103  * srp_claim_req() - Take ownership of the scmnd associated with a request.
1104  * @ch: SRP RDMA channel.
1105  * @req: SRP request.
1106  * @sdev: If not NULL, only take ownership for this SCSI device.
1107  * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1108  *         ownership of @req->scmnd if it equals @scmnd.
1109  *
1110  * Return value:
1111  * Either NULL or a pointer to the SCSI command the caller became owner of.
1112  */
1113 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1114 				       struct srp_request *req,
1115 				       struct scsi_device *sdev,
1116 				       struct scsi_cmnd *scmnd)
1117 {
1118 	unsigned long flags;
1119 
1120 	spin_lock_irqsave(&ch->lock, flags);
1121 	if (req->scmnd &&
1122 	    (!sdev || req->scmnd->device == sdev) &&
1123 	    (!scmnd || req->scmnd == scmnd)) {
1124 		scmnd = req->scmnd;
1125 		req->scmnd = NULL;
1126 	} else {
1127 		scmnd = NULL;
1128 	}
1129 	spin_unlock_irqrestore(&ch->lock, flags);
1130 
1131 	return scmnd;
1132 }
1133 
1134 /**
1135  * srp_free_req() - Unmap data and adjust ch->req_lim.
1136  * @ch:     SRP RDMA channel.
1137  * @req:    Request to be freed.
1138  * @scmnd:  SCSI command associated with @req.
1139  * @req_lim_delta: Amount to be added to @ch->req_lim.
1140  */
1141 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1142 			 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1143 {
1144 	unsigned long flags;
1145 
1146 	srp_unmap_data(scmnd, ch, req);
1147 
1148 	spin_lock_irqsave(&ch->lock, flags);
1149 	ch->req_lim += req_lim_delta;
1150 	spin_unlock_irqrestore(&ch->lock, flags);
1151 }
1152 
1153 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1154 			   struct scsi_device *sdev, int result)
1155 {
1156 	struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1157 
1158 	if (scmnd) {
1159 		srp_free_req(ch, req, scmnd, 0);
1160 		scmnd->result = result;
1161 		scmnd->scsi_done(scmnd);
1162 	}
1163 }
1164 
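/*
 * Finish all outstanding requests on all channels of @rport's target with
 * result DID_TRANSPORT_FAILFAST << 16.
 */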
1165 static void srp_terminate_io(struct srp_rport *rport)
1166 {
1167 	struct srp_target_port *target = rport->lld_data;
1168 	struct srp_rdma_ch *ch;
1169 	struct Scsi_Host *shost = target->scsi_host;
1170 	struct scsi_device *sdev;
1171 	int i, j;
1172 
1173 	/*
1174 	 * Invoking srp_terminate_io() while srp_queuecommand() is running
1175 	 * is not safe. Hence the warning statement below.
1176 	 */
1177 	shost_for_each_device(sdev, shost)
1178 		WARN_ON_ONCE(sdev->request_queue->request_fn_active);
1179 
1180 	for (i = 0; i < target->ch_count; i++) {
1181 		ch = &target->ch[i];
1182 
1183 		for (j = 0; j < target->req_ring_size; ++j) {
1184 			struct srp_request *req = &ch->req_ring[j];
1185 
1186 			srp_finish_req(ch, req, NULL,
1187 				       DID_TRANSPORT_FAILFAST << 16);
1188 		}
1189 	}
1190 }
1191 
1192 /*
1193  * It is up to the caller to ensure that srp_rport_reconnect() calls are
1194  * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1195  * srp_reset_device() or srp_reset_host() calls will occur while this function
1196  * is in progress. One way to ensure this is not to call this function
1197  * directly but to call srp_reconnect_rport() instead, since that function
1198  * serializes calls to this function via rport->mutex and also blocks
1199  * srp_queuecommand() calls before invoking this function.
1200  */
1201 static int srp_rport_reconnect(struct srp_rport *rport)
1202 {
1203 	struct srp_target_port *target = rport->lld_data;
1204 	struct srp_rdma_ch *ch;
1205 	int i, j, ret = 0;
1206 	bool multich = false;
1207 
1208 	srp_disconnect_target(target);
1209 
1210 	if (target->state == SRP_TARGET_SCANNING)
1211 		return -ENODEV;
1212 
1213 	/*
1214 	 * Now get a new local CM ID so that we avoid confusing the target in
1215 	 * case things are really fouled up. Doing so also ensures that all CM
1216 	 * callbacks will have finished before a new QP is allocated.
1217 	 */
1218 	for (i = 0; i < target->ch_count; i++) {
1219 		ch = &target->ch[i];
1220 		ret += srp_new_cm_id(ch);
1221 	}
1222 	for (i = 0; i < target->ch_count; i++) {
1223 		ch = &target->ch[i];
1224 		for (j = 0; j < target->req_ring_size; ++j) {
1225 			struct srp_request *req = &ch->req_ring[j];
1226 
1227 			srp_finish_req(ch, req, NULL, DID_RESET << 16);
1228 		}
1229 	}
1230 	for (i = 0; i < target->ch_count; i++) {
1231 		ch = &target->ch[i];
1232 		/*
1233 		 * Whether or not creating a new CM ID succeeded, create a new
1234 		 * QP. This guarantees that all completion callback function
1235 		 * invocations have finished before request resetting starts.
1236 		 */
1237 		ret += srp_create_ch_ib(ch);
1238 
1239 		INIT_LIST_HEAD(&ch->free_tx);
1240 		for (j = 0; j < target->queue_size; ++j)
1241 			list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1242 	}
1243 
1244 	target->qp_in_error = false;
1245 
1246 	for (i = 0; i < target->ch_count; i++) {
1247 		ch = &target->ch[i];
1248 		if (ret)
1249 			break;
1250 		ret = srp_connect_ch(ch, multich);
1251 		multich = true;
1252 	}
1253 
1254 	if (ret == 0)
1255 		shost_printk(KERN_INFO, target->scsi_host,
1256 			     PFX "reconnect succeeded\n");
1257 
1258 	return ret;
1259 }
1260 
1261 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1262 			 unsigned int dma_len, u32 rkey)
1263 {
1264 	struct srp_direct_buf *desc = state->desc;
1265 
1266 	WARN_ON_ONCE(!dma_len);
1267 
1268 	desc->va = cpu_to_be64(dma_addr);
1269 	desc->key = cpu_to_be32(rkey);
1270 	desc->len = cpu_to_be32(dma_len);
1271 
1272 	state->total_len += dma_len;
1273 	state->desc++;
1274 	state->ndesc++;
1275 }
1276 
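/*
 * Register the pages accumulated in @state through the FMR pool and append a
 * memory descriptor for the resulting rkey. If only a single page has been
 * collected and the PD exposes the unsafe global rkey, that rkey is used
 * instead of registering memory.
 */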
1277 static int srp_map_finish_fmr(struct srp_map_state *state,
1278 			      struct srp_rdma_ch *ch)
1279 {
1280 	struct srp_target_port *target = ch->target;
1281 	struct srp_device *dev = target->srp_host->srp_dev;
1282 	struct ib_pd *pd = target->pd;
1283 	struct ib_pool_fmr *fmr;
1284 	u64 io_addr = 0;
1285 
1286 	if (state->fmr.next >= state->fmr.end) {
1287 		shost_printk(KERN_ERR, ch->target->scsi_host,
1288 			     PFX "Out of MRs (mr_per_cmd = %d)\n",
1289 			     ch->target->mr_per_cmd);
1290 		return -ENOMEM;
1291 	}
1292 
1293 	WARN_ON_ONCE(!dev->use_fmr);
1294 
1295 	if (state->npages == 0)
1296 		return 0;
1297 
1298 	if (state->npages == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1299 		srp_map_desc(state, state->base_dma_addr, state->dma_len,
1300 			     pd->unsafe_global_rkey);
1301 		goto reset_state;
1302 	}
1303 
1304 	fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1305 				   state->npages, io_addr);
1306 	if (IS_ERR(fmr))
1307 		return PTR_ERR(fmr);
1308 
1309 	*state->fmr.next++ = fmr;
1310 	state->nmdesc++;
1311 
1312 	srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
1313 		     state->dma_len, fmr->fmr->rkey);
1314 
1315 reset_state:
1316 	state->npages = 0;
1317 	state->dma_len = 0;
1318 
1319 	return 0;
1320 }
1321 
1322 static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
1323 {
1324 	srp_handle_qp_err(cq, wc, "FAST REG");
1325 }
1326 
1327 /*
1328  * Map up to sg_nents elements of state->sg, where *sg_offset_p is the offset
1329  * at which to start in the first element. If sg_offset_p != NULL then
1330  * *sg_offset_p is updated to the offset in state->sg[retval] of the first
1331  * byte that has not yet been mapped.
1332  */
1333 static int srp_map_finish_fr(struct srp_map_state *state,
1334 			     struct srp_request *req,
1335 			     struct srp_rdma_ch *ch, int sg_nents,
1336 			     unsigned int *sg_offset_p)
1337 {
1338 	struct srp_target_port *target = ch->target;
1339 	struct srp_device *dev = target->srp_host->srp_dev;
1340 	struct ib_pd *pd = target->pd;
1341 	struct ib_send_wr *bad_wr;
1342 	struct ib_reg_wr wr;
1343 	struct srp_fr_desc *desc;
1344 	u32 rkey;
1345 	int n, err;
1346 
1347 	if (state->fr.next >= state->fr.end) {
1348 		shost_printk(KERN_ERR, ch->target->scsi_host,
1349 			     PFX "Out of MRs (mr_per_cmd = %d)\n",
1350 			     ch->target->mr_per_cmd);
1351 		return -ENOMEM;
1352 	}
1353 
1354 	WARN_ON_ONCE(!dev->use_fast_reg);
1355 
1356 	if (sg_nents == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1357 		unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1358 
1359 		srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
1360 			     sg_dma_len(state->sg) - sg_offset,
1361 			     pd->unsafe_global_rkey);
1362 		if (sg_offset_p)
1363 			*sg_offset_p = 0;
1364 		return 1;
1365 	}
1366 
1367 	desc = srp_fr_pool_get(ch->fr_pool);
1368 	if (!desc)
1369 		return -ENOMEM;
1370 
1371 	rkey = ib_inc_rkey(desc->mr->rkey);
1372 	ib_update_fast_reg_key(desc->mr, rkey);
1373 
1374 	n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
1375 			 dev->mr_page_size);
1376 	if (unlikely(n < 0)) {
1377 		srp_fr_pool_put(ch->fr_pool, &desc, 1);
1378 		pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
1379 			 dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
1380 			 sg_offset_p ? *sg_offset_p : -1, n);
1381 		return n;
1382 	}
1383 
1384 	WARN_ON_ONCE(desc->mr->length == 0);
1385 
1386 	req->reg_cqe.done = srp_reg_mr_err_done;
1387 
1388 	wr.wr.next = NULL;
1389 	wr.wr.opcode = IB_WR_REG_MR;
1390 	wr.wr.wr_cqe = &req->reg_cqe;
1391 	wr.wr.num_sge = 0;
1392 	wr.wr.send_flags = 0;
1393 	wr.mr = desc->mr;
1394 	wr.key = desc->mr->rkey;
1395 	wr.access = (IB_ACCESS_LOCAL_WRITE |
1396 		     IB_ACCESS_REMOTE_READ |
1397 		     IB_ACCESS_REMOTE_WRITE);
1398 
1399 	*state->fr.next++ = desc;
1400 	state->nmdesc++;
1401 
1402 	srp_map_desc(state, desc->mr->iova,
1403 		     desc->mr->length, desc->mr->rkey);
1404 
1405 	err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
1406 	if (unlikely(err)) {
1407 		WARN_ON_ONCE(err == -ENOMEM);
1408 		return err;
1409 	}
1410 
1411 	return n;
1412 }
1413 
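/*
 * Add a single scatterlist entry to @state in chunks of at most mr_page_size
 * bytes, closing the current FMR mapping whenever the page list is full or a
 * chunk does not start or end on a page boundary.
 */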
1414 static int srp_map_sg_entry(struct srp_map_state *state,
1415 			    struct srp_rdma_ch *ch,
1416 			    struct scatterlist *sg)
1417 {
1418 	struct srp_target_port *target = ch->target;
1419 	struct srp_device *dev = target->srp_host->srp_dev;
1420 	struct ib_device *ibdev = dev->dev;
1421 	dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
1422 	unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
1423 	unsigned int len = 0;
1424 	int ret;
1425 
1426 	WARN_ON_ONCE(!dma_len);
1427 
1428 	while (dma_len) {
1429 		unsigned offset = dma_addr & ~dev->mr_page_mask;
1430 
1431 		if (state->npages == dev->max_pages_per_mr ||
1432 		    (state->npages > 0 && offset != 0)) {
1433 			ret = srp_map_finish_fmr(state, ch);
1434 			if (ret)
1435 				return ret;
1436 		}
1437 
1438 		len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1439 
1440 		if (!state->npages)
1441 			state->base_dma_addr = dma_addr;
1442 		state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1443 		state->dma_len += len;
1444 		dma_addr += len;
1445 		dma_len -= len;
1446 	}
1447 
1448 	/*
1449 	 * If the end of the MR is not on a page boundary then we need to
1450 	 * close it out and start a new one -- we can only merge at page
1451 	 * boundaries.
1452 	 */
1453 	ret = 0;
1454 	if ((dma_addr & ~dev->mr_page_mask) != 0)
1455 		ret = srp_map_finish_fmr(state, ch);
1456 	return ret;
1457 }
1458 
1459 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1460 			  struct srp_request *req, struct scatterlist *scat,
1461 			  int count)
1462 {
1463 	struct scatterlist *sg;
1464 	int i, ret;
1465 
1466 	state->pages = req->map_page;
1467 	state->fmr.next = req->fmr_list;
1468 	state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;
1469 
1470 	for_each_sg(scat, sg, count, i) {
1471 		ret = srp_map_sg_entry(state, ch, sg);
1472 		if (ret)
1473 			return ret;
1474 	}
1475 
1476 	ret = srp_map_finish_fmr(state, ch);
1477 	if (ret)
1478 		return ret;
1479 
1480 	return 0;
1481 }
1482 
1483 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1484 			 struct srp_request *req, struct scatterlist *scat,
1485 			 int count)
1486 {
1487 	unsigned int sg_offset = 0;
1488 
1489 	state->fr.next = req->fr_list;
1490 	state->fr.end = req->fr_list + ch->target->mr_per_cmd;
1491 	state->sg = scat;
1492 
1493 	if (count == 0)
1494 		return 0;
1495 
1496 	while (count) {
1497 		int i, n;
1498 
1499 		n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
1500 		if (unlikely(n < 0))
1501 			return n;
1502 
1503 		count -= n;
1504 		for (i = 0; i < n; i++)
1505 			state->sg = sg_next(state->sg);
1506 	}
1507 
1508 	return 0;
1509 }
1510 
1511 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1512 			  struct srp_request *req, struct scatterlist *scat,
1513 			  int count)
1514 {
1515 	struct srp_target_port *target = ch->target;
1516 	struct srp_device *dev = target->srp_host->srp_dev;
1517 	struct scatterlist *sg;
1518 	int i;
1519 
1520 	for_each_sg(scat, sg, count, i) {
1521 		srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
1522 			     ib_sg_dma_len(dev->dev, sg),
1523 			     target->pd->unsafe_global_rkey);
1524 	}
1525 
1526 	return 0;
1527 }
1528 
1529 /*
1530  * Register the indirect data buffer descriptor with the HCA.
1531  *
1532  * Note: since the indirect data buffer descriptor has been allocated with
1533  * kmalloc() it is guaranteed that this buffer is a physically contiguous
1534  * memory buffer.
1535  */
1536 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1537 		       void **next_mr, void **end_mr, u32 idb_len,
1538 		       __be32 *idb_rkey)
1539 {
1540 	struct srp_target_port *target = ch->target;
1541 	struct srp_device *dev = target->srp_host->srp_dev;
1542 	struct srp_map_state state;
1543 	struct srp_direct_buf idb_desc;
1544 	u64 idb_pages[1];
1545 	struct scatterlist idb_sg[1];
1546 	int ret;
1547 
1548 	memset(&state, 0, sizeof(state));
1549 	memset(&idb_desc, 0, sizeof(idb_desc));
1550 	state.gen.next = next_mr;
1551 	state.gen.end = end_mr;
1552 	state.desc = &idb_desc;
1553 	state.base_dma_addr = req->indirect_dma_addr;
1554 	state.dma_len = idb_len;
1555 
1556 	if (dev->use_fast_reg) {
1557 		state.sg = idb_sg;
1558 		sg_init_one(idb_sg, req->indirect_desc, idb_len);
1559 		idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1560 #ifdef CONFIG_NEED_SG_DMA_LENGTH
1561 		idb_sg->dma_length = idb_sg->length;	      /* hack^2 */
1562 #endif
1563 		ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
1564 		if (ret < 0)
1565 			return ret;
1566 		WARN_ON_ONCE(ret < 1);
1567 	} else if (dev->use_fmr) {
1568 		state.pages = idb_pages;
1569 		state.pages[0] = (req->indirect_dma_addr &
1570 				  dev->mr_page_mask);
1571 		state.npages = 1;
1572 		ret = srp_map_finish_fmr(&state, ch);
1573 		if (ret < 0)
1574 			return ret;
1575 	} else {
1576 		return -EINVAL;
1577 	}
1578 
1579 	*idb_rkey = idb_desc.key;
1580 
1581 	return 0;
1582 }
1583 
1584 static void srp_check_mapping(struct srp_map_state *state,
1585 			      struct srp_rdma_ch *ch, struct srp_request *req,
1586 			      struct scatterlist *scat, int count)
1587 {
1588 	struct srp_device *dev = ch->target->srp_host->srp_dev;
1589 	struct srp_fr_desc **pfr;
1590 	u64 desc_len = 0, mr_len = 0;
1591 	int i;
1592 
1593 	for (i = 0; i < state->ndesc; i++)
1594 		desc_len += be32_to_cpu(req->indirect_desc[i].len);
1595 	if (dev->use_fast_reg)
1596 		for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
1597 			mr_len += (*pfr)->mr->length;
1598 	else if (dev->use_fmr)
1599 		for (i = 0; i < state->nmdesc; i++)
1600 			mr_len += be32_to_cpu(req->indirect_desc[i].len);
1601 	if (desc_len != scsi_bufflen(req->scmnd) ||
1602 	    mr_len > scsi_bufflen(req->scmnd))
1603 		pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
1604 		       scsi_bufflen(req->scmnd), desc_len, mr_len,
1605 		       state->ndesc, state->nmdesc);
1606 }
1607 
1608 /**
1609  * srp_map_data() - map SCSI data buffer onto an SRP request
1610  * @scmnd: SCSI command to map
1611  * @ch: SRP RDMA channel
1612  * @req: SRP request
1613  *
1614  * Returns the length in bytes of the SRP_CMD IU or a negative value if
1615  * mapping failed.
1616  */
1617 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1618 			struct srp_request *req)
1619 {
1620 	struct srp_target_port *target = ch->target;
1621 	struct ib_pd *pd = target->pd;
1622 	struct scatterlist *scat;
1623 	struct srp_cmd *cmd = req->cmd->buf;
1624 	int len, nents, count, ret;
1625 	struct srp_device *dev;
1626 	struct ib_device *ibdev;
1627 	struct srp_map_state state;
1628 	struct srp_indirect_buf *indirect_hdr;
1629 	u32 idb_len, table_len;
1630 	__be32 idb_rkey;
1631 	u8 fmt;
1632 
1633 	if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1634 		return sizeof (struct srp_cmd);
1635 
1636 	if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1637 	    scmnd->sc_data_direction != DMA_TO_DEVICE) {
1638 		shost_printk(KERN_WARNING, target->scsi_host,
1639 			     PFX "Unhandled data direction %d\n",
1640 			     scmnd->sc_data_direction);
1641 		return -EINVAL;
1642 	}
1643 
1644 	nents = scsi_sg_count(scmnd);
1645 	scat  = scsi_sglist(scmnd);
1646 
1647 	dev = target->srp_host->srp_dev;
1648 	ibdev = dev->dev;
1649 
1650 	count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1651 	if (unlikely(count == 0))
1652 		return -EIO;
1653 
1654 	fmt = SRP_DATA_DESC_DIRECT;
1655 	len = sizeof (struct srp_cmd) +	sizeof (struct srp_direct_buf);
1656 
1657 	if (count == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1658 		/*
1659 		 * The midlayer only generated a single gather/scatter
1660 		 * entry, or DMA mapping coalesced everything to a
1661 		 * single entry.  So a direct descriptor along with
1662 		 * the DMA MR suffices.
1663 		 */
1664 		struct srp_direct_buf *buf = (void *) cmd->add_data;
1665 
1666 		buf->va  = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1667 		buf->key = cpu_to_be32(pd->unsafe_global_rkey);
1668 		buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
1669 
1670 		req->nmdesc = 0;
1671 		goto map_complete;
1672 	}
1673 
1674 	/*
1675 	 * We have more than one scatter/gather entry, so build our indirect
1676 	 * descriptor table, trying to merge as many entries as we can.
1677 	 */
1678 	indirect_hdr = (void *) cmd->add_data;
1679 
1680 	ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1681 				   target->indirect_size, DMA_TO_DEVICE);
1682 
1683 	memset(&state, 0, sizeof(state));
1684 	state.desc = req->indirect_desc;
1685 	if (dev->use_fast_reg)
1686 		ret = srp_map_sg_fr(&state, ch, req, scat, count);
1687 	else if (dev->use_fmr)
1688 		ret = srp_map_sg_fmr(&state, ch, req, scat, count);
1689 	else
1690 		ret = srp_map_sg_dma(&state, ch, req, scat, count);
1691 	req->nmdesc = state.nmdesc;
1692 	if (ret < 0)
1693 		goto unmap;
1694 
1695 	{
1696 		DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
1697 			"Memory mapping consistency check");
1698 		if (DYNAMIC_DEBUG_BRANCH(ddm))
1699 			srp_check_mapping(&state, ch, req, scat, count);
1700 	}
1701 
1702 	/* We've mapped the request, now pull as much of the indirect
1703 	 * descriptor table as we can into the command buffer. If this
1704 	 * target is not using an external indirect table, we are
1705 	 * guaranteed to fit into the command, as the SCSI layer won't
1706 	 * give us more S/G entries than we allow.
1707 	 */
1708 	if (state.ndesc == 1) {
1709 		/*
1710 		 * Memory registration collapsed the sg-list into one entry,
1711 		 * so use a direct descriptor.
1712 		 */
1713 		struct srp_direct_buf *buf = (void *) cmd->add_data;
1714 
1715 		*buf = req->indirect_desc[0];
1716 		goto map_complete;
1717 	}
1718 
1719 	if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1720 						!target->allow_ext_sg)) {
1721 		shost_printk(KERN_ERR, target->scsi_host,
1722 			     "Could not fit S/G list into SRP_CMD\n");
1723 		ret = -EIO;
1724 		goto unmap;
1725 	}
1726 
1727 	count = min(state.ndesc, target->cmd_sg_cnt);
1728 	table_len = state.ndesc * sizeof (struct srp_direct_buf);
1729 	idb_len = sizeof(struct srp_indirect_buf) + table_len;
1730 
1731 	fmt = SRP_DATA_DESC_INDIRECT;
1732 	len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
1733 	len += count * sizeof (struct srp_direct_buf);
1734 
1735 	memcpy(indirect_hdr->desc_list, req->indirect_desc,
1736 	       count * sizeof (struct srp_direct_buf));
1737 
1738 	if (!(pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1739 		ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
1740 				  idb_len, &idb_rkey);
1741 		if (ret < 0)
1742 			goto unmap;
1743 		req->nmdesc++;
1744 	} else {
1745 		idb_rkey = cpu_to_be32(pd->unsafe_global_rkey);
1746 	}
1747 
1748 	indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1749 	indirect_hdr->table_desc.key = idb_rkey;
1750 	indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1751 	indirect_hdr->len = cpu_to_be32(state.total_len);
1752 
1753 	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1754 		cmd->data_out_desc_cnt = count;
1755 	else
1756 		cmd->data_in_desc_cnt = count;
1757 
1758 	ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1759 				      DMA_TO_DEVICE);
1760 
1761 map_complete:
1762 	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1763 		cmd->buf_fmt = fmt << 4;
1764 	else
1765 		cmd->buf_fmt = fmt;
1766 
1767 	return len;
1768 
1769 unmap:
1770 	srp_unmap_data(scmnd, ch, req);
1771 	if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
1772 		ret = -E2BIG;
1773 	return ret;
1774 }
1775 
1776 /*
1777  * Return an IU, and possibly a request limit credit, to the free pool
1778  */
1779 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1780 			  enum srp_iu_type iu_type)
1781 {
1782 	unsigned long flags;
1783 
1784 	spin_lock_irqsave(&ch->lock, flags);
1785 	list_add(&iu->list, &ch->free_tx);
1786 	if (iu_type != SRP_IU_RSP)
1787 		++ch->req_lim;
1788 	spin_unlock_irqrestore(&ch->lock, flags);
1789 }
1790 
1791 /*
1792  * Must be called with ch->lock held to protect req_lim and free_tx.
1793  * If IU is not sent, it must be returned using srp_put_tx_iu().
1794  *
1795  * Note:
1796  * An upper limit for the number of allocated information units for each
1797  * request type is:
1798  * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1799  *   more than Scsi_Host.can_queue requests.
1800  * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1801  * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1802  *   one unanswered SRP request to an initiator.
1803  */
1804 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1805 				      enum srp_iu_type iu_type)
1806 {
1807 	struct srp_target_port *target = ch->target;
1808 	s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1809 	struct srp_iu *iu;
1810 
1811 	lockdep_assert_held(&ch->lock);
1812 
1813 	ib_process_cq_direct(ch->send_cq, -1);
1814 
1815 	if (list_empty(&ch->free_tx))
1816 		return NULL;
1817 
1818 	/* Initiator responses to target requests do not consume credits */
1819 	if (iu_type != SRP_IU_RSP) {
1820 		if (ch->req_lim <= rsv) {
1821 			++target->zero_req_lim;
1822 			return NULL;
1823 		}
1824 
1825 		--ch->req_lim;
1826 	}
1827 
1828 	iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1829 	list_del(&iu->list);
1830 	return iu;
1831 }
1832 
1833 /*
1834  * Note: if this function is called from inside ib_drain_sq() then it will
1835  * be called without ch->lock being held. If ib_drain_sq() dequeues a WQE
1836  * with status IB_WC_SUCCESS then that's a bug.
1837  */
1838 static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
1839 {
1840 	struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
1841 	struct srp_rdma_ch *ch = cq->cq_context;
1842 
1843 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
1844 		srp_handle_qp_err(cq, wc, "SEND");
1845 		return;
1846 	}
1847 
1848 	lockdep_assert_held(&ch->lock);
1849 
1850 	list_add(&iu->list, &ch->free_tx);
1851 }
1852 
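/*
 * Post @iu as a signaled send work request of @len bytes on the QP of @ch.
 */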
1853 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1854 {
1855 	struct srp_target_port *target = ch->target;
1856 	struct ib_sge list;
1857 	struct ib_send_wr wr, *bad_wr;
1858 
1859 	list.addr   = iu->dma;
1860 	list.length = len;
1861 	list.lkey   = target->lkey;
1862 
1863 	iu->cqe.done = srp_send_done;
1864 
1865 	wr.next       = NULL;
1866 	wr.wr_cqe     = &iu->cqe;
1867 	wr.sg_list    = &list;
1868 	wr.num_sge    = 1;
1869 	wr.opcode     = IB_WR_SEND;
1870 	wr.send_flags = IB_SEND_SIGNALED;
1871 
1872 	return ib_post_send(ch->qp, &wr, &bad_wr);
1873 }
1874 
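/*
 * Post @iu as a receive work request on the QP of @ch so that the target
 * can send us another information unit.
 */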
1875 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1876 {
1877 	struct srp_target_port *target = ch->target;
1878 	struct ib_recv_wr wr, *bad_wr;
1879 	struct ib_sge list;
1880 
1881 	list.addr   = iu->dma;
1882 	list.length = iu->size;
1883 	list.lkey   = target->lkey;
1884 
1885 	iu->cqe.done = srp_recv_done;
1886 
1887 	wr.next     = NULL;
1888 	wr.wr_cqe   = &iu->cqe;
1889 	wr.sg_list  = &list;
1890 	wr.num_sge  = 1;
1891 
1892 	return ib_post_recv(ch->qp, &wr, &bad_wr);
1893 }
1894 
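/*
 * Process an SRP_RSP information unit: complete either an outstanding task
 * management request or the SCSI command identified by the response tag.
 */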
1895 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
1896 {
1897 	struct srp_target_port *target = ch->target;
1898 	struct srp_request *req;
1899 	struct scsi_cmnd *scmnd;
1900 	unsigned long flags;
1901 
1902 	if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
1903 		spin_lock_irqsave(&ch->lock, flags);
1904 		ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1905 		if (rsp->tag == ch->tsk_mgmt_tag) {
1906 			ch->tsk_mgmt_status = -1;
1907 			if (be32_to_cpu(rsp->resp_data_len) >= 4)
1908 				ch->tsk_mgmt_status = rsp->data[3];
1909 			complete(&ch->tsk_mgmt_done);
1910 		} else {
1911 			shost_printk(KERN_ERR, target->scsi_host,
1912 				     "Received tsk mgmt response too late for tag %#llx\n",
1913 				     rsp->tag);
1914 		}
1915 		spin_unlock_irqrestore(&ch->lock, flags);
1916 	} else {
1917 		scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
1918 		if (scmnd && scmnd->host_scribble) {
1919 			req = (void *)scmnd->host_scribble;
1920 			scmnd = srp_claim_req(ch, req, NULL, scmnd);
1921 		} else {
1922 			scmnd = NULL;
1923 		}
1924 		if (!scmnd) {
1925 			shost_printk(KERN_ERR, target->scsi_host,
1926 				     "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
1927 				     rsp->tag, ch - target->ch, ch->qp->qp_num);
1928 
1929 			spin_lock_irqsave(&ch->lock, flags);
1930 			ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1931 			spin_unlock_irqrestore(&ch->lock, flags);
1932 
1933 			return;
1934 		}
1935 		scmnd->result = rsp->status;
1936 
1937 		if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
1938 			memcpy(scmnd->sense_buffer, rsp->data +
1939 			       be32_to_cpu(rsp->resp_data_len),
1940 			       min_t(int, be32_to_cpu(rsp->sense_data_len),
1941 				     SCSI_SENSE_BUFFERSIZE));
1942 		}
1943 
1944 		if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
1945 			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1946 		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
1947 			scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
1948 		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
1949 			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1950 		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
1951 			scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
1952 
1953 		srp_free_req(ch, req, scmnd,
1954 			     be32_to_cpu(rsp->req_lim_delta));
1955 
1956 		scmnd->host_scribble = NULL;
1957 		scmnd->scsi_done(scmnd);
1958 	}
1959 }
1960 
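/*
 * Apply @req_delta to the request limit and send the @len bytes at @rsp back
 * to the target. Returns zero upon success and a nonzero value upon failure.
 */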
1961 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
1962 			       void *rsp, int len)
1963 {
1964 	struct srp_target_port *target = ch->target;
1965 	struct ib_device *dev = target->srp_host->srp_dev->dev;
1966 	unsigned long flags;
1967 	struct srp_iu *iu;
1968 	int err;
1969 
1970 	spin_lock_irqsave(&ch->lock, flags);
1971 	ch->req_lim += req_delta;
1972 	iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
1973 	spin_unlock_irqrestore(&ch->lock, flags);
1974 
1975 	if (!iu) {
1976 		shost_printk(KERN_ERR, target->scsi_host, PFX
1977 			     "no IU available to send response\n");
1978 		return 1;
1979 	}
1980 
1981 	ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
1982 	memcpy(iu->buf, rsp, len);
1983 	ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
1984 
1985 	err = srp_post_send(ch, iu, len);
1986 	if (err) {
1987 		shost_printk(KERN_ERR, target->scsi_host, PFX
1988 			     "unable to post response: %d\n", err);
1989 		srp_put_tx_iu(ch, iu, SRP_IU_RSP);
1990 	}
1991 
1992 	return err;
1993 }
1994 
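/* Answer an SRP_CRED_REQ from the target with an SRP_CRED_RSP. */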
1995 static void srp_process_cred_req(struct srp_rdma_ch *ch,
1996 				 struct srp_cred_req *req)
1997 {
1998 	struct srp_cred_rsp rsp = {
1999 		.opcode = SRP_CRED_RSP,
2000 		.tag = req->tag,
2001 	};
2002 	s32 delta = be32_to_cpu(req->req_lim_delta);
2003 
2004 	if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2005 		shost_printk(KERN_ERR, ch->target->scsi_host, PFX
2006 			     "problems processing SRP_CRED_REQ\n");
2007 }
2008 
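/* Acknowledge an SRP_AER_REQ (asynchronous event) from the target. */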
2009 static void srp_process_aer_req(struct srp_rdma_ch *ch,
2010 				struct srp_aer_req *req)
2011 {
2012 	struct srp_target_port *target = ch->target;
2013 	struct srp_aer_rsp rsp = {
2014 		.opcode = SRP_AER_RSP,
2015 		.tag = req->tag,
2016 	};
2017 	s32 delta = be32_to_cpu(req->req_lim_delta);
2018 
2019 	shost_printk(KERN_ERR, target->scsi_host, PFX
2020 		     "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
2021 
2022 	if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2023 		shost_printk(KERN_ERR, target->scsi_host, PFX
2024 			     "problems processing SRP_AER_REQ\n");
2025 }
2026 
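/*
 * Receive completion handler: dispatch the received information unit
 * according to its SRP opcode and repost the receive buffer.
 */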
2027 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
2028 {
2029 	struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
2030 	struct srp_rdma_ch *ch = cq->cq_context;
2031 	struct srp_target_port *target = ch->target;
2032 	struct ib_device *dev = target->srp_host->srp_dev->dev;
2033 	int res;
2034 	u8 opcode;
2035 
2036 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
2037 		srp_handle_qp_err(cq, wc, "RECV");
2038 		return;
2039 	}
2040 
2041 	ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
2042 				   DMA_FROM_DEVICE);
2043 
2044 	opcode = *(u8 *) iu->buf;
2045 
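	/*
	 * Debug-only dump of the received IU; kept behind "if (0)" so that it
	 * is compile-checked but never executed in normal builds.
	 */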
2046 	if (0) {
2047 		shost_printk(KERN_ERR, target->scsi_host,
2048 			     PFX "recv completion, opcode 0x%02x\n", opcode);
2049 		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
2050 			       iu->buf, wc->byte_len, true);
2051 	}
2052 
2053 	switch (opcode) {
2054 	case SRP_RSP:
2055 		srp_process_rsp(ch, iu->buf);
2056 		break;
2057 
2058 	case SRP_CRED_REQ:
2059 		srp_process_cred_req(ch, iu->buf);
2060 		break;
2061 
2062 	case SRP_AER_REQ:
2063 		srp_process_aer_req(ch, iu->buf);
2064 		break;
2065 
2066 	case SRP_T_LOGOUT:
2067 		/* XXX Handle target logout */
2068 		shost_printk(KERN_WARNING, target->scsi_host,
2069 			     PFX "Got target logout request\n");
2070 		break;
2071 
2072 	default:
2073 		shost_printk(KERN_WARNING, target->scsi_host,
2074 			     PFX "Unhandled SRP opcode 0x%02x\n", opcode);
2075 		break;
2076 	}
2077 
2078 	ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
2079 				      DMA_FROM_DEVICE);
2080 
2081 	res = srp_post_recv(ch, iu);
2082 	if (res != 0)
2083 		shost_printk(KERN_ERR, target->scsi_host,
2084 			     PFX "Recv failed with error code %d\n", res);
2085 }
2086 
2087 /**
2088  * srp_tl_err_work() - handle a transport layer error
2089  * @work: Work structure embedded in an SRP target port.
2090  *
2091  * Note: This function may get invoked before the rport has been created,
2092  * hence the target->rport test.
2093  */
2094 static void srp_tl_err_work(struct work_struct *work)
2095 {
2096 	struct srp_target_port *target;
2097 
2098 	target = container_of(work, struct srp_target_port, tl_err_work);
2099 	if (target->rport)
2100 		srp_start_tl_fail_timers(target->rport);
2101 }
2102 
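/*
 * Report a work completion error once per connection, schedule the transport
 * layer error handler and mark the QP as being in the error state.
 */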
2103 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
2104 		const char *opname)
2105 {
2106 	struct srp_rdma_ch *ch = cq->cq_context;
2107 	struct srp_target_port *target = ch->target;
2108 
2109 	if (ch->connected && !target->qp_in_error) {
2110 		shost_printk(KERN_ERR, target->scsi_host,
2111 			     PFX "failed %s status %s (%d) for CQE %p\n",
2112 			     opname, ib_wc_status_msg(wc->status), wc->status,
2113 			     wc->wr_cqe);
2114 		queue_work(system_long_wq, &target->tl_err_work);
2115 	}
2116 	target->qp_in_error = true;
2117 }
2118 
2119 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2120 {
2121 	struct srp_target_port *target = host_to_target(shost);
2122 	struct srp_rport *rport = target->rport;
2123 	struct srp_rdma_ch *ch;
2124 	struct srp_request *req;
2125 	struct srp_iu *iu;
2126 	struct srp_cmd *cmd;
2127 	struct ib_device *dev;
2128 	unsigned long flags;
2129 	u32 tag;
2130 	u16 idx;
2131 	int len, ret;
2132 	const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
2133 
2134 	/*
2135 	 * The SCSI EH thread is the only context from which srp_queuecommand()
2136 	 * can get invoked for blocked devices (SDEV_BLOCK /
2137 	 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
2138 	 * locking the rport mutex if invoked from inside the SCSI EH.
2139 	 */
2140 	if (in_scsi_eh)
2141 		mutex_lock(&rport->mutex);
2142 
2143 	scmnd->result = srp_chkready(target->rport);
2144 	if (unlikely(scmnd->result))
2145 		goto err;
2146 
2147 	WARN_ON_ONCE(scmnd->request->tag < 0);
2148 	tag = blk_mq_unique_tag(scmnd->request);
2149 	ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2150 	idx = blk_mq_unique_tag_to_tag(tag);
2151 	WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2152 		  dev_name(&shost->shost_gendev), tag, idx,
2153 		  target->req_ring_size);
2154 
2155 	spin_lock_irqsave(&ch->lock, flags);
2156 	iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2157 	spin_unlock_irqrestore(&ch->lock, flags);
2158 
2159 	if (!iu)
2160 		goto err;
2161 
2162 	req = &ch->req_ring[idx];
2163 	dev = target->srp_host->srp_dev->dev;
2164 	ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
2165 				   DMA_TO_DEVICE);
2166 
2167 	scmnd->host_scribble = (void *) req;
2168 
2169 	cmd = iu->buf;
2170 	memset(cmd, 0, sizeof *cmd);
2171 
2172 	cmd->opcode = SRP_CMD;
2173 	int_to_scsilun(scmnd->device->lun, &cmd->lun);
2174 	cmd->tag    = tag;
2175 	memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2176 
2177 	req->scmnd    = scmnd;
2178 	req->cmd      = iu;
2179 
2180 	len = srp_map_data(scmnd, ch, req);
2181 	if (len < 0) {
2182 		shost_printk(KERN_ERR, target->scsi_host,
2183 			     PFX "Failed to map data (%d)\n", len);
2184 		/*
2185 		 * If we ran out of memory descriptors (-ENOMEM) because an
2186 		 * application is queuing many requests with more than
2187 		 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2188 		 * to reduce queue depth temporarily.
2189 		 */
2190 		scmnd->result = len == -ENOMEM ?
2191 			DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2192 		goto err_iu;
2193 	}
2194 
2195 	ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2196 				      DMA_TO_DEVICE);
2197 
2198 	if (srp_post_send(ch, iu, len)) {
2199 		shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2200 		goto err_unmap;
2201 	}
2202 
2203 	ret = 0;
2204 
2205 unlock_rport:
2206 	if (in_scsi_eh)
2207 		mutex_unlock(&rport->mutex);
2208 
2209 	return ret;
2210 
2211 err_unmap:
2212 	srp_unmap_data(scmnd, ch, req);
2213 
2214 err_iu:
2215 	srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2216 
	/*
	 * Ensure that the loops that iterate over the request ring never
	 * encounter a dangling SCSI command pointer.
	 */
2221 	req->scmnd = NULL;
2222 
2223 err:
2224 	if (scmnd->result) {
2225 		scmnd->scsi_done(scmnd);
2226 		ret = 0;
2227 	} else {
2228 		ret = SCSI_MLQUEUE_HOST_BUSY;
2229 	}
2230 
2231 	goto unlock_rport;
2232 }
2233 
2234 /*
2235  * Note: the resources allocated in this function are freed in
2236  * srp_free_ch_ib().
2237  */
2238 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2239 {
2240 	struct srp_target_port *target = ch->target;
2241 	int i;
2242 
2243 	ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2244 			      GFP_KERNEL);
2245 	if (!ch->rx_ring)
2246 		goto err_no_ring;
2247 	ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2248 			      GFP_KERNEL);
2249 	if (!ch->tx_ring)
2250 		goto err_no_ring;
2251 
2252 	for (i = 0; i < target->queue_size; ++i) {
2253 		ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2254 					      ch->max_ti_iu_len,
2255 					      GFP_KERNEL, DMA_FROM_DEVICE);
2256 		if (!ch->rx_ring[i])
2257 			goto err;
2258 	}
2259 
2260 	for (i = 0; i < target->queue_size; ++i) {
2261 		ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2262 					      target->max_iu_len,
2263 					      GFP_KERNEL, DMA_TO_DEVICE);
2264 		if (!ch->tx_ring[i])
2265 			goto err;
2266 
2267 		list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2268 	}
2269 
2270 	return 0;
2271 
2272 err:
2273 	for (i = 0; i < target->queue_size; ++i) {
2274 		srp_free_iu(target->srp_host, ch->rx_ring[i]);
2275 		srp_free_iu(target->srp_host, ch->tx_ring[i]);
2276 	}
2277 
2279 err_no_ring:
2280 	kfree(ch->tx_ring);
2281 	ch->tx_ring = NULL;
2282 	kfree(ch->rx_ring);
2283 	ch->rx_ring = NULL;
2284 
2285 	return -ENOMEM;
2286 }
2287 
2288 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2289 {
2290 	uint64_t T_tr_ns, max_compl_time_ms;
2291 	uint32_t rq_tmo_jiffies;
2292 
2293 	/*
2294 	 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2295 	 * table 91), both the QP timeout and the retry count have to be set
2296 	 * for RC QP's during the RTR to RTS transition.
2297 	 */
2298 	WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2299 		     (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2300 
2301 	/*
2302 	 * Set target->rq_tmo_jiffies to one second more than the largest time
2303 	 * it can take before an error completion is generated. See also
2304 	 * C9-140..142 in the IBTA spec for more information about how to
2305 	 * convert the QP Local ACK Timeout value to nanoseconds.
2306 	 */
2307 	T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2308 	max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2309 	do_div(max_compl_time_ms, NSEC_PER_MSEC);
2310 	rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
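	/*
	 * Example (illustrative values only): a QP timeout of 19 yields
	 * T_tr = 4096 ns * 2^19 ~= 2.15 s; with retry_cnt = 7 the bound is
	 * 7 * 4 * 2.15 s ~= 60 s, so rq_tmo_jiffies corresponds to about 61 s.
	 */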
2311 
2312 	return rq_tmo_jiffies;
2313 }
2314 
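/*
 * Handle an IB CM REP: record the login parameters reported by the target,
 * transition the QP through RTR to RTS, post the initial receive buffers and
 * send an RTU.
 */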
2315 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2316 			       const struct srp_login_rsp *lrsp,
2317 			       struct srp_rdma_ch *ch)
2318 {
2319 	struct srp_target_port *target = ch->target;
2320 	struct ib_qp_attr *qp_attr = NULL;
2321 	int attr_mask = 0;
2322 	int ret;
2323 	int i;
2324 
2325 	if (lrsp->opcode == SRP_LOGIN_RSP) {
2326 		ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2327 		ch->req_lim       = be32_to_cpu(lrsp->req_lim_delta);
2328 
2329 		/*
2330 		 * Reserve credits for task management so we don't
2331 		 * bounce requests back to the SCSI mid-layer.
2332 		 */
2333 		target->scsi_host->can_queue
2334 			= min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2335 			      target->scsi_host->can_queue);
2336 		target->scsi_host->cmd_per_lun
2337 			= min_t(int, target->scsi_host->can_queue,
2338 				target->scsi_host->cmd_per_lun);
2339 	} else {
2340 		shost_printk(KERN_WARNING, target->scsi_host,
2341 			     PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2342 		ret = -ECONNRESET;
2343 		goto error;
2344 	}
2345 
2346 	if (!ch->rx_ring) {
2347 		ret = srp_alloc_iu_bufs(ch);
2348 		if (ret)
2349 			goto error;
2350 	}
2351 
2352 	ret = -ENOMEM;
2353 	qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
2354 	if (!qp_attr)
2355 		goto error;
2356 
2357 	qp_attr->qp_state = IB_QPS_RTR;
2358 	ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2359 	if (ret)
2360 		goto error_free;
2361 
2362 	ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2363 	if (ret)
2364 		goto error_free;
2365 
2366 	for (i = 0; i < target->queue_size; i++) {
2367 		struct srp_iu *iu = ch->rx_ring[i];
2368 
2369 		ret = srp_post_recv(ch, iu);
2370 		if (ret)
2371 			goto error_free;
2372 	}
2373 
2374 	qp_attr->qp_state = IB_QPS_RTS;
2375 	ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2376 	if (ret)
2377 		goto error_free;
2378 
2379 	target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2380 
2381 	ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2382 	if (ret)
2383 		goto error_free;
2384 
2385 	ret = ib_send_cm_rtu(cm_id, NULL, 0);
2386 
2387 error_free:
2388 	kfree(qp_attr);
2389 
2390 error:
2391 	ch->status = ret;
2392 }
2393 
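/*
 * Translate an IB CM REJ reason code into a driver-specific channel status
 * (redirect, stale connection or -ECONNRESET).
 */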
2394 static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
2395 			       struct ib_cm_event *event,
2396 			       struct srp_rdma_ch *ch)
2397 {
2398 	struct srp_target_port *target = ch->target;
2399 	struct Scsi_Host *shost = target->scsi_host;
2400 	struct ib_class_port_info *cpi;
2401 	int opcode;
2402 
2403 	switch (event->param.rej_rcvd.reason) {
2404 	case IB_CM_REJ_PORT_CM_REDIRECT:
2405 		cpi = event->param.rej_rcvd.ari;
2406 		sa_path_set_dlid(&ch->path, htonl(ntohs(cpi->redirect_lid)));
2407 		ch->path.pkey = cpi->redirect_pkey;
2408 		cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2409 		memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
2410 
2411 		ch->status = sa_path_get_dlid(&ch->path) ?
2412 			SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2413 		break;
2414 
2415 	case IB_CM_REJ_PORT_REDIRECT:
2416 		if (srp_target_is_topspin(target)) {
2417 			/*
2418 			 * Topspin/Cisco SRP gateways incorrectly send
2419 			 * reject reason code 25 when they mean 24
2420 			 * (port redirect).
2421 			 */
2422 			memcpy(ch->path.dgid.raw,
2423 			       event->param.rej_rcvd.ari, 16);
2424 
2425 			shost_printk(KERN_DEBUG, shost,
2426 				     PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2427 				     be64_to_cpu(ch->path.dgid.global.subnet_prefix),
2428 				     be64_to_cpu(ch->path.dgid.global.interface_id));
2429 
2430 			ch->status = SRP_PORT_REDIRECT;
2431 		} else {
2432 			shost_printk(KERN_WARNING, shost,
2433 				     "  REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2434 			ch->status = -ECONNRESET;
2435 		}
2436 		break;
2437 
2438 	case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2439 		shost_printk(KERN_WARNING, shost,
2440 			    "  REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2441 		ch->status = -ECONNRESET;
2442 		break;
2443 
2444 	case IB_CM_REJ_CONSUMER_DEFINED:
2445 		opcode = *(u8 *) event->private_data;
2446 		if (opcode == SRP_LOGIN_REJ) {
2447 			struct srp_login_rej *rej = event->private_data;
2448 			u32 reason = be32_to_cpu(rej->reason);
2449 
2450 			if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2451 				shost_printk(KERN_WARNING, shost,
2452 					     PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2453 			else
2454 				shost_printk(KERN_WARNING, shost, PFX
2455 					     "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2456 					     target->sgid.raw,
2457 					     target->orig_dgid.raw, reason);
		} else {
			shost_printk(KERN_WARNING, shost,
				     "  REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n",
				     opcode);
		}
2462 		ch->status = -ECONNRESET;
2463 		break;
2464 
2465 	case IB_CM_REJ_STALE_CONN:
2466 		shost_printk(KERN_WARNING, shost, "  REJ reason: stale connection\n");
2467 		ch->status = SRP_STALE_CONN;
2468 		break;
2469 
2470 	default:
2471 		shost_printk(KERN_WARNING, shost, "  REJ reason 0x%x\n",
2472 			     event->param.rej_rcvd.reason);
2473 		ch->status = -ECONNRESET;
2474 	}
2475 }
2476 
2477 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2478 {
2479 	struct srp_rdma_ch *ch = cm_id->context;
2480 	struct srp_target_port *target = ch->target;
2481 	int comp = 0;
2482 
2483 	switch (event->event) {
2484 	case IB_CM_REQ_ERROR:
2485 		shost_printk(KERN_DEBUG, target->scsi_host,
2486 			     PFX "Sending CM REQ failed\n");
2487 		comp = 1;
2488 		ch->status = -ECONNRESET;
2489 		break;
2490 
2491 	case IB_CM_REP_RECEIVED:
2492 		comp = 1;
2493 		srp_cm_rep_handler(cm_id, event->private_data, ch);
2494 		break;
2495 
2496 	case IB_CM_REJ_RECEIVED:
2497 		shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2498 		comp = 1;
2499 
2500 		srp_cm_rej_handler(cm_id, event, ch);
2501 		break;
2502 
2503 	case IB_CM_DREQ_RECEIVED:
2504 		shost_printk(KERN_WARNING, target->scsi_host,
2505 			     PFX "DREQ received - connection closed\n");
2506 		ch->connected = false;
2507 		if (ib_send_cm_drep(cm_id, NULL, 0))
2508 			shost_printk(KERN_ERR, target->scsi_host,
2509 				     PFX "Sending CM DREP failed\n");
2510 		queue_work(system_long_wq, &target->tl_err_work);
2511 		break;
2512 
2513 	case IB_CM_TIMEWAIT_EXIT:
2514 		shost_printk(KERN_ERR, target->scsi_host,
2515 			     PFX "connection closed\n");
2516 		comp = 1;
2517 
2518 		ch->status = 0;
2519 		break;
2520 
2521 	case IB_CM_MRA_RECEIVED:
2522 	case IB_CM_DREQ_ERROR:
2523 	case IB_CM_DREP_RECEIVED:
2524 		break;
2525 
2526 	default:
2527 		shost_printk(KERN_WARNING, target->scsi_host,
2528 			     PFX "Unhandled CM event %d\n", event->event);
2529 		break;
2530 	}
2531 
2532 	if (comp)
2533 		complete(&ch->done);
2534 
2535 	return 0;
2536 }
2537 
/**
 * srp_change_queue_depth - set the queue depth of a SCSI device
 * @sdev: SCSI device.
 * @qdepth: Requested queue depth.
 *
 * Returns the new queue depth.
 */
2545 static int
2546 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2547 {
2548 	if (!sdev->tagged_supported)
2549 		qdepth = 1;
2550 	return scsi_change_queue_depth(sdev, qdepth);
2551 }
2552 
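/*
 * Send an SRP task management request and wait up to SRP_ABORT_TIMEOUT_MS for
 * the matching response. Returns 0 upon success and -1 upon failure or timeout.
 */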
2553 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2554 			     u8 func, u8 *status)
2555 {
2556 	struct srp_target_port *target = ch->target;
2557 	struct srp_rport *rport = target->rport;
2558 	struct ib_device *dev = target->srp_host->srp_dev->dev;
2559 	struct srp_iu *iu;
2560 	struct srp_tsk_mgmt *tsk_mgmt;
2561 	int res;
2562 
2563 	if (!ch->connected || target->qp_in_error)
2564 		return -1;
2565 
	/*
	 * Lock the rport mutex to prevent srp_create_ch_ib() from being
	 * invoked while a task management function is being sent.
	 */
2570 	mutex_lock(&rport->mutex);
2571 	spin_lock_irq(&ch->lock);
2572 	iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2573 	spin_unlock_irq(&ch->lock);
2574 
2575 	if (!iu) {
2576 		mutex_unlock(&rport->mutex);
2577 
2578 		return -1;
2579 	}
2580 
2581 	ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2582 				   DMA_TO_DEVICE);
2583 	tsk_mgmt = iu->buf;
2584 	memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2585 
2586 	tsk_mgmt->opcode 	= SRP_TSK_MGMT;
2587 	int_to_scsilun(lun, &tsk_mgmt->lun);
2588 	tsk_mgmt->tsk_mgmt_func = func;
2589 	tsk_mgmt->task_tag	= req_tag;
2590 
2591 	spin_lock_irq(&ch->lock);
2592 	ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
2593 	tsk_mgmt->tag = ch->tsk_mgmt_tag;
2594 	spin_unlock_irq(&ch->lock);
2595 
2596 	init_completion(&ch->tsk_mgmt_done);
2597 
2598 	ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2599 				      DMA_TO_DEVICE);
2600 	if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2601 		srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2602 		mutex_unlock(&rport->mutex);
2603 
2604 		return -1;
2605 	}
2606 	res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
2607 					msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
2608 	if (res > 0 && status)
2609 		*status = ch->tsk_mgmt_status;
2610 	mutex_unlock(&rport->mutex);
2611 
2612 	WARN_ON_ONCE(res < 0);
2613 
2614 	return res > 0 ? 0 : -1;
2615 }
2616 
2617 static int srp_abort(struct scsi_cmnd *scmnd)
2618 {
2619 	struct srp_target_port *target = host_to_target(scmnd->device->host);
2620 	struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2621 	u32 tag;
2622 	u16 ch_idx;
2623 	struct srp_rdma_ch *ch;
2624 	int ret;
2625 
2626 	shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2627 
2628 	if (!req)
2629 		return SUCCESS;
2630 	tag = blk_mq_unique_tag(scmnd->request);
2631 	ch_idx = blk_mq_unique_tag_to_hwq(tag);
2632 	if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2633 		return SUCCESS;
2634 	ch = &target->ch[ch_idx];
2635 	if (!srp_claim_req(ch, req, NULL, scmnd))
2636 		return SUCCESS;
2637 	shost_printk(KERN_ERR, target->scsi_host,
2638 		     "Sending SRP abort for tag %#x\n", tag);
2639 	if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2640 			      SRP_TSK_ABORT_TASK, NULL) == 0)
2641 		ret = SUCCESS;
2642 	else if (target->rport->state == SRP_RPORT_LOST)
2643 		ret = FAST_IO_FAIL;
2644 	else
2645 		ret = FAILED;
2646 	srp_free_req(ch, req, scmnd, 0);
2647 	scmnd->result = DID_ABORT << 16;
2648 	scmnd->scsi_done(scmnd);
2649 
2650 	return ret;
2651 }
2652 
2653 static int srp_reset_device(struct scsi_cmnd *scmnd)
2654 {
2655 	struct srp_target_port *target = host_to_target(scmnd->device->host);
2656 	struct srp_rdma_ch *ch;
2657 	int i;
2658 	u8 status;
2659 
2660 	shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2661 
2662 	ch = &target->ch[0];
2663 	if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2664 			      SRP_TSK_LUN_RESET, &status))
2665 		return FAILED;
2666 	if (status)
2667 		return FAILED;
2668 
	for (i = 0; i < target->ch_count; i++) {
		int j;

		ch = &target->ch[i];
		for (j = 0; j < target->req_ring_size; ++j) {
			struct srp_request *req = &ch->req_ring[j];

			srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
		}
	}
2677 
2678 	return SUCCESS;
2679 }
2680 
2681 static int srp_reset_host(struct scsi_cmnd *scmnd)
2682 {
2683 	struct srp_target_port *target = host_to_target(scmnd->device->host);
2684 
2685 	shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2686 
2687 	return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2688 }
2689 
2690 static int srp_slave_alloc(struct scsi_device *sdev)
2691 {
2692 	struct Scsi_Host *shost = sdev->host;
2693 	struct srp_target_port *target = host_to_target(shost);
2694 	struct srp_device *srp_dev = target->srp_host->srp_dev;
2695 
	blk_queue_virt_boundary(sdev->request_queue,
				~srp_dev->mr_page_mask);
2699 
2700 	return 0;
2701 }
2702 
2703 static int srp_slave_configure(struct scsi_device *sdev)
2704 {
2705 	struct Scsi_Host *shost = sdev->host;
2706 	struct srp_target_port *target = host_to_target(shost);
2707 	struct request_queue *q = sdev->request_queue;
2708 	unsigned long timeout;
2709 
2710 	if (sdev->type == TYPE_DISK) {
2711 		timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
2712 		blk_queue_rq_timeout(q, timeout);
2713 	}
2714 
2715 	return 0;
2716 }
2717 
2718 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
2719 			   char *buf)
2720 {
2721 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2722 
2723 	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
2724 }
2725 
2726 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
2727 			     char *buf)
2728 {
2729 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2730 
2731 	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
2732 }
2733 
2734 static ssize_t show_service_id(struct device *dev,
2735 			       struct device_attribute *attr, char *buf)
2736 {
2737 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2738 
2739 	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id));
2740 }
2741 
2742 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
2743 			 char *buf)
2744 {
2745 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2746 
2747 	return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey));
2748 }
2749 
2750 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2751 			 char *buf)
2752 {
2753 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2754 
2755 	return sprintf(buf, "%pI6\n", target->sgid.raw);
2756 }
2757 
2758 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
2759 			 char *buf)
2760 {
2761 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2762 	struct srp_rdma_ch *ch = &target->ch[0];
2763 
2764 	return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
2765 }
2766 
2767 static ssize_t show_orig_dgid(struct device *dev,
2768 			      struct device_attribute *attr, char *buf)
2769 {
2770 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2771 
2772 	return sprintf(buf, "%pI6\n", target->orig_dgid.raw);
2773 }
2774 
2775 static ssize_t show_req_lim(struct device *dev,
2776 			    struct device_attribute *attr, char *buf)
2777 {
2778 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2779 	struct srp_rdma_ch *ch;
2780 	int i, req_lim = INT_MAX;
2781 
2782 	for (i = 0; i < target->ch_count; i++) {
2783 		ch = &target->ch[i];
2784 		req_lim = min(req_lim, ch->req_lim);
2785 	}
2786 	return sprintf(buf, "%d\n", req_lim);
2787 }
2788 
2789 static ssize_t show_zero_req_lim(struct device *dev,
2790 				 struct device_attribute *attr, char *buf)
2791 {
2792 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2793 
2794 	return sprintf(buf, "%d\n", target->zero_req_lim);
2795 }
2796 
2797 static ssize_t show_local_ib_port(struct device *dev,
2798 				  struct device_attribute *attr, char *buf)
2799 {
2800 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2801 
2802 	return sprintf(buf, "%d\n", target->srp_host->port);
2803 }
2804 
2805 static ssize_t show_local_ib_device(struct device *dev,
2806 				    struct device_attribute *attr, char *buf)
2807 {
2808 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2809 
2810 	return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
2811 }
2812 
2813 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
2814 			     char *buf)
2815 {
2816 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2817 
2818 	return sprintf(buf, "%d\n", target->ch_count);
2819 }
2820 
2821 static ssize_t show_comp_vector(struct device *dev,
2822 				struct device_attribute *attr, char *buf)
2823 {
2824 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2825 
2826 	return sprintf(buf, "%d\n", target->comp_vector);
2827 }
2828 
2829 static ssize_t show_tl_retry_count(struct device *dev,
2830 				   struct device_attribute *attr, char *buf)
2831 {
2832 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2833 
2834 	return sprintf(buf, "%d\n", target->tl_retry_count);
2835 }
2836 
2837 static ssize_t show_cmd_sg_entries(struct device *dev,
2838 				   struct device_attribute *attr, char *buf)
2839 {
2840 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2841 
2842 	return sprintf(buf, "%u\n", target->cmd_sg_cnt);
2843 }
2844 
2845 static ssize_t show_allow_ext_sg(struct device *dev,
2846 				 struct device_attribute *attr, char *buf)
2847 {
2848 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2849 
2850 	return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
2851 }
2852 
2853 static DEVICE_ATTR(id_ext,	    S_IRUGO, show_id_ext,	   NULL);
2854 static DEVICE_ATTR(ioc_guid,	    S_IRUGO, show_ioc_guid,	   NULL);
2855 static DEVICE_ATTR(service_id,	    S_IRUGO, show_service_id,	   NULL);
2856 static DEVICE_ATTR(pkey,	    S_IRUGO, show_pkey,		   NULL);
2857 static DEVICE_ATTR(sgid,	    S_IRUGO, show_sgid,		   NULL);
2858 static DEVICE_ATTR(dgid,	    S_IRUGO, show_dgid,		   NULL);
2859 static DEVICE_ATTR(orig_dgid,	    S_IRUGO, show_orig_dgid,	   NULL);
2860 static DEVICE_ATTR(req_lim,         S_IRUGO, show_req_lim,         NULL);
2861 static DEVICE_ATTR(zero_req_lim,    S_IRUGO, show_zero_req_lim,	   NULL);
2862 static DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,   NULL);
2863 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
2864 static DEVICE_ATTR(ch_count,        S_IRUGO, show_ch_count,        NULL);
2865 static DEVICE_ATTR(comp_vector,     S_IRUGO, show_comp_vector,     NULL);
2866 static DEVICE_ATTR(tl_retry_count,  S_IRUGO, show_tl_retry_count,  NULL);
2867 static DEVICE_ATTR(cmd_sg_entries,  S_IRUGO, show_cmd_sg_entries,  NULL);
2868 static DEVICE_ATTR(allow_ext_sg,    S_IRUGO, show_allow_ext_sg,    NULL);
2869 
2870 static struct device_attribute *srp_host_attrs[] = {
2871 	&dev_attr_id_ext,
2872 	&dev_attr_ioc_guid,
2873 	&dev_attr_service_id,
2874 	&dev_attr_pkey,
2875 	&dev_attr_sgid,
2876 	&dev_attr_dgid,
2877 	&dev_attr_orig_dgid,
2878 	&dev_attr_req_lim,
2879 	&dev_attr_zero_req_lim,
2880 	&dev_attr_local_ib_port,
2881 	&dev_attr_local_ib_device,
2882 	&dev_attr_ch_count,
2883 	&dev_attr_comp_vector,
2884 	&dev_attr_tl_retry_count,
2885 	&dev_attr_cmd_sg_entries,
2886 	&dev_attr_allow_ext_sg,
2887 	NULL
2888 };
2889 
2890 static struct scsi_host_template srp_template = {
2891 	.module				= THIS_MODULE,
2892 	.name				= "InfiniBand SRP initiator",
2893 	.proc_name			= DRV_NAME,
2894 	.slave_alloc			= srp_slave_alloc,
2895 	.slave_configure		= srp_slave_configure,
2896 	.info				= srp_target_info,
2897 	.queuecommand			= srp_queuecommand,
2898 	.change_queue_depth             = srp_change_queue_depth,
2899 	.eh_timed_out			= srp_timed_out,
2900 	.eh_abort_handler		= srp_abort,
2901 	.eh_device_reset_handler	= srp_reset_device,
2902 	.eh_host_reset_handler		= srp_reset_host,
2903 	.skip_settle_delay		= true,
2904 	.sg_tablesize			= SRP_DEF_SG_TABLESIZE,
2905 	.can_queue			= SRP_DEFAULT_CMD_SQ_SIZE,
2906 	.this_id			= -1,
2907 	.cmd_per_lun			= SRP_DEFAULT_CMD_SQ_SIZE,
2908 	.use_clustering			= ENABLE_CLUSTERING,
2909 	.shost_attrs			= srp_host_attrs,
2910 	.track_queue_depth		= 1,
2911 };
2912 
2913 static int srp_sdev_count(struct Scsi_Host *host)
2914 {
2915 	struct scsi_device *sdev;
2916 	int c = 0;
2917 
2918 	shost_for_each_device(sdev, host)
2919 		c++;
2920 
2921 	return c;
2922 }
2923 
2924 /*
2925  * Return values:
2926  * < 0 upon failure. Caller is responsible for SRP target port cleanup.
2927  * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
2928  *    removal has been scheduled.
2929  * 0 and target->state != SRP_TARGET_REMOVED upon success.
2930  */
2931 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
2932 {
2933 	struct srp_rport_identifiers ids;
2934 	struct srp_rport *rport;
2935 
2936 	target->state = SRP_TARGET_SCANNING;
2937 	sprintf(target->target_name, "SRP.T10:%016llX",
2938 		be64_to_cpu(target->id_ext));
2939 
2940 	if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dev.parent))
2941 		return -ENODEV;
2942 
2943 	memcpy(ids.port_id, &target->id_ext, 8);
2944 	memcpy(ids.port_id + 8, &target->ioc_guid, 8);
2945 	ids.roles = SRP_RPORT_ROLE_TARGET;
2946 	rport = srp_rport_add(target->scsi_host, &ids);
2947 	if (IS_ERR(rport)) {
2948 		scsi_remove_host(target->scsi_host);
2949 		return PTR_ERR(rport);
2950 	}
2951 
2952 	rport->lld_data = target;
2953 	target->rport = rport;
2954 
2955 	spin_lock(&host->target_lock);
2956 	list_add_tail(&target->list, &host->target_list);
2957 	spin_unlock(&host->target_lock);
2958 
2959 	scsi_scan_target(&target->scsi_host->shost_gendev,
2960 			 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
2961 
2962 	if (srp_connected_ch(target) < target->ch_count ||
2963 	    target->qp_in_error) {
2964 		shost_printk(KERN_INFO, target->scsi_host,
2965 			     PFX "SCSI scan failed - removing SCSI host\n");
2966 		srp_queue_remove_work(target);
2967 		goto out;
2968 	}
2969 
2970 	pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
2971 		 dev_name(&target->scsi_host->shost_gendev),
2972 		 srp_sdev_count(target->scsi_host));
2973 
2974 	spin_lock_irq(&target->lock);
2975 	if (target->state == SRP_TARGET_SCANNING)
2976 		target->state = SRP_TARGET_LIVE;
2977 	spin_unlock_irq(&target->lock);
2978 
2979 out:
2980 	return 0;
2981 }
2982 
2983 static void srp_release_dev(struct device *dev)
2984 {
2985 	struct srp_host *host =
2986 		container_of(dev, struct srp_host, dev);
2987 
2988 	complete(&host->released);
2989 }
2990 
2991 static struct class srp_class = {
2992 	.name    = "infiniband_srp",
2993 	.dev_release = srp_release_dev
2994 };
2995 
2996 /**
2997  * srp_conn_unique() - check whether the connection to a target is unique
2998  * @host:   SRP host.
2999  * @target: SRP target port.
3000  */
3001 static bool srp_conn_unique(struct srp_host *host,
3002 			    struct srp_target_port *target)
3003 {
3004 	struct srp_target_port *t;
3005 	bool ret = false;
3006 
3007 	if (target->state == SRP_TARGET_REMOVED)
3008 		goto out;
3009 
3010 	ret = true;
3011 
3012 	spin_lock(&host->target_lock);
3013 	list_for_each_entry(t, &host->target_list, list) {
3014 		if (t != target &&
3015 		    target->id_ext == t->id_ext &&
3016 		    target->ioc_guid == t->ioc_guid &&
3017 		    target->initiator_ext == t->initiator_ext) {
3018 			ret = false;
3019 			break;
3020 		}
3021 	}
3022 	spin_unlock(&host->target_lock);
3023 
3024 out:
3025 	return ret;
3026 }
3027 
3028 /*
3029  * Target ports are added by writing
3030  *
3031  *     id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
3032  *     pkey=<P_Key>,service_id=<service ID>
3033  *
3034  * to the add_target sysfs attribute.
3035  */
3036 enum {
3037 	SRP_OPT_ERR		= 0,
3038 	SRP_OPT_ID_EXT		= 1 << 0,
3039 	SRP_OPT_IOC_GUID	= 1 << 1,
3040 	SRP_OPT_DGID		= 1 << 2,
3041 	SRP_OPT_PKEY		= 1 << 3,
3042 	SRP_OPT_SERVICE_ID	= 1 << 4,
3043 	SRP_OPT_MAX_SECT	= 1 << 5,
3044 	SRP_OPT_MAX_CMD_PER_LUN	= 1 << 6,
3045 	SRP_OPT_IO_CLASS	= 1 << 7,
3046 	SRP_OPT_INITIATOR_EXT	= 1 << 8,
3047 	SRP_OPT_CMD_SG_ENTRIES	= 1 << 9,
3048 	SRP_OPT_ALLOW_EXT_SG	= 1 << 10,
3049 	SRP_OPT_SG_TABLESIZE	= 1 << 11,
3050 	SRP_OPT_COMP_VECTOR	= 1 << 12,
3051 	SRP_OPT_TL_RETRY_COUNT	= 1 << 13,
3052 	SRP_OPT_QUEUE_SIZE	= 1 << 14,
3053 	SRP_OPT_ALL		= (SRP_OPT_ID_EXT	|
3054 				   SRP_OPT_IOC_GUID	|
3055 				   SRP_OPT_DGID		|
3056 				   SRP_OPT_PKEY		|
3057 				   SRP_OPT_SERVICE_ID),
3058 };
3059 
3060 static const match_table_t srp_opt_tokens = {
3061 	{ SRP_OPT_ID_EXT,		"id_ext=%s" 		},
3062 	{ SRP_OPT_IOC_GUID,		"ioc_guid=%s" 		},
3063 	{ SRP_OPT_DGID,			"dgid=%s" 		},
3064 	{ SRP_OPT_PKEY,			"pkey=%x" 		},
3065 	{ SRP_OPT_SERVICE_ID,		"service_id=%s"		},
3066 	{ SRP_OPT_MAX_SECT,		"max_sect=%d" 		},
3067 	{ SRP_OPT_MAX_CMD_PER_LUN,	"max_cmd_per_lun=%d" 	},
3068 	{ SRP_OPT_IO_CLASS,		"io_class=%x"		},
3069 	{ SRP_OPT_INITIATOR_EXT,	"initiator_ext=%s"	},
3070 	{ SRP_OPT_CMD_SG_ENTRIES,	"cmd_sg_entries=%u"	},
3071 	{ SRP_OPT_ALLOW_EXT_SG,		"allow_ext_sg=%u"	},
3072 	{ SRP_OPT_SG_TABLESIZE,		"sg_tablesize=%u"	},
3073 	{ SRP_OPT_COMP_VECTOR,		"comp_vector=%u"	},
3074 	{ SRP_OPT_TL_RETRY_COUNT,	"tl_retry_count=%u"	},
3075 	{ SRP_OPT_QUEUE_SIZE,		"queue_size=%d"		},
3076 	{ SRP_OPT_ERR,			NULL 			}
3077 };
3078 
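/*
 * Parse the comma-separated key=value parameters written to the add_target
 * sysfs attribute into @target.
 */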
3079 static int srp_parse_options(const char *buf, struct srp_target_port *target)
3080 {
3081 	char *options, *sep_opt;
3082 	char *p;
3083 	char dgid[3];
3084 	substring_t args[MAX_OPT_ARGS];
3085 	int opt_mask = 0;
3086 	int token;
3087 	int ret = -EINVAL;
3088 	int i;
3089 
3090 	options = kstrdup(buf, GFP_KERNEL);
3091 	if (!options)
3092 		return -ENOMEM;
3093 
3094 	sep_opt = options;
3095 	while ((p = strsep(&sep_opt, ",\n")) != NULL) {
3096 		if (!*p)
3097 			continue;
3098 
3099 		token = match_token(p, srp_opt_tokens, args);
3100 		opt_mask |= token;
3101 
3102 		switch (token) {
3103 		case SRP_OPT_ID_EXT:
3104 			p = match_strdup(args);
3105 			if (!p) {
3106 				ret = -ENOMEM;
3107 				goto out;
3108 			}
3109 			target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3110 			kfree(p);
3111 			break;
3112 
3113 		case SRP_OPT_IOC_GUID:
3114 			p = match_strdup(args);
3115 			if (!p) {
3116 				ret = -ENOMEM;
3117 				goto out;
3118 			}
3119 			target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
3120 			kfree(p);
3121 			break;
3122 
3123 		case SRP_OPT_DGID:
3124 			p = match_strdup(args);
3125 			if (!p) {
3126 				ret = -ENOMEM;
3127 				goto out;
3128 			}
3129 			if (strlen(p) != 32) {
3130 				pr_warn("bad dest GID parameter '%s'\n", p);
3131 				kfree(p);
3132 				goto out;
3133 			}
3134 
3135 			for (i = 0; i < 16; ++i) {
3136 				strlcpy(dgid, p + i * 2, sizeof(dgid));
3137 				if (sscanf(dgid, "%hhx",
3138 					   &target->orig_dgid.raw[i]) < 1) {
3139 					ret = -EINVAL;
3140 					kfree(p);
3141 					goto out;
3142 				}
3143 			}
3144 			kfree(p);
3145 			break;
3146 
3147 		case SRP_OPT_PKEY:
3148 			if (match_hex(args, &token)) {
3149 				pr_warn("bad P_Key parameter '%s'\n", p);
3150 				goto out;
3151 			}
3152 			target->pkey = cpu_to_be16(token);
3153 			break;
3154 
3155 		case SRP_OPT_SERVICE_ID:
3156 			p = match_strdup(args);
3157 			if (!p) {
3158 				ret = -ENOMEM;
3159 				goto out;
3160 			}
3161 			target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
3162 			kfree(p);
3163 			break;
3164 
3165 		case SRP_OPT_MAX_SECT:
3166 			if (match_int(args, &token)) {
3167 				pr_warn("bad max sect parameter '%s'\n", p);
3168 				goto out;
3169 			}
3170 			target->scsi_host->max_sectors = token;
3171 			break;
3172 
3173 		case SRP_OPT_QUEUE_SIZE:
3174 			if (match_int(args, &token) || token < 1) {
3175 				pr_warn("bad queue_size parameter '%s'\n", p);
3176 				goto out;
3177 			}
3178 			target->scsi_host->can_queue = token;
3179 			target->queue_size = token + SRP_RSP_SQ_SIZE +
3180 					     SRP_TSK_MGMT_SQ_SIZE;
3181 			if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3182 				target->scsi_host->cmd_per_lun = token;
3183 			break;
3184 
3185 		case SRP_OPT_MAX_CMD_PER_LUN:
3186 			if (match_int(args, &token) || token < 1) {
3187 				pr_warn("bad max cmd_per_lun parameter '%s'\n",
3188 					p);
3189 				goto out;
3190 			}
3191 			target->scsi_host->cmd_per_lun = token;
3192 			break;
3193 
3194 		case SRP_OPT_IO_CLASS:
3195 			if (match_hex(args, &token)) {
3196 				pr_warn("bad IO class parameter '%s'\n", p);
3197 				goto out;
3198 			}
3199 			if (token != SRP_REV10_IB_IO_CLASS &&
3200 			    token != SRP_REV16A_IB_IO_CLASS) {
3201 				pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3202 					token, SRP_REV10_IB_IO_CLASS,
3203 					SRP_REV16A_IB_IO_CLASS);
3204 				goto out;
3205 			}
3206 			target->io_class = token;
3207 			break;
3208 
3209 		case SRP_OPT_INITIATOR_EXT:
3210 			p = match_strdup(args);
3211 			if (!p) {
3212 				ret = -ENOMEM;
3213 				goto out;
3214 			}
3215 			target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3216 			kfree(p);
3217 			break;
3218 
3219 		case SRP_OPT_CMD_SG_ENTRIES:
3220 			if (match_int(args, &token) || token < 1 || token > 255) {
3221 				pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3222 					p);
3223 				goto out;
3224 			}
3225 			target->cmd_sg_cnt = token;
3226 			break;
3227 
3228 		case SRP_OPT_ALLOW_EXT_SG:
3229 			if (match_int(args, &token)) {
3230 				pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3231 				goto out;
3232 			}
3233 			target->allow_ext_sg = !!token;
3234 			break;
3235 
3236 		case SRP_OPT_SG_TABLESIZE:
3237 			if (match_int(args, &token) || token < 1 ||
3238 					token > SG_MAX_SEGMENTS) {
3239 				pr_warn("bad max sg_tablesize parameter '%s'\n",
3240 					p);
3241 				goto out;
3242 			}
3243 			target->sg_tablesize = token;
3244 			break;
3245 
3246 		case SRP_OPT_COMP_VECTOR:
3247 			if (match_int(args, &token) || token < 0) {
3248 				pr_warn("bad comp_vector parameter '%s'\n", p);
3249 				goto out;
3250 			}
3251 			target->comp_vector = token;
3252 			break;
3253 
3254 		case SRP_OPT_TL_RETRY_COUNT:
3255 			if (match_int(args, &token) || token < 2 || token > 7) {
3256 				pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3257 					p);
3258 				goto out;
3259 			}
3260 			target->tl_retry_count = token;
3261 			break;
3262 
3263 		default:
3264 			pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3265 				p);
3266 			goto out;
3267 		}
3268 	}
3269 
3270 	if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
3271 		ret = 0;
3272 	else
3273 		for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
3274 			if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
3275 			    !(srp_opt_tokens[i].token & opt_mask))
3276 				pr_warn("target creation request is missing parameter '%s'\n",
3277 					srp_opt_tokens[i].pattern);
3278 
	if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue &&
	    (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3281 		pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3282 			target->scsi_host->cmd_per_lun,
3283 			target->scsi_host->can_queue);
3284 
3285 out:
3286 	kfree(options);
3287 	return ret;
3288 }
3289 
3290 static ssize_t srp_create_target(struct device *dev,
3291 				 struct device_attribute *attr,
3292 				 const char *buf, size_t count)
3293 {
3294 	struct srp_host *host =
3295 		container_of(dev, struct srp_host, dev);
3296 	struct Scsi_Host *target_host;
3297 	struct srp_target_port *target;
3298 	struct srp_rdma_ch *ch;
3299 	struct srp_device *srp_dev = host->srp_dev;
3300 	struct ib_device *ibdev = srp_dev->dev;
3301 	int ret, node_idx, node, cpu, i;
3302 	unsigned int max_sectors_per_mr, mr_per_cmd = 0;
3303 	bool multich = false;
3304 
3305 	target_host = scsi_host_alloc(&srp_template,
3306 				      sizeof (struct srp_target_port));
3307 	if (!target_host)
3308 		return -ENOMEM;
3309 
3310 	target_host->transportt  = ib_srp_transport_template;
3311 	target_host->max_channel = 0;
3312 	target_host->max_id      = 1;
3313 	target_host->max_lun     = -1LL;
3314 	target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3315 
3316 	target = host_to_target(target_host);
3317 
3318 	target->io_class	= SRP_REV16A_IB_IO_CLASS;
3319 	target->scsi_host	= target_host;
3320 	target->srp_host	= host;
3321 	target->pd		= host->srp_dev->pd;
3322 	target->lkey		= host->srp_dev->pd->local_dma_lkey;
3323 	target->cmd_sg_cnt	= cmd_sg_entries;
3324 	target->sg_tablesize	= indirect_sg_entries ? : cmd_sg_entries;
3325 	target->allow_ext_sg	= allow_ext_sg;
3326 	target->tl_retry_count	= 7;
3327 	target->queue_size	= SRP_DEFAULT_QUEUE_SIZE;
3328 
	/*
	 * Prevent the SCSI host from being removed by srp_remove_target()
	 * before this function returns.
	 */
3333 	scsi_host_get(target->scsi_host);
3334 
3335 	ret = mutex_lock_interruptible(&host->add_target_mutex);
3336 	if (ret < 0)
3337 		goto put;
3338 
3339 	ret = srp_parse_options(buf, target);
3340 	if (ret)
3341 		goto out;
3342 
3343 	target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3344 
3345 	if (!srp_conn_unique(target->srp_host, target)) {
3346 		shost_printk(KERN_INFO, target->scsi_host,
3347 			     PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3348 			     be64_to_cpu(target->id_ext),
3349 			     be64_to_cpu(target->ioc_guid),
3350 			     be64_to_cpu(target->initiator_ext));
3351 		ret = -EEXIST;
3352 		goto out;
3353 	}
3354 
3355 	if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3356 	    target->cmd_sg_cnt < target->sg_tablesize) {
3357 		pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3358 		target->sg_tablesize = target->cmd_sg_cnt;
3359 	}
3360 
3361 	if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
3362 		/*
3363 		 * FR and FMR can only map one HCA page per entry. If the
3364 		 * start address is not aligned on a HCA page boundary two
3365 		 * entries will be used for the head and the tail although
3366 		 * these two entries combined contain at most one HCA page of
3367 		 * data. Hence the "+ 1" in the calculation below.
3368 		 *
3369 		 * The indirect data buffer descriptor is contiguous so the
3370 		 * memory for that buffer will only be registered if
3371 		 * register_always is true. Hence add one to mr_per_cmd if
3372 		 * register_always has been set.
3373 		 */
3374 		max_sectors_per_mr = srp_dev->max_pages_per_mr <<
3375 				  (ilog2(srp_dev->mr_page_size) - 9);
3376 		mr_per_cmd = register_always +
3377 			(target->scsi_host->max_sectors + 1 +
3378 			 max_sectors_per_mr - 1) / max_sectors_per_mr;
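		/*
		 * Example (illustrative values only): with 4 KiB HCA pages
		 * and max_pages_per_mr = 256, max_sectors_per_mr = 2048;
		 * max_sectors = 1024 and register_always then yield
		 * mr_per_cmd = 1 + (1024 + 1 + 2047) / 2048 = 2.
		 */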
3379 		pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
3380 			 target->scsi_host->max_sectors,
3381 			 srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
3382 			 max_sectors_per_mr, mr_per_cmd);
3383 	}
3384 
3385 	target_host->sg_tablesize = target->sg_tablesize;
3386 	target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
3387 	target->mr_per_cmd = mr_per_cmd;
3388 	target->indirect_size = target->sg_tablesize *
3389 				sizeof (struct srp_direct_buf);
3390 	target->max_iu_len = sizeof (struct srp_cmd) +
3391 			     sizeof (struct srp_indirect_buf) +
3392 			     target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
3393 
3394 	INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3395 	INIT_WORK(&target->remove_work, srp_remove_work);
3396 	spin_lock_init(&target->lock);
3397 	ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
3398 	if (ret)
3399 		goto out;
3400 
3401 	ret = -ENOMEM;
3402 	target->ch_count = max_t(unsigned, num_online_nodes(),
3403 				 min(ch_count ? :
3404 				     min(4 * num_online_nodes(),
3405 					 ibdev->num_comp_vectors),
3406 				     num_online_cpus()));
3407 	target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3408 			     GFP_KERNEL);
3409 	if (!target->ch)
3410 		goto out;
3411 
3412 	node_idx = 0;
3413 	for_each_online_node(node) {
3414 		const int ch_start = (node_idx * target->ch_count /
3415 				      num_online_nodes());
3416 		const int ch_end = ((node_idx + 1) * target->ch_count /
3417 				    num_online_nodes());
3418 		const int cv_start = (node_idx * ibdev->num_comp_vectors /
3419 				      num_online_nodes() + target->comp_vector)
3420 				     % ibdev->num_comp_vectors;
3421 		const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
3422 				    num_online_nodes() + target->comp_vector)
3423 				   % ibdev->num_comp_vectors;
3424 		int cpu_idx = 0;
3425 
3426 		for_each_online_cpu(cpu) {
3427 			if (cpu_to_node(cpu) != node)
3428 				continue;
3429 			if (ch_start + cpu_idx >= ch_end)
3430 				continue;
3431 			ch = &target->ch[ch_start + cpu_idx];
3432 			ch->target = target;
3433 			ch->comp_vector = cv_start == cv_end ? cv_start :
3434 				cv_start + cpu_idx % (cv_end - cv_start);
3435 			spin_lock_init(&ch->lock);
3436 			INIT_LIST_HEAD(&ch->free_tx);
3437 			ret = srp_new_cm_id(ch);
3438 			if (ret)
3439 				goto err_disconnect;
3440 
3441 			ret = srp_create_ch_ib(ch);
3442 			if (ret)
3443 				goto err_disconnect;
3444 
3445 			ret = srp_alloc_req_data(ch);
3446 			if (ret)
3447 				goto err_disconnect;
3448 
3449 			ret = srp_connect_ch(ch, multich);
3450 			if (ret) {
3451 				shost_printk(KERN_ERR, target->scsi_host,
3452 					     PFX "Connection %d/%d to %pI6 failed\n",
3453 					     ch_start + cpu_idx,
3454 					     target->ch_count,
3455 					     ch->target->orig_dgid.raw);
3456 				if (node_idx == 0 && cpu_idx == 0) {
3457 					goto free_ch;
3458 				} else {
3459 					srp_free_ch_ib(target, ch);
3460 					srp_free_req_data(target, ch);
3461 					target->ch_count = ch - target->ch;
3462 					goto connected;
3463 				}
3464 			}
3465 
3466 			multich = true;
3467 			cpu_idx++;
3468 		}
3469 		node_idx++;
3470 	}
3471 
3472 connected:
3473 	target->scsi_host->nr_hw_queues = target->ch_count;
3474 
3475 	ret = srp_add_target(host, target);
3476 	if (ret)
3477 		goto err_disconnect;
3478 
3479 	if (target->state != SRP_TARGET_REMOVED) {
3480 		shost_printk(KERN_DEBUG, target->scsi_host, PFX
3481 			     "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3482 			     be64_to_cpu(target->id_ext),
3483 			     be64_to_cpu(target->ioc_guid),
3484 			     be16_to_cpu(target->pkey),
3485 			     be64_to_cpu(target->service_id),
3486 			     target->sgid.raw, target->orig_dgid.raw);
3487 	}
3488 
3489 	ret = count;
3490 
3491 out:
3492 	mutex_unlock(&host->add_target_mutex);
3493 
3494 put:
3495 	scsi_host_put(target->scsi_host);
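	/*
	 * Upon failure the SCSI host is not (or no longer) registered, so also
	 * drop the initial reference obtained through scsi_host_alloc().
	 */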
3496 	if (ret < 0)
3497 		scsi_host_put(target->scsi_host);
3498 
3499 	return ret;
3500 
3501 err_disconnect:
3502 	srp_disconnect_target(target);
3503 
3504 free_ch:
3505 	for (i = 0; i < target->ch_count; i++) {
3506 		ch = &target->ch[i];
3507 		srp_free_ch_ib(target, ch);
3508 		srp_free_req_data(target, ch);
3509 	}
3510 
3511 	kfree(target->ch);
3512 	goto out;
3513 }
3514 
3515 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
3516 
3517 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
3518 			  char *buf)
3519 {
3520 	struct srp_host *host = container_of(dev, struct srp_host, dev);
3521 
3522 	return sprintf(buf, "%s\n", host->srp_dev->dev->name);
3523 }
3524 
3525 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
3526 
3527 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
3528 			 char *buf)
3529 {
3530 	struct srp_host *host = container_of(dev, struct srp_host, dev);
3531 
3532 	return sprintf(buf, "%d\n", host->port);
3533 }
3534 
3535 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
3536 
3537 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
3538 {
3539 	struct srp_host *host;
3540 
3541 	host = kzalloc(sizeof *host, GFP_KERNEL);
3542 	if (!host)
3543 		return NULL;
3544 
3545 	INIT_LIST_HEAD(&host->target_list);
3546 	spin_lock_init(&host->target_lock);
3547 	init_completion(&host->released);
3548 	mutex_init(&host->add_target_mutex);
3549 	host->srp_dev = device;
3550 	host->port = port;
3551 
3552 	host->dev.class = &srp_class;
3553 	host->dev.parent = device->dev->dev.parent;
3554 	dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
3555 
3556 	if (device_register(&host->dev))
3557 		goto free_host;
3558 	if (device_create_file(&host->dev, &dev_attr_add_target))
3559 		goto err_class;
3560 	if (device_create_file(&host->dev, &dev_attr_ibdev))
3561 		goto err_class;
3562 	if (device_create_file(&host->dev, &dev_attr_port))
3563 		goto err_class;
3564 
3565 	return host;
3566 
3567 err_class:
3568 	device_unregister(&host->dev);
3569 
3570 free_host:
3571 	kfree(host);
3572 
3573 	return NULL;
3574 }
3575 
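/*
 * IB client .add() callback: allocate per-HCA state, select the memory
 * registration strategy and register an srp_host for every port.
 */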
3576 static void srp_add_one(struct ib_device *device)
3577 {
3578 	struct srp_device *srp_dev;
3579 	struct ib_device_attr *attr = &device->attrs;
3580 	struct srp_host *host;
3581 	int mr_page_shift, p;
3582 	u64 max_pages_per_mr;
3583 	unsigned int flags = 0;
3584 
3585 	srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
3586 	if (!srp_dev)
3587 		return;
3588 
3589 	/*
3590 	 * Use the smallest page size supported by the HCA, down to a
3591 	 * minimum of 4096 bytes. We're unlikely to build large sglists
3592 	 * out of smaller entries.
3593 	 */
3594 	mr_page_shift		= max(12, ffs(attr->page_size_cap) - 1);
3595 	srp_dev->mr_page_size	= 1 << mr_page_shift;
3596 	srp_dev->mr_page_mask	= ~((u64) srp_dev->mr_page_size - 1);
3597 	max_pages_per_mr	= attr->max_mr_size;
3598 	do_div(max_pages_per_mr, srp_dev->mr_page_size);
3599 	pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
3600 		 attr->max_mr_size, srp_dev->mr_page_size,
3601 		 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
3602 	srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
3603 					  max_pages_per_mr);
3604 
3605 	srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
3606 			    device->map_phys_fmr && device->unmap_fmr);
3607 	srp_dev->has_fr = (attr->device_cap_flags &
3608 			   IB_DEVICE_MEM_MGT_EXTENSIONS);
3609 	if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
3610 		dev_warn(&device->dev, "neither FMR nor FR is supported\n");
3611 	} else if (!never_register &&
3612 		   attr->max_mr_size >= 2 * srp_dev->mr_page_size) {
3613 		srp_dev->use_fast_reg = (srp_dev->has_fr &&
3614 					 (!srp_dev->has_fmr || prefer_fr));
3615 		srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
3616 	}
3617 
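	/*
	 * Whenever memory registration is not used for every request
	 * (never_register, !register_always or no MR support at all), fall
	 * back to the global rkey of the PD. Note that this exposes all
	 * local memory for remote access through a single rkey.
	 */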
3618 	if (never_register || !register_always ||
3619 	    (!srp_dev->has_fmr && !srp_dev->has_fr))
3620 		flags |= IB_PD_UNSAFE_GLOBAL_RKEY;
3621 
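	/*
	 * A single fast registration work request cannot map more pages
	 * than max_fast_reg_page_list_len allows.
	 */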
3622 	if (srp_dev->use_fast_reg) {
3623 		srp_dev->max_pages_per_mr =
3624 			min_t(u32, srp_dev->max_pages_per_mr,
3625 			      attr->max_fast_reg_page_list_len);
3626 	}
3627 	srp_dev->mr_max_size	= srp_dev->mr_page_size *
3628 				   srp_dev->max_pages_per_mr;
3629 	pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
3630 		 device->name, mr_page_shift, attr->max_mr_size,
3631 		 attr->max_fast_reg_page_list_len,
3632 		 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
3633 
3634 	INIT_LIST_HEAD(&srp_dev->dev_list);
3635 
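	/* A single protection domain is shared by all targets on this HCA. */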
3636 	srp_dev->dev = device;
3637 	srp_dev->pd  = ib_alloc_pd(device, flags);
3638 	if (IS_ERR(srp_dev->pd))
3639 		goto free_dev;
3640 
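	/* Create an srp_host and its sysfs entries for each port of the HCA. */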
3642 	for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
3643 		host = srp_add_port(srp_dev, p);
3644 		if (host)
3645 			list_add_tail(&host->list, &srp_dev->dev_list);
3646 	}
3647 
3648 	ib_set_client_data(device, &srp_client, srp_dev);
3649 	return;
3650 
3651 free_dev:
3652 	kfree(srp_dev);
3653 }
3654 
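/**
 * srp_remove_one - IB client callback invoked when an RDMA device is removed
 * @device: RDMA device being removed.
 * @client_data: The srp_device pointer set by srp_add_one(), if any.
 *
 * Unregisters the per-port sysfs devices, schedules removal of all target
 * ports and waits for that work to finish before releasing the protection
 * domain and freeing the srp_device.
 */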
3655 static void srp_remove_one(struct ib_device *device, void *client_data)
3656 {
3657 	struct srp_device *srp_dev;
3658 	struct srp_host *host, *tmp_host;
3659 	struct srp_target_port *target;
3660 
3661 	srp_dev = client_data;
3662 	if (!srp_dev)
3663 		return;
3664 
3665 	list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
3666 		device_unregister(&host->dev);
3667 		/*
3668 		 * Wait for the sysfs entry to go away, so that no new
3669 		 * target ports can be created.
3670 		 */
3671 		wait_for_completion(&host->released);
3672 
3673 		/*
3674 		 * Remove all target ports.
3675 		 */
3676 		spin_lock(&host->target_lock);
3677 		list_for_each_entry(target, &host->target_list, list)
3678 			srp_queue_remove_work(target);
3679 		spin_unlock(&host->target_lock);
3680 
3681 		/*
3682 		 * Wait for tl_err and target port removal tasks.
3683 		 */
3684 		flush_workqueue(system_long_wq);
3685 		flush_workqueue(srp_remove_wq);
3686 
3687 		kfree(host);
3688 	}
3689 
3690 	ib_dealloc_pd(srp_dev->pd);
3691 
3692 	kfree(srp_dev);
3693 }
3694 
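/*
 * Callbacks and reconnect/failure timeouts exported to the SCSI SRP
 * transport class (scsi_transport_srp).
 */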
3695 static struct srp_function_template ib_srp_transport_functions = {
3696 	.has_rport_state	 = true,
3697 	.reset_timer_if_blocked	 = true,
3698 	.reconnect_delay	 = &srp_reconnect_delay,
3699 	.fast_io_fail_tmo	 = &srp_fast_io_fail_tmo,
3700 	.dev_loss_tmo		 = &srp_dev_loss_tmo,
3701 	.reconnect		 = srp_rport_reconnect,
3702 	.rport_delete		 = srp_rport_delete,
3703 	.terminate_rport_io	 = srp_terminate_io,
3704 };
3705 
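/*
 * Module initialization: validate the scatter/gather module parameters,
 * create the removal workqueue, then register the SRP transport template,
 * the infiniband_srp class, the SA client and the IB client.
 */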
3706 static int __init srp_init_module(void)
3707 {
3708 	int ret;
3709 
3710 	if (srp_sg_tablesize) {
3711 		pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
3712 		if (!cmd_sg_entries)
3713 			cmd_sg_entries = srp_sg_tablesize;
3714 	}
3715 
3716 	if (!cmd_sg_entries)
3717 		cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
3718 
3719 	if (cmd_sg_entries > 255) {
3720 		pr_warn("Clamping cmd_sg_entries to 255\n");
3721 		cmd_sg_entries = 255;
3722 	}
3723 
3724 	if (!indirect_sg_entries)
3725 		indirect_sg_entries = cmd_sg_entries;
3726 	else if (indirect_sg_entries < cmd_sg_entries) {
3727 		pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
3728 			cmd_sg_entries);
3729 		indirect_sg_entries = cmd_sg_entries;
3730 	}
3731 
3732 	if (indirect_sg_entries > SG_MAX_SEGMENTS) {
3733 		pr_warn("Clamping indirect_sg_entries to %u\n",
3734 			SG_MAX_SEGMENTS);
3735 		indirect_sg_entries = SG_MAX_SEGMENTS;
3736 	}
3737 
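	/* Dedicated workqueue on which target port removal work runs. */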
3738 	srp_remove_wq = create_workqueue("srp_remove");
3739 	if (!srp_remove_wq) {
3740 		ret = -ENOMEM;
3741 		goto out;
3742 	}
3743 
3744 	ret = -ENOMEM;
3745 	ib_srp_transport_template =
3746 		srp_attach_transport(&ib_srp_transport_functions);
3747 	if (!ib_srp_transport_template)
3748 		goto destroy_wq;
3749 
3750 	ret = class_register(&srp_class);
3751 	if (ret) {
3752 		pr_err("couldn't register class infiniband_srp\n");
3753 		goto release_tr;
3754 	}
3755 
3756 	ib_sa_register_client(&srp_sa_client);
3757 
3758 	ret = ib_register_client(&srp_client);
3759 	if (ret) {
3760 		pr_err("couldn't register IB client\n");
3761 		goto unreg_sa;
3762 	}
3763 
3764 out:
3765 	return ret;
3766 
3767 unreg_sa:
3768 	ib_sa_unregister_client(&srp_sa_client);
3769 	class_unregister(&srp_class);
3770 
3771 release_tr:
3772 	srp_release_transport(ib_srp_transport_template);
3773 
3774 destroy_wq:
3775 	destroy_workqueue(srp_remove_wq);
3776 	goto out;
3777 }
3778 
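/* Module unload: tear everything down in the reverse order of srp_init_module(). */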
3779 static void __exit srp_cleanup_module(void)
3780 {
3781 	ib_unregister_client(&srp_client);
3782 	ib_sa_unregister_client(&srp_sa_client);
3783 	class_unregister(&srp_class);
3784 	srp_release_transport(ib_srp_transport_template);
3785 	destroy_workqueue(srp_remove_wq);
3786 }
3787 
3788 module_init(srp_init_module);
3789 module_exit(srp_cleanup_module);
3790