1 /*
2  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
34 
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/err.h>
39 #include <linux/string.h>
40 #include <linux/parser.h>
41 #include <linux/random.h>
42 #include <linux/jiffies.h>
43 #include <linux/lockdep.h>
44 #include <rdma/ib_cache.h>
45 
46 #include <linux/atomic.h>
47 
48 #include <scsi/scsi.h>
49 #include <scsi/scsi_device.h>
50 #include <scsi/scsi_dbg.h>
51 #include <scsi/scsi_tcq.h>
52 #include <scsi/srp.h>
53 #include <scsi/scsi_transport_srp.h>
54 
55 #include "ib_srp.h"
56 
57 #define DRV_NAME	"ib_srp"
58 #define PFX		DRV_NAME ": "
59 #define DRV_VERSION	"2.0"
60 #define DRV_RELDATE	"July 26, 2015"
61 
62 MODULE_AUTHOR("Roland Dreier");
63 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
64 MODULE_LICENSE("Dual BSD/GPL");
65 MODULE_VERSION(DRV_VERSION);
66 MODULE_INFO(release_date, DRV_RELDATE);
67 
68 #if !defined(CONFIG_DYNAMIC_DEBUG)
69 #define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
70 #define DYNAMIC_DEBUG_BRANCH(descriptor) false
71 #endif
72 
73 static unsigned int srp_sg_tablesize;
74 static unsigned int cmd_sg_entries;
75 static unsigned int indirect_sg_entries;
76 static bool allow_ext_sg;
77 static bool prefer_fr = true;
78 static bool register_always = true;
79 static bool never_register;
80 static int topspin_workarounds = 1;
81 
82 module_param(srp_sg_tablesize, uint, 0444);
83 MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
84 
85 module_param(cmd_sg_entries, uint, 0444);
86 MODULE_PARM_DESC(cmd_sg_entries,
87 		 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
88 
89 module_param(indirect_sg_entries, uint, 0444);
90 MODULE_PARM_DESC(indirect_sg_entries,
91 		 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");
92 
93 module_param(allow_ext_sg, bool, 0444);
94 MODULE_PARM_DESC(allow_ext_sg,
95 		  "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
96 
97 module_param(topspin_workarounds, int, 0444);
98 MODULE_PARM_DESC(topspin_workarounds,
99 		 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
100 
101 module_param(prefer_fr, bool, 0444);
102 MODULE_PARM_DESC(prefer_fr,
103 "Whether to use fast registration if both FMR and fast registration are supported");
104 
105 module_param(register_always, bool, 0444);
106 MODULE_PARM_DESC(register_always,
107 		 "Use memory registration even for contiguous memory regions");
108 
109 module_param(never_register, bool, 0444);
110 MODULE_PARM_DESC(never_register, "Never register memory");
111 
112 static const struct kernel_param_ops srp_tmo_ops;
113 
114 static int srp_reconnect_delay = 10;
115 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
116 		S_IRUGO | S_IWUSR);
117 MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
118 
119 static int srp_fast_io_fail_tmo = 15;
120 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
121 		S_IRUGO | S_IWUSR);
122 MODULE_PARM_DESC(fast_io_fail_tmo,
123 		 "Number of seconds between the observation of a transport"
124 		 " layer error and failing all I/O. \"off\" means that this"
125 		 " functionality is disabled.");
126 
127 static int srp_dev_loss_tmo = 600;
128 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
129 		S_IRUGO | S_IWUSR);
130 MODULE_PARM_DESC(dev_loss_tmo,
131 		 "Maximum number of seconds that the SRP transport should"
132 		 " insulate transport layer errors. After this time has been"
133 		 " exceeded the SCSI host is removed. Should be"
134 		 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
135 		 " if fast_io_fail_tmo has not been set. \"off\" means that"
136 		 " this functionality is disabled.");
137 
138 static unsigned ch_count;
139 module_param(ch_count, uint, 0444);
140 MODULE_PARM_DESC(ch_count,
141 		 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
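
/*
 * Example (hypothetical values): the module parameters above can be set at
 * load time, e.g.:
 *
 *   modprobe ib_srp cmd_sg_entries=64 ch_count=4 register_always=N
 */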
142 
143 static void srp_add_one(struct ib_device *device);
144 static void srp_remove_one(struct ib_device *device, void *client_data);
145 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
146 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
147 		const char *opname);
148 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
149 
150 static struct scsi_transport_template *ib_srp_transport_template;
151 static struct workqueue_struct *srp_remove_wq;
152 
153 static struct ib_client srp_client = {
154 	.name   = "srp",
155 	.add    = srp_add_one,
156 	.remove = srp_remove_one
157 };
158 
159 static struct ib_sa_client srp_sa_client;
160 
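/*
 * srp_tmo_get() and srp_tmo_set() implement the kernel_param_ops for the
 * reconnect_delay, fast_io_fail_tmo and dev_loss_tmo module parameters, which
 * are exposed under /sys/module/ib_srp/parameters/. A negative timeout is
 * reported as "off", and srp_tmo_set() rejects combinations that
 * srp_tmo_valid() considers invalid. Example (hypothetical value):
 *
 *   echo 30 > /sys/module/ib_srp/parameters/fast_io_fail_tmo
 */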
161 static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
162 {
163 	int tmo = *(int *)kp->arg;
164 
165 	if (tmo >= 0)
166 		return sprintf(buffer, "%d", tmo);
167 	else
168 		return sprintf(buffer, "off");
169 }
170 
171 static int srp_tmo_set(const char *val, const struct kernel_param *kp)
172 {
173 	int tmo, res;
174 
175 	res = srp_parse_tmo(&tmo, val);
176 	if (res)
177 		goto out;
178 
179 	if (kp->arg == &srp_reconnect_delay)
180 		res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
181 				    srp_dev_loss_tmo);
182 	else if (kp->arg == &srp_fast_io_fail_tmo)
183 		res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
184 	else
185 		res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
186 				    tmo);
187 	if (res)
188 		goto out;
189 	*(int *)kp->arg = tmo;
190 
191 out:
192 	return res;
193 }
194 
195 static const struct kernel_param_ops srp_tmo_ops = {
196 	.get = srp_tmo_get,
197 	.set = srp_tmo_set,
198 };
199 
200 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
201 {
202 	return (struct srp_target_port *) host->hostdata;
203 }
204 
205 static const char *srp_target_info(struct Scsi_Host *host)
206 {
207 	return host_to_target(host)->target_name;
208 }
209 
210 static int srp_target_is_topspin(struct srp_target_port *target)
211 {
212 	static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
213 	static const u8 cisco_oui[3]   = { 0x00, 0x1b, 0x0d };
214 
215 	return topspin_workarounds &&
216 		(!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
217 		 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
218 }
219 
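/*
 * Allocate an information unit (IU) buffer of @size bytes and DMA-map it for
 * @direction. Returns NULL if allocation or DMA mapping fails.
 */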
220 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
221 				   gfp_t gfp_mask,
222 				   enum dma_data_direction direction)
223 {
224 	struct srp_iu *iu;
225 
226 	iu = kmalloc(sizeof *iu, gfp_mask);
227 	if (!iu)
228 		goto out;
229 
230 	iu->buf = kzalloc(size, gfp_mask);
231 	if (!iu->buf)
232 		goto out_free_iu;
233 
234 	iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
235 				    direction);
236 	if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
237 		goto out_free_buf;
238 
239 	iu->size      = size;
240 	iu->direction = direction;
241 
242 	return iu;
243 
244 out_free_buf:
245 	kfree(iu->buf);
246 out_free_iu:
247 	kfree(iu);
248 out:
249 	return NULL;
250 }
251 
252 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
253 {
254 	if (!iu)
255 		return;
256 
257 	ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
258 			    iu->direction);
259 	kfree(iu->buf);
260 	kfree(iu);
261 }
262 
263 static void srp_qp_event(struct ib_event *event, void *context)
264 {
265 	pr_debug("QP event %s (%d)\n",
266 		 ib_event_msg(event->event), event->event);
267 }
268 
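/*
 * Move a newly created QP to the INIT state and set its P_Key index, access
 * flags and port number.
 */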
269 static int srp_init_qp(struct srp_target_port *target,
270 		       struct ib_qp *qp)
271 {
272 	struct ib_qp_attr *attr;
273 	int ret;
274 
275 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
276 	if (!attr)
277 		return -ENOMEM;
278 
279 	ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
280 				  target->srp_host->port,
281 				  be16_to_cpu(target->pkey),
282 				  &attr->pkey_index);
283 	if (ret)
284 		goto out;
285 
286 	attr->qp_state        = IB_QPS_INIT;
287 	attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
288 				    IB_ACCESS_REMOTE_WRITE);
289 	attr->port_num        = target->srp_host->port;
290 
291 	ret = ib_modify_qp(qp, attr,
292 			   IB_QP_STATE		|
293 			   IB_QP_PKEY_INDEX	|
294 			   IB_QP_ACCESS_FLAGS	|
295 			   IB_QP_PORT);
296 
297 out:
298 	kfree(attr);
299 	return ret;
300 }
301 
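/*
 * Allocate a new IB CM ID for @ch, replacing any existing one, and
 * (re)initialize the path record fields used by the subsequent path query.
 */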
302 static int srp_new_cm_id(struct srp_rdma_ch *ch)
303 {
304 	struct srp_target_port *target = ch->target;
305 	struct ib_cm_id *new_cm_id;
306 
307 	new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
308 				    srp_cm_handler, ch);
309 	if (IS_ERR(new_cm_id))
310 		return PTR_ERR(new_cm_id);
311 
312 	if (ch->cm_id)
313 		ib_destroy_cm_id(ch->cm_id);
314 	ch->cm_id = new_cm_id;
315 	if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev,
316 			    target->srp_host->port))
317 		ch->path.rec_type = SA_PATH_REC_TYPE_OPA;
318 	else
319 		ch->path.rec_type = SA_PATH_REC_TYPE_IB;
320 	ch->path.sgid = target->sgid;
321 	ch->path.dgid = target->orig_dgid;
322 	ch->path.pkey = target->pkey;
323 	sa_path_set_service_id(&ch->path, target->service_id);
324 
325 	return 0;
326 }
327 
328 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
329 {
330 	struct srp_device *dev = target->srp_host->srp_dev;
331 	struct ib_fmr_pool_param fmr_param;
332 
333 	memset(&fmr_param, 0, sizeof(fmr_param));
334 	fmr_param.pool_size	    = target->mr_pool_size;
335 	fmr_param.dirty_watermark   = fmr_param.pool_size / 4;
336 	fmr_param.cache		    = 1;
337 	fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
338 	fmr_param.page_shift	    = ilog2(dev->mr_page_size);
339 	fmr_param.access	    = (IB_ACCESS_LOCAL_WRITE |
340 				       IB_ACCESS_REMOTE_WRITE |
341 				       IB_ACCESS_REMOTE_READ);
342 
343 	return ib_create_fmr_pool(dev->pd, &fmr_param);
344 }
345 
346 /**
347  * srp_destroy_fr_pool() - free the resources owned by a pool
348  * @pool: Fast registration pool to be destroyed.
349  */
350 static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
351 {
352 	int i;
353 	struct srp_fr_desc *d;
354 
355 	if (!pool)
356 		return;
357 
358 	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
359 		if (d->mr)
360 			ib_dereg_mr(d->mr);
361 	}
362 	kfree(pool);
363 }
364 
365 /**
366  * srp_create_fr_pool() - allocate and initialize a pool for fast registration
367  * @device:            IB device to allocate fast registration descriptors for.
368  * @pd:                Protection domain associated with the FR descriptors.
369  * @pool_size:         Number of descriptors to allocate.
370  * @max_page_list_len: Maximum fast registration work request page list length.
371  */
372 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
373 					      struct ib_pd *pd, int pool_size,
374 					      int max_page_list_len)
375 {
376 	struct srp_fr_pool *pool;
377 	struct srp_fr_desc *d;
378 	struct ib_mr *mr;
379 	int i, ret = -EINVAL;
380 
381 	if (pool_size <= 0)
382 		goto err;
383 	ret = -ENOMEM;
384 	pool = kzalloc(sizeof(struct srp_fr_pool) +
385 		       pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
386 	if (!pool)
387 		goto err;
388 	pool->size = pool_size;
389 	pool->max_page_list_len = max_page_list_len;
390 	spin_lock_init(&pool->lock);
391 	INIT_LIST_HEAD(&pool->free_list);
392 
393 	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
394 		mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
395 				 max_page_list_len);
396 		if (IS_ERR(mr)) {
397 			ret = PTR_ERR(mr);
398 			if (ret == -ENOMEM)
399 				pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n",
400 					dev_name(&device->dev));
401 			goto destroy_pool;
402 		}
403 		d->mr = mr;
404 		list_add_tail(&d->entry, &pool->free_list);
405 	}
406 
407 out:
408 	return pool;
409 
410 destroy_pool:
411 	srp_destroy_fr_pool(pool);
412 
413 err:
414 	pool = ERR_PTR(ret);
415 	goto out;
416 }
417 
418 /**
419  * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
420  * @pool: Pool to obtain descriptor from.
421  */
422 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
423 {
424 	struct srp_fr_desc *d = NULL;
425 	unsigned long flags;
426 
427 	spin_lock_irqsave(&pool->lock, flags);
428 	if (!list_empty(&pool->free_list)) {
429 		d = list_first_entry(&pool->free_list, typeof(*d), entry);
430 		list_del(&d->entry);
431 	}
432 	spin_unlock_irqrestore(&pool->lock, flags);
433 
434 	return d;
435 }
436 
437 /**
438  * srp_fr_pool_put() - put an FR descriptor back in the free list
439  * @pool: Pool the descriptor was allocated from.
440  * @desc: Pointer to an array of fast registration descriptor pointers.
441  * @n:    Number of descriptors to put back.
442  *
443  * Note: The caller must already have queued an invalidation request for
444  * desc->mr->rkey before calling this function.
445  */
446 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
447 			    int n)
448 {
449 	unsigned long flags;
450 	int i;
451 
452 	spin_lock_irqsave(&pool->lock, flags);
453 	for (i = 0; i < n; i++)
454 		list_add(&desc[i]->entry, &pool->free_list);
455 	spin_unlock_irqrestore(&pool->lock, flags);
456 }
457 
458 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
459 {
460 	struct srp_device *dev = target->srp_host->srp_dev;
461 
462 	return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
463 				  dev->max_pages_per_mr);
464 }
465 
466 /**
467  * srp_destroy_qp() - destroy an RDMA queue pair
468  * @ch: SRP RDMA channel.
469  * @qp: RDMA queue pair.
470  *
471  * Drain the qp before destroying it. This prevents the receive completion
472  * handler from accessing the queue pair while it is being destroyed.
473  */
474 static void srp_destroy_qp(struct srp_rdma_ch *ch, struct ib_qp *qp)
475 {
476 	spin_lock_irq(&ch->lock);
477 	ib_process_cq_direct(ch->send_cq, -1);
478 	spin_unlock_irq(&ch->lock);
479 
480 	ib_drain_qp(qp);
481 	ib_destroy_qp(qp);
482 }
483 
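/*
 * Allocate the completion queues, the QP and, if memory registration is used,
 * the FR or FMR pool for @ch, replacing any previously allocated channel
 * resources.
 */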
484 static int srp_create_ch_ib(struct srp_rdma_ch *ch)
485 {
486 	struct srp_target_port *target = ch->target;
487 	struct srp_device *dev = target->srp_host->srp_dev;
488 	struct ib_qp_init_attr *init_attr;
489 	struct ib_cq *recv_cq, *send_cq;
490 	struct ib_qp *qp;
491 	struct ib_fmr_pool *fmr_pool = NULL;
492 	struct srp_fr_pool *fr_pool = NULL;
493 	const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
494 	int ret;
495 
496 	init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
497 	if (!init_attr)
498 		return -ENOMEM;
499 
500 	/* queue_size + 1 for ib_drain_rq() */
501 	recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
502 				ch->comp_vector, IB_POLL_SOFTIRQ);
503 	if (IS_ERR(recv_cq)) {
504 		ret = PTR_ERR(recv_cq);
505 		goto err;
506 	}
507 
508 	send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
509 				ch->comp_vector, IB_POLL_DIRECT);
510 	if (IS_ERR(send_cq)) {
511 		ret = PTR_ERR(send_cq);
512 		goto err_recv_cq;
513 	}
514 
515 	init_attr->event_handler       = srp_qp_event;
516 	init_attr->cap.max_send_wr     = m * target->queue_size;
517 	init_attr->cap.max_recv_wr     = target->queue_size + 1;
518 	init_attr->cap.max_recv_sge    = 1;
519 	init_attr->cap.max_send_sge    = 1;
520 	init_attr->sq_sig_type         = IB_SIGNAL_REQ_WR;
521 	init_attr->qp_type             = IB_QPT_RC;
522 	init_attr->send_cq             = send_cq;
523 	init_attr->recv_cq             = recv_cq;
524 
525 	qp = ib_create_qp(dev->pd, init_attr);
526 	if (IS_ERR(qp)) {
527 		ret = PTR_ERR(qp);
528 		goto err_send_cq;
529 	}
530 
531 	ret = srp_init_qp(target, qp);
532 	if (ret)
533 		goto err_qp;
534 
535 	if (dev->use_fast_reg) {
536 		fr_pool = srp_alloc_fr_pool(target);
537 		if (IS_ERR(fr_pool)) {
538 			ret = PTR_ERR(fr_pool);
539 			shost_printk(KERN_WARNING, target->scsi_host, PFX
540 				     "FR pool allocation failed (%d)\n", ret);
541 			goto err_qp;
542 		}
543 	} else if (dev->use_fmr) {
544 		fmr_pool = srp_alloc_fmr_pool(target);
545 		if (IS_ERR(fmr_pool)) {
546 			ret = PTR_ERR(fmr_pool);
547 			shost_printk(KERN_WARNING, target->scsi_host, PFX
548 				     "FMR pool allocation failed (%d)\n", ret);
549 			goto err_qp;
550 		}
551 	}
552 
553 	if (ch->qp)
554 		srp_destroy_qp(ch, ch->qp);
555 	if (ch->recv_cq)
556 		ib_free_cq(ch->recv_cq);
557 	if (ch->send_cq)
558 		ib_free_cq(ch->send_cq);
559 
560 	ch->qp = qp;
561 	ch->recv_cq = recv_cq;
562 	ch->send_cq = send_cq;
563 
564 	if (dev->use_fast_reg) {
565 		if (ch->fr_pool)
566 			srp_destroy_fr_pool(ch->fr_pool);
567 		ch->fr_pool = fr_pool;
568 	} else if (dev->use_fmr) {
569 		if (ch->fmr_pool)
570 			ib_destroy_fmr_pool(ch->fmr_pool);
571 		ch->fmr_pool = fmr_pool;
572 	}
573 
574 	kfree(init_attr);
575 	return 0;
576 
577 err_qp:
578 	srp_destroy_qp(ch, qp);
579 
580 err_send_cq:
581 	ib_free_cq(send_cq);
582 
583 err_recv_cq:
584 	ib_free_cq(recv_cq);
585 
586 err:
587 	kfree(init_attr);
588 	return ret;
589 }
590 
591 /*
592  * Note: this function may be called without srp_alloc_iu_bufs() having been
593  * invoked. Hence the ch->[rt]x_ring checks.
594  */
595 static void srp_free_ch_ib(struct srp_target_port *target,
596 			   struct srp_rdma_ch *ch)
597 {
598 	struct srp_device *dev = target->srp_host->srp_dev;
599 	int i;
600 
601 	if (!ch->target)
602 		return;
603 
604 	if (ch->cm_id) {
605 		ib_destroy_cm_id(ch->cm_id);
606 		ch->cm_id = NULL;
607 	}
608 
609 	/* If srp_new_cm_id() succeeded but srp_create_ch_ib() did not, return. */
610 	if (!ch->qp)
611 		return;
612 
613 	if (dev->use_fast_reg) {
614 		if (ch->fr_pool)
615 			srp_destroy_fr_pool(ch->fr_pool);
616 	} else if (dev->use_fmr) {
617 		if (ch->fmr_pool)
618 			ib_destroy_fmr_pool(ch->fmr_pool);
619 	}
620 
621 	srp_destroy_qp(ch, ch->qp);
622 	ib_free_cq(ch->send_cq);
623 	ib_free_cq(ch->recv_cq);
624 
625 	/*
626 	 * Prevent the SCSI error handler from using this channel after it
627 	 * has been freed. The SCSI error handler may continue trying to
628 	 * perform recovery actions after scsi_remove_host() has
629 	 * returned.
630 	 */
631 	ch->target = NULL;
632 
633 	ch->qp = NULL;
634 	ch->send_cq = ch->recv_cq = NULL;
635 
636 	if (ch->rx_ring) {
637 		for (i = 0; i < target->queue_size; ++i)
638 			srp_free_iu(target->srp_host, ch->rx_ring[i]);
639 		kfree(ch->rx_ring);
640 		ch->rx_ring = NULL;
641 	}
642 	if (ch->tx_ring) {
643 		for (i = 0; i < target->queue_size; ++i)
644 			srp_free_iu(target->srp_host, ch->tx_ring[i]);
645 		kfree(ch->tx_ring);
646 		ch->tx_ring = NULL;
647 	}
648 }
649 
650 static void srp_path_rec_completion(int status,
651 				    struct sa_path_rec *pathrec,
652 				    void *ch_ptr)
653 {
654 	struct srp_rdma_ch *ch = ch_ptr;
655 	struct srp_target_port *target = ch->target;
656 
657 	ch->status = status;
658 	if (status)
659 		shost_printk(KERN_ERR, target->scsi_host,
660 			     PFX "Got failed path rec status %d\n", status);
661 	else
662 		ch->path = *pathrec;
663 	complete(&ch->done);
664 }
665 
666 static int srp_lookup_path(struct srp_rdma_ch *ch)
667 {
668 	struct srp_target_port *target = ch->target;
669 	int ret;
670 
671 	ch->path.numb_path = 1;
672 
673 	init_completion(&ch->done);
674 
675 	ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
676 					       target->srp_host->srp_dev->dev,
677 					       target->srp_host->port,
678 					       &ch->path,
679 					       IB_SA_PATH_REC_SERVICE_ID |
680 					       IB_SA_PATH_REC_DGID	 |
681 					       IB_SA_PATH_REC_SGID	 |
682 					       IB_SA_PATH_REC_NUMB_PATH	 |
683 					       IB_SA_PATH_REC_PKEY,
684 					       SRP_PATH_REC_TIMEOUT_MS,
685 					       GFP_KERNEL,
686 					       srp_path_rec_completion,
687 					       ch, &ch->path_query);
688 	if (ch->path_query_id < 0)
689 		return ch->path_query_id;
690 
691 	ret = wait_for_completion_interruptible(&ch->done);
692 	if (ret < 0)
693 		return ret;
694 
695 	if (ch->status < 0)
696 		shost_printk(KERN_WARNING, target->scsi_host,
697 			     PFX "Path record query failed\n");
698 
699 	return ch->status;
700 }
701 
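/*
 * Build an SRP_LOGIN_REQ for @ch and send it as an IB CM REQ.
 */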
702 static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
703 {
704 	struct srp_target_port *target = ch->target;
705 	struct {
706 		struct ib_cm_req_param param;
707 		struct srp_login_req   priv;
708 	} *req = NULL;
709 	int status;
710 
711 	req = kzalloc(sizeof *req, GFP_KERNEL);
712 	if (!req)
713 		return -ENOMEM;
714 
715 	req->param.primary_path		      = &ch->path;
716 	req->param.alternate_path 	      = NULL;
717 	req->param.service_id 		      = target->service_id;
718 	req->param.qp_num		      = ch->qp->qp_num;
719 	req->param.qp_type		      = ch->qp->qp_type;
720 	req->param.private_data 	      = &req->priv;
721 	req->param.private_data_len 	      = sizeof req->priv;
722 	req->param.flow_control 	      = 1;
723 
724 	get_random_bytes(&req->param.starting_psn, 4);
725 	req->param.starting_psn 	     &= 0xffffff;
726 
727 	/*
728 	 * Pick some arbitrary defaults here; we could make these
729 	 * module parameters if anyone cared about setting them.
730 	 */
731 	req->param.responder_resources	      = 4;
732 	req->param.remote_cm_response_timeout = 20;
733 	req->param.local_cm_response_timeout  = 20;
734 	req->param.retry_count                = target->tl_retry_count;
735 	req->param.rnr_retry_count 	      = 7;
736 	req->param.max_cm_retries 	      = 15;
737 
738 	req->priv.opcode     	= SRP_LOGIN_REQ;
739 	req->priv.tag        	= 0;
740 	req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
741 	req->priv.req_buf_fmt 	= cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
742 					      SRP_BUF_FORMAT_INDIRECT);
743 	req->priv.req_flags	= (multich ? SRP_MULTICHAN_MULTI :
744 				   SRP_MULTICHAN_SINGLE);
745 	/*
746 	 * In the published SRP specification (draft rev. 16a), the
747 	 * port identifier format is 8 bytes of ID extension followed
748 	 * by 8 bytes of GUID.  Older drafts put the two halves in the
749 	 * opposite order, so that the GUID comes first.
750 	 *
751 	 * Targets conforming to these obsolete drafts can be
752 	 * recognized by the I/O Class they report.
753 	 */
754 	if (target->io_class == SRP_REV10_IB_IO_CLASS) {
755 		memcpy(req->priv.initiator_port_id,
756 		       &target->sgid.global.interface_id, 8);
757 		memcpy(req->priv.initiator_port_id + 8,
758 		       &target->initiator_ext, 8);
759 		memcpy(req->priv.target_port_id,     &target->ioc_guid, 8);
760 		memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
761 	} else {
762 		memcpy(req->priv.initiator_port_id,
763 		       &target->initiator_ext, 8);
764 		memcpy(req->priv.initiator_port_id + 8,
765 		       &target->sgid.global.interface_id, 8);
766 		memcpy(req->priv.target_port_id,     &target->id_ext, 8);
767 		memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
768 	}
769 
770 	/*
771 	 * Topspin/Cisco SRP targets will reject our login unless we
772 	 * zero out the first 8 bytes of our initiator port ID and set
773 	 * the second 8 bytes to the local node GUID.
774 	 */
775 	if (srp_target_is_topspin(target)) {
776 		shost_printk(KERN_DEBUG, target->scsi_host,
777 			     PFX "Topspin/Cisco initiator port ID workaround "
778 			     "activated for target GUID %016llx\n",
779 			     be64_to_cpu(target->ioc_guid));
780 		memset(req->priv.initiator_port_id, 0, 8);
781 		memcpy(req->priv.initiator_port_id + 8,
782 		       &target->srp_host->srp_dev->dev->node_guid, 8);
783 	}
784 
785 	status = ib_send_cm_req(ch->cm_id, &req->param);
786 
787 	kfree(req);
788 
789 	return status;
790 }
791 
792 static bool srp_queue_remove_work(struct srp_target_port *target)
793 {
794 	bool changed = false;
795 
796 	spin_lock_irq(&target->lock);
797 	if (target->state != SRP_TARGET_REMOVED) {
798 		target->state = SRP_TARGET_REMOVED;
799 		changed = true;
800 	}
801 	spin_unlock_irq(&target->lock);
802 
803 	if (changed)
804 		queue_work(srp_remove_wq, &target->remove_work);
805 
806 	return changed;
807 }
808 
809 static void srp_disconnect_target(struct srp_target_port *target)
810 {
811 	struct srp_rdma_ch *ch;
812 	int i;
813 
814 	/* XXX should send SRP_I_LOGOUT request */
815 
816 	for (i = 0; i < target->ch_count; i++) {
817 		ch = &target->ch[i];
818 		ch->connected = false;
819 		if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
820 			shost_printk(KERN_DEBUG, target->scsi_host,
821 				     PFX "Sending CM DREQ failed\n");
822 		}
823 	}
824 }
825 
826 static void srp_free_req_data(struct srp_target_port *target,
827 			      struct srp_rdma_ch *ch)
828 {
829 	struct srp_device *dev = target->srp_host->srp_dev;
830 	struct ib_device *ibdev = dev->dev;
831 	struct srp_request *req;
832 	int i;
833 
834 	if (!ch->req_ring)
835 		return;
836 
837 	for (i = 0; i < target->req_ring_size; ++i) {
838 		req = &ch->req_ring[i];
839 		if (dev->use_fast_reg) {
840 			kfree(req->fr_list);
841 		} else {
842 			kfree(req->fmr_list);
843 			kfree(req->map_page);
844 		}
845 		if (req->indirect_dma_addr) {
846 			ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
847 					    target->indirect_size,
848 					    DMA_TO_DEVICE);
849 		}
850 		kfree(req->indirect_desc);
851 	}
852 
853 	kfree(ch->req_ring);
854 	ch->req_ring = NULL;
855 }
856 
857 static int srp_alloc_req_data(struct srp_rdma_ch *ch)
858 {
859 	struct srp_target_port *target = ch->target;
860 	struct srp_device *srp_dev = target->srp_host->srp_dev;
861 	struct ib_device *ibdev = srp_dev->dev;
862 	struct srp_request *req;
863 	void *mr_list;
864 	dma_addr_t dma_addr;
865 	int i, ret = -ENOMEM;
866 
867 	ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
868 			       GFP_KERNEL);
869 	if (!ch->req_ring)
870 		goto out;
871 
872 	for (i = 0; i < target->req_ring_size; ++i) {
873 		req = &ch->req_ring[i];
874 		mr_list = kmalloc(target->mr_per_cmd * sizeof(void *),
875 				  GFP_KERNEL);
876 		if (!mr_list)
877 			goto out;
878 		if (srp_dev->use_fast_reg) {
879 			req->fr_list = mr_list;
880 		} else {
881 			req->fmr_list = mr_list;
882 			req->map_page = kmalloc(srp_dev->max_pages_per_mr *
883 						sizeof(void *), GFP_KERNEL);
884 			if (!req->map_page)
885 				goto out;
886 		}
887 		req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
888 		if (!req->indirect_desc)
889 			goto out;
890 
891 		dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
892 					     target->indirect_size,
893 					     DMA_TO_DEVICE);
894 		if (ib_dma_mapping_error(ibdev, dma_addr))
895 			goto out;
896 
897 		req->indirect_dma_addr = dma_addr;
898 	}
899 	ret = 0;
900 
901 out:
902 	return ret;
903 }
904 
905 /**
906  * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
907  * @shost: SCSI host whose attributes to remove from sysfs.
908  *
909  * Note: any attributes defined in the host template that did not exist in
910  * sysfs before this function was invoked are silently ignored.
911  */
912 static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
913 {
914 	struct device_attribute **attr;
915 
916 	for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
917 		device_remove_file(&shost->shost_dev, *attr);
918 }
919 
920 static void srp_remove_target(struct srp_target_port *target)
921 {
922 	struct srp_rdma_ch *ch;
923 	int i;
924 
925 	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
926 
927 	srp_del_scsi_host_attr(target->scsi_host);
928 	srp_rport_get(target->rport);
929 	srp_remove_host(target->scsi_host);
930 	scsi_remove_host(target->scsi_host);
931 	srp_stop_rport_timers(target->rport);
932 	srp_disconnect_target(target);
933 	for (i = 0; i < target->ch_count; i++) {
934 		ch = &target->ch[i];
935 		srp_free_ch_ib(target, ch);
936 	}
937 	cancel_work_sync(&target->tl_err_work);
938 	srp_rport_put(target->rport);
939 	for (i = 0; i < target->ch_count; i++) {
940 		ch = &target->ch[i];
941 		srp_free_req_data(target, ch);
942 	}
943 	kfree(target->ch);
944 	target->ch = NULL;
945 
946 	spin_lock(&target->srp_host->target_lock);
947 	list_del(&target->list);
948 	spin_unlock(&target->srp_host->target_lock);
949 
950 	scsi_host_put(target->scsi_host);
951 }
952 
953 static void srp_remove_work(struct work_struct *work)
954 {
955 	struct srp_target_port *target =
956 		container_of(work, struct srp_target_port, remove_work);
957 
958 	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
959 
960 	srp_remove_target(target);
961 }
962 
963 static void srp_rport_delete(struct srp_rport *rport)
964 {
965 	struct srp_target_port *target = rport->lld_data;
966 
967 	srp_queue_remove_work(target);
968 }
969 
970 /**
971  * srp_connected_ch() - number of connected channels
972  * @target: SRP target port.
973  */
974 static int srp_connected_ch(struct srp_target_port *target)
975 {
976 	int i, c = 0;
977 
978 	for (i = 0; i < target->ch_count; i++)
979 		c += target->ch[i].connected;
980 
981 	return c;
982 }
983 
984 static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
985 {
986 	struct srp_target_port *target = ch->target;
987 	int ret;
988 
989 	WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
990 
991 	ret = srp_lookup_path(ch);
992 	if (ret)
993 		goto out;
994 
995 	while (1) {
996 		init_completion(&ch->done);
997 		ret = srp_send_req(ch, multich);
998 		if (ret)
999 			goto out;
1000 		ret = wait_for_completion_interruptible(&ch->done);
1001 		if (ret < 0)
1002 			goto out;
1003 
1004 		/*
1005 		 * The CM event handling code will set status to
1006 		 * SRP_PORT_REDIRECT if we get a port redirect REJ
1007 		 * back, or SRP_DLID_REDIRECT if we get a lid/qp
1008 		 * redirect REJ back.
1009 		 */
1010 		ret = ch->status;
1011 		switch (ret) {
1012 		case 0:
1013 			ch->connected = true;
1014 			goto out;
1015 
1016 		case SRP_PORT_REDIRECT:
1017 			ret = srp_lookup_path(ch);
1018 			if (ret)
1019 				goto out;
1020 			break;
1021 
1022 		case SRP_DLID_REDIRECT:
1023 			break;
1024 
1025 		case SRP_STALE_CONN:
1026 			shost_printk(KERN_ERR, target->scsi_host, PFX
1027 				     "giving up on stale connection\n");
1028 			ret = -ECONNRESET;
1029 			goto out;
1030 
1031 		default:
1032 			goto out;
1033 		}
1034 	}
1035 
1036 out:
1037 	return ret <= 0 ? ret : -ENODEV;
1038 }
1039 
1040 static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
1041 {
1042 	srp_handle_qp_err(cq, wc, "INV RKEY");
1043 }
1044 
1045 static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
1046 		u32 rkey)
1047 {
1048 	struct ib_send_wr *bad_wr;
1049 	struct ib_send_wr wr = {
1050 		.opcode		    = IB_WR_LOCAL_INV,
1051 		.next		    = NULL,
1052 		.num_sge	    = 0,
1053 		.send_flags	    = 0,
1054 		.ex.invalidate_rkey = rkey,
1055 	};
1056 
1057 	wr.wr_cqe = &req->reg_cqe;
1058 	req->reg_cqe.done = srp_inv_rkey_err_done;
1059 	return ib_post_send(ch->qp, &wr, &bad_wr);
1060 }
1061 
1062 static void srp_unmap_data(struct scsi_cmnd *scmnd,
1063 			   struct srp_rdma_ch *ch,
1064 			   struct srp_request *req)
1065 {
1066 	struct srp_target_port *target = ch->target;
1067 	struct srp_device *dev = target->srp_host->srp_dev;
1068 	struct ib_device *ibdev = dev->dev;
1069 	int i, res;
1070 
1071 	if (!scsi_sglist(scmnd) ||
1072 	    (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1073 	     scmnd->sc_data_direction != DMA_FROM_DEVICE))
1074 		return;
1075 
1076 	if (dev->use_fast_reg) {
1077 		struct srp_fr_desc **pfr;
1078 
1079 		for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1080 			res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
1081 			if (res < 0) {
1082 				shost_printk(KERN_ERR, target->scsi_host, PFX
1083 				  "Queueing INV WR for rkey %#x failed (%d)\n",
1084 				  (*pfr)->mr->rkey, res);
1085 				queue_work(system_long_wq,
1086 					   &target->tl_err_work);
1087 			}
1088 		}
1089 		if (req->nmdesc)
1090 			srp_fr_pool_put(ch->fr_pool, req->fr_list,
1091 					req->nmdesc);
1092 	} else if (dev->use_fmr) {
1093 		struct ib_pool_fmr **pfmr;
1094 
1095 		for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
1096 			ib_fmr_pool_unmap(*pfmr);
1097 	}
1098 
1099 	ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1100 			scmnd->sc_data_direction);
1101 }
1102 
1103 /**
1104  * srp_claim_req() - Take ownership of the scmnd associated with a request.
1105  * @ch: SRP RDMA channel.
1106  * @req: SRP request.
1107  * @sdev: If not NULL, only take ownership for this SCSI device.
1108  * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1109  *         ownership of @req->scmnd if it equals @scmnd.
1110  *
1111  * Return value:
1112  * Either NULL or a pointer to the SCSI command the caller became owner of.
1113  */
1114 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1115 				       struct srp_request *req,
1116 				       struct scsi_device *sdev,
1117 				       struct scsi_cmnd *scmnd)
1118 {
1119 	unsigned long flags;
1120 
1121 	spin_lock_irqsave(&ch->lock, flags);
1122 	if (req->scmnd &&
1123 	    (!sdev || req->scmnd->device == sdev) &&
1124 	    (!scmnd || req->scmnd == scmnd)) {
1125 		scmnd = req->scmnd;
1126 		req->scmnd = NULL;
1127 	} else {
1128 		scmnd = NULL;
1129 	}
1130 	spin_unlock_irqrestore(&ch->lock, flags);
1131 
1132 	return scmnd;
1133 }
1134 
1135 /**
1136  * srp_free_req() - Unmap data and adjust ch->req_lim.
1137  * @ch:     SRP RDMA channel.
1138  * @req:    Request to be freed.
1139  * @scmnd:  SCSI command associated with @req.
1140  * @req_lim_delta: Amount to be added to @ch->req_lim.
1141  */
1142 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1143 			 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1144 {
1145 	unsigned long flags;
1146 
1147 	srp_unmap_data(scmnd, ch, req);
1148 
1149 	spin_lock_irqsave(&ch->lock, flags);
1150 	ch->req_lim += req_lim_delta;
1151 	spin_unlock_irqrestore(&ch->lock, flags);
1152 }
1153 
1154 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1155 			   struct scsi_device *sdev, int result)
1156 {
1157 	struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1158 
1159 	if (scmnd) {
1160 		srp_free_req(ch, req, scmnd, 0);
1161 		scmnd->result = result;
1162 		scmnd->scsi_done(scmnd);
1163 	}
1164 }
1165 
1166 static void srp_terminate_io(struct srp_rport *rport)
1167 {
1168 	struct srp_target_port *target = rport->lld_data;
1169 	struct srp_rdma_ch *ch;
1170 	struct Scsi_Host *shost = target->scsi_host;
1171 	struct scsi_device *sdev;
1172 	int i, j;
1173 
1174 	/*
1175 	 * Invoking srp_terminate_io() while srp_queuecommand() is running
1176 	 * is not safe. Hence the warning statement below.
1177 	 */
1178 	shost_for_each_device(sdev, shost)
1179 		WARN_ON_ONCE(sdev->request_queue->request_fn_active);
1180 
1181 	for (i = 0; i < target->ch_count; i++) {
1182 		ch = &target->ch[i];
1183 
1184 		for (j = 0; j < target->req_ring_size; ++j) {
1185 			struct srp_request *req = &ch->req_ring[j];
1186 
1187 			srp_finish_req(ch, req, NULL,
1188 				       DID_TRANSPORT_FAILFAST << 16);
1189 		}
1190 	}
1191 }
1192 
1193 /*
1194  * It is up to the caller to ensure that srp_rport_reconnect() calls are
1195  * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1196  * srp_reset_device() or srp_reset_host() calls will occur while this function
1197  * is in progress. One way to guarantee that is not to call this function
1198  * directly but to call srp_reconnect_rport() instead, since that function
1199  * serializes calls to this function via rport->mutex and also blocks
1200  * srp_queuecommand() calls before invoking this function.
1201  */
1202 static int srp_rport_reconnect(struct srp_rport *rport)
1203 {
1204 	struct srp_target_port *target = rport->lld_data;
1205 	struct srp_rdma_ch *ch;
1206 	int i, j, ret = 0;
1207 	bool multich = false;
1208 
1209 	srp_disconnect_target(target);
1210 
1211 	if (target->state == SRP_TARGET_SCANNING)
1212 		return -ENODEV;
1213 
1214 	/*
1215 	 * Now get a new local CM ID so that we avoid confusing the target in
1216 	 * case things are really fouled up. Doing so also ensures that all CM
1217 	 * callbacks will have finished before a new QP is allocated.
1218 	 */
1219 	for (i = 0; i < target->ch_count; i++) {
1220 		ch = &target->ch[i];
1221 		ret += srp_new_cm_id(ch);
1222 	}
1223 	for (i = 0; i < target->ch_count; i++) {
1224 		ch = &target->ch[i];
1225 		for (j = 0; j < target->req_ring_size; ++j) {
1226 			struct srp_request *req = &ch->req_ring[j];
1227 
1228 			srp_finish_req(ch, req, NULL, DID_RESET << 16);
1229 		}
1230 	}
1231 	for (i = 0; i < target->ch_count; i++) {
1232 		ch = &target->ch[i];
1233 		/*
1234 		 * Whether or not creating a new CM ID succeeded, create a new
1235 		 * QP. This guarantees that all completion callback function
1236 		 * invocations have finished before request resetting starts.
1237 		 */
1238 		ret += srp_create_ch_ib(ch);
1239 
1240 		INIT_LIST_HEAD(&ch->free_tx);
1241 		for (j = 0; j < target->queue_size; ++j)
1242 			list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1243 	}
1244 
1245 	target->qp_in_error = false;
1246 
1247 	for (i = 0; i < target->ch_count; i++) {
1248 		ch = &target->ch[i];
1249 		if (ret)
1250 			break;
1251 		ret = srp_connect_ch(ch, multich);
1252 		multich = true;
1253 	}
1254 
1255 	if (ret == 0)
1256 		shost_printk(KERN_INFO, target->scsi_host,
1257 			     PFX "reconnect succeeded\n");
1258 
1259 	return ret;
1260 }
1261 
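/*
 * Append a direct memory descriptor (address, length and rkey) to the
 * descriptor list that is being built up in @state.
 */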
1262 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1263 			 unsigned int dma_len, u32 rkey)
1264 {
1265 	struct srp_direct_buf *desc = state->desc;
1266 
1267 	WARN_ON_ONCE(!dma_len);
1268 
1269 	desc->va = cpu_to_be64(dma_addr);
1270 	desc->key = cpu_to_be32(rkey);
1271 	desc->len = cpu_to_be32(dma_len);
1272 
1273 	state->total_len += dma_len;
1274 	state->desc++;
1275 	state->ndesc++;
1276 }
1277 
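/*
 * Register the pages accumulated in @state through the FMR pool of @ch and
 * append a matching memory descriptor. A single page is described directly
 * when the PD exposes an unsafe global rkey.
 */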
1278 static int srp_map_finish_fmr(struct srp_map_state *state,
1279 			      struct srp_rdma_ch *ch)
1280 {
1281 	struct srp_target_port *target = ch->target;
1282 	struct srp_device *dev = target->srp_host->srp_dev;
1283 	struct ib_pd *pd = target->pd;
1284 	struct ib_pool_fmr *fmr;
1285 	u64 io_addr = 0;
1286 
1287 	if (state->fmr.next >= state->fmr.end) {
1288 		shost_printk(KERN_ERR, ch->target->scsi_host,
1289 			     PFX "Out of MRs (mr_per_cmd = %d)\n",
1290 			     ch->target->mr_per_cmd);
1291 		return -ENOMEM;
1292 	}
1293 
1294 	WARN_ON_ONCE(!dev->use_fmr);
1295 
1296 	if (state->npages == 0)
1297 		return 0;
1298 
1299 	if (state->npages == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1300 		srp_map_desc(state, state->base_dma_addr, state->dma_len,
1301 			     pd->unsafe_global_rkey);
1302 		goto reset_state;
1303 	}
1304 
1305 	fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1306 				   state->npages, io_addr);
1307 	if (IS_ERR(fmr))
1308 		return PTR_ERR(fmr);
1309 
1310 	*state->fmr.next++ = fmr;
1311 	state->nmdesc++;
1312 
1313 	srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
1314 		     state->dma_len, fmr->fmr->rkey);
1315 
1316 reset_state:
1317 	state->npages = 0;
1318 	state->dma_len = 0;
1319 
1320 	return 0;
1321 }
1322 
1323 static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
1324 {
1325 	srp_handle_qp_err(cq, wc, "FAST REG");
1326 }
1327 
1328 /*
1329  * Map up to sg_nents elements of state->sg, where *sg_offset_p is the offset
1330  * at which to start in the first element. If sg_offset_p != NULL then
1331  * *sg_offset_p is updated to the offset in state->sg[retval] of the first
1332  * byte that has not yet been mapped.
1333  */
1334 static int srp_map_finish_fr(struct srp_map_state *state,
1335 			     struct srp_request *req,
1336 			     struct srp_rdma_ch *ch, int sg_nents,
1337 			     unsigned int *sg_offset_p)
1338 {
1339 	struct srp_target_port *target = ch->target;
1340 	struct srp_device *dev = target->srp_host->srp_dev;
1341 	struct ib_pd *pd = target->pd;
1342 	struct ib_send_wr *bad_wr;
1343 	struct ib_reg_wr wr;
1344 	struct srp_fr_desc *desc;
1345 	u32 rkey;
1346 	int n, err;
1347 
1348 	if (state->fr.next >= state->fr.end) {
1349 		shost_printk(KERN_ERR, ch->target->scsi_host,
1350 			     PFX "Out of MRs (mr_per_cmd = %d)\n",
1351 			     ch->target->mr_per_cmd);
1352 		return -ENOMEM;
1353 	}
1354 
1355 	WARN_ON_ONCE(!dev->use_fast_reg);
1356 
1357 	if (sg_nents == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1358 		unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1359 
1360 		srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
1361 			     sg_dma_len(state->sg) - sg_offset,
1362 			     pd->unsafe_global_rkey);
1363 		if (sg_offset_p)
1364 			*sg_offset_p = 0;
1365 		return 1;
1366 	}
1367 
1368 	desc = srp_fr_pool_get(ch->fr_pool);
1369 	if (!desc)
1370 		return -ENOMEM;
1371 
1372 	rkey = ib_inc_rkey(desc->mr->rkey);
1373 	ib_update_fast_reg_key(desc->mr, rkey);
1374 
1375 	n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
1376 			 dev->mr_page_size);
1377 	if (unlikely(n < 0)) {
1378 		srp_fr_pool_put(ch->fr_pool, &desc, 1);
1379 		pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
1380 			 dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
1381 			 sg_offset_p ? *sg_offset_p : -1, n);
1382 		return n;
1383 	}
1384 
1385 	WARN_ON_ONCE(desc->mr->length == 0);
1386 
1387 	req->reg_cqe.done = srp_reg_mr_err_done;
1388 
1389 	wr.wr.next = NULL;
1390 	wr.wr.opcode = IB_WR_REG_MR;
1391 	wr.wr.wr_cqe = &req->reg_cqe;
1392 	wr.wr.num_sge = 0;
1393 	wr.wr.send_flags = 0;
1394 	wr.mr = desc->mr;
1395 	wr.key = desc->mr->rkey;
1396 	wr.access = (IB_ACCESS_LOCAL_WRITE |
1397 		     IB_ACCESS_REMOTE_READ |
1398 		     IB_ACCESS_REMOTE_WRITE);
1399 
1400 	*state->fr.next++ = desc;
1401 	state->nmdesc++;
1402 
1403 	srp_map_desc(state, desc->mr->iova,
1404 		     desc->mr->length, desc->mr->rkey);
1405 
1406 	err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
1407 	if (unlikely(err)) {
1408 		WARN_ON_ONCE(err == -ENOMEM);
1409 		return err;
1410 	}
1411 
1412 	return n;
1413 }
1414 
1415 static int srp_map_sg_entry(struct srp_map_state *state,
1416 			    struct srp_rdma_ch *ch,
1417 			    struct scatterlist *sg)
1418 {
1419 	struct srp_target_port *target = ch->target;
1420 	struct srp_device *dev = target->srp_host->srp_dev;
1421 	struct ib_device *ibdev = dev->dev;
1422 	dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
1423 	unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
1424 	unsigned int len = 0;
1425 	int ret;
1426 
1427 	WARN_ON_ONCE(!dma_len);
1428 
1429 	while (dma_len) {
1430 		unsigned offset = dma_addr & ~dev->mr_page_mask;
1431 
1432 		if (state->npages == dev->max_pages_per_mr ||
1433 		    (state->npages > 0 && offset != 0)) {
1434 			ret = srp_map_finish_fmr(state, ch);
1435 			if (ret)
1436 				return ret;
1437 		}
1438 
1439 		len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1440 
1441 		if (!state->npages)
1442 			state->base_dma_addr = dma_addr;
1443 		state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1444 		state->dma_len += len;
1445 		dma_addr += len;
1446 		dma_len -= len;
1447 	}
1448 
1449 	/*
1450 	 * If the end of the MR is not on a page boundary then we need to
1451 	 * close it out and start a new one -- we can only merge at page
1452 	 * boundaries.
1453 	 */
1454 	ret = 0;
1455 	if ((dma_addr & ~dev->mr_page_mask) != 0)
1456 		ret = srp_map_finish_fmr(state, ch);
1457 	return ret;
1458 }
1459 
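/*
 * The srp_map_sg_fmr(), srp_map_sg_fr() and srp_map_sg_dma() helpers below
 * map a scatterlist onto memory descriptors using FMR, fast registration or
 * the unsafe global rkey, respectively.
 */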
1460 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1461 			  struct srp_request *req, struct scatterlist *scat,
1462 			  int count)
1463 {
1464 	struct scatterlist *sg;
1465 	int i, ret;
1466 
1467 	state->pages = req->map_page;
1468 	state->fmr.next = req->fmr_list;
1469 	state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;
1470 
1471 	for_each_sg(scat, sg, count, i) {
1472 		ret = srp_map_sg_entry(state, ch, sg);
1473 		if (ret)
1474 			return ret;
1475 	}
1476 
1477 	ret = srp_map_finish_fmr(state, ch);
1478 	if (ret)
1479 		return ret;
1480 
1481 	return 0;
1482 }
1483 
1484 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1485 			 struct srp_request *req, struct scatterlist *scat,
1486 			 int count)
1487 {
1488 	unsigned int sg_offset = 0;
1489 
1490 	state->fr.next = req->fr_list;
1491 	state->fr.end = req->fr_list + ch->target->mr_per_cmd;
1492 	state->sg = scat;
1493 
1494 	if (count == 0)
1495 		return 0;
1496 
1497 	while (count) {
1498 		int i, n;
1499 
1500 		n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
1501 		if (unlikely(n < 0))
1502 			return n;
1503 
1504 		count -= n;
1505 		for (i = 0; i < n; i++)
1506 			state->sg = sg_next(state->sg);
1507 	}
1508 
1509 	return 0;
1510 }
1511 
1512 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1513 			  struct srp_request *req, struct scatterlist *scat,
1514 			  int count)
1515 {
1516 	struct srp_target_port *target = ch->target;
1517 	struct srp_device *dev = target->srp_host->srp_dev;
1518 	struct scatterlist *sg;
1519 	int i;
1520 
1521 	for_each_sg(scat, sg, count, i) {
1522 		srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
1523 			     ib_sg_dma_len(dev->dev, sg),
1524 			     target->pd->unsafe_global_rkey);
1525 	}
1526 
1527 	return 0;
1528 }
1529 
1530 /*
1531  * Register the indirect data buffer descriptor with the HCA.
1532  *
1533  * Note: since the indirect data buffer descriptor has been allocated with
1534  * kmalloc() it is guaranteed that this buffer is a physically contiguous
1535  * memory buffer.
1536  */
1537 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1538 		       void **next_mr, void **end_mr, u32 idb_len,
1539 		       __be32 *idb_rkey)
1540 {
1541 	struct srp_target_port *target = ch->target;
1542 	struct srp_device *dev = target->srp_host->srp_dev;
1543 	struct srp_map_state state;
1544 	struct srp_direct_buf idb_desc;
1545 	u64 idb_pages[1];
1546 	struct scatterlist idb_sg[1];
1547 	int ret;
1548 
1549 	memset(&state, 0, sizeof(state));
1550 	memset(&idb_desc, 0, sizeof(idb_desc));
1551 	state.gen.next = next_mr;
1552 	state.gen.end = end_mr;
1553 	state.desc = &idb_desc;
1554 	state.base_dma_addr = req->indirect_dma_addr;
1555 	state.dma_len = idb_len;
1556 
1557 	if (dev->use_fast_reg) {
1558 		state.sg = idb_sg;
1559 		sg_init_one(idb_sg, req->indirect_desc, idb_len);
1560 		idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1561 #ifdef CONFIG_NEED_SG_DMA_LENGTH
1562 		idb_sg->dma_length = idb_sg->length;	      /* hack^2 */
1563 #endif
1564 		ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
1565 		if (ret < 0)
1566 			return ret;
1567 		WARN_ON_ONCE(ret < 1);
1568 	} else if (dev->use_fmr) {
1569 		state.pages = idb_pages;
1570 		state.pages[0] = (req->indirect_dma_addr &
1571 				  dev->mr_page_mask);
1572 		state.npages = 1;
1573 		ret = srp_map_finish_fmr(&state, ch);
1574 		if (ret < 0)
1575 			return ret;
1576 	} else {
1577 		return -EINVAL;
1578 	}
1579 
1580 	*idb_rkey = idb_desc.key;
1581 
1582 	return 0;
1583 }
1584 
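/*
 * Consistency check between the SCSI buffer length, the sum of the mapped
 * descriptor lengths and the total length registered through MRs. Only
 * invoked from srp_map_data() when the corresponding dynamic debug statement
 * is enabled.
 */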
1585 static void srp_check_mapping(struct srp_map_state *state,
1586 			      struct srp_rdma_ch *ch, struct srp_request *req,
1587 			      struct scatterlist *scat, int count)
1588 {
1589 	struct srp_device *dev = ch->target->srp_host->srp_dev;
1590 	struct srp_fr_desc **pfr;
1591 	u64 desc_len = 0, mr_len = 0;
1592 	int i;
1593 
1594 	for (i = 0; i < state->ndesc; i++)
1595 		desc_len += be32_to_cpu(req->indirect_desc[i].len);
1596 	if (dev->use_fast_reg)
1597 		for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
1598 			mr_len += (*pfr)->mr->length;
1599 	else if (dev->use_fmr)
1600 		for (i = 0; i < state->nmdesc; i++)
1601 			mr_len += be32_to_cpu(req->indirect_desc[i].len);
1602 	if (desc_len != scsi_bufflen(req->scmnd) ||
1603 	    mr_len > scsi_bufflen(req->scmnd))
1604 		pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
1605 		       scsi_bufflen(req->scmnd), desc_len, mr_len,
1606 		       state->ndesc, state->nmdesc);
1607 }
1608 
1609 /**
1610  * srp_map_data() - map SCSI data buffer onto an SRP request
1611  * @scmnd: SCSI command to map
1612  * @ch: SRP RDMA channel
1613  * @req: SRP request
1614  *
1615  * Returns the length in bytes of the SRP_CMD IU or a negative value if
1616  * mapping failed.
1617  */
1618 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1619 			struct srp_request *req)
1620 {
1621 	struct srp_target_port *target = ch->target;
1622 	struct ib_pd *pd = target->pd;
1623 	struct scatterlist *scat;
1624 	struct srp_cmd *cmd = req->cmd->buf;
1625 	int len, nents, count, ret;
1626 	struct srp_device *dev;
1627 	struct ib_device *ibdev;
1628 	struct srp_map_state state;
1629 	struct srp_indirect_buf *indirect_hdr;
1630 	u32 idb_len, table_len;
1631 	__be32 idb_rkey;
1632 	u8 fmt;
1633 
1634 	if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1635 		return sizeof (struct srp_cmd);
1636 
1637 	if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1638 	    scmnd->sc_data_direction != DMA_TO_DEVICE) {
1639 		shost_printk(KERN_WARNING, target->scsi_host,
1640 			     PFX "Unhandled data direction %d\n",
1641 			     scmnd->sc_data_direction);
1642 		return -EINVAL;
1643 	}
1644 
1645 	nents = scsi_sg_count(scmnd);
1646 	scat  = scsi_sglist(scmnd);
1647 
1648 	dev = target->srp_host->srp_dev;
1649 	ibdev = dev->dev;
1650 
1651 	count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1652 	if (unlikely(count == 0))
1653 		return -EIO;
1654 
1655 	fmt = SRP_DATA_DESC_DIRECT;
1656 	len = sizeof (struct srp_cmd) +	sizeof (struct srp_direct_buf);
1657 
1658 	if (count == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1659 		/*
1660 		 * The midlayer only generated a single gather/scatter
1661 		 * entry, or DMA mapping coalesced everything to a
1662 		 * single entry.  So a direct descriptor along with
1663 		 * the DMA MR suffices.
1664 		 */
1665 		struct srp_direct_buf *buf = (void *) cmd->add_data;
1666 
1667 		buf->va  = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1668 		buf->key = cpu_to_be32(pd->unsafe_global_rkey);
1669 		buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
1670 
1671 		req->nmdesc = 0;
1672 		goto map_complete;
1673 	}
1674 
1675 	/*
1676 	 * We have more than one scatter/gather entry, so build our indirect
1677 	 * descriptor table, trying to merge as many entries as we can.
1678 	 */
1679 	indirect_hdr = (void *) cmd->add_data;
1680 
1681 	ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1682 				   target->indirect_size, DMA_TO_DEVICE);
1683 
1684 	memset(&state, 0, sizeof(state));
1685 	state.desc = req->indirect_desc;
1686 	if (dev->use_fast_reg)
1687 		ret = srp_map_sg_fr(&state, ch, req, scat, count);
1688 	else if (dev->use_fmr)
1689 		ret = srp_map_sg_fmr(&state, ch, req, scat, count);
1690 	else
1691 		ret = srp_map_sg_dma(&state, ch, req, scat, count);
1692 	req->nmdesc = state.nmdesc;
1693 	if (ret < 0)
1694 		goto unmap;
1695 
1696 	{
1697 		DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
1698 			"Memory mapping consistency check");
1699 		if (DYNAMIC_DEBUG_BRANCH(ddm))
1700 			srp_check_mapping(&state, ch, req, scat, count);
1701 	}
1702 
1703 	/* We've mapped the request, now pull as much of the indirect
1704 	 * descriptor table as we can into the command buffer. If this
1705 	 * target is not using an external indirect table, we are
1706 	 * guaranteed to fit into the command, as the SCSI layer won't
1707 	 * give us more S/G entries than we allow.
1708 	 */
1709 	if (state.ndesc == 1) {
1710 		/*
1711 		 * Memory registration collapsed the sg-list into one entry,
1712 		 * so use a direct descriptor.
1713 		 */
1714 		struct srp_direct_buf *buf = (void *) cmd->add_data;
1715 
1716 		*buf = req->indirect_desc[0];
1717 		goto map_complete;
1718 	}
1719 
1720 	if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1721 						!target->allow_ext_sg)) {
1722 		shost_printk(KERN_ERR, target->scsi_host,
1723 			     "Could not fit S/G list into SRP_CMD\n");
1724 		ret = -EIO;
1725 		goto unmap;
1726 	}
1727 
1728 	count = min(state.ndesc, target->cmd_sg_cnt);
1729 	table_len = state.ndesc * sizeof (struct srp_direct_buf);
1730 	idb_len = sizeof(struct srp_indirect_buf) + table_len;
1731 
1732 	fmt = SRP_DATA_DESC_INDIRECT;
1733 	len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
1734 	len += count * sizeof (struct srp_direct_buf);
1735 
1736 	memcpy(indirect_hdr->desc_list, req->indirect_desc,
1737 	       count * sizeof (struct srp_direct_buf));
1738 
1739 	if (!(pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1740 		ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
1741 				  idb_len, &idb_rkey);
1742 		if (ret < 0)
1743 			goto unmap;
1744 		req->nmdesc++;
1745 	} else {
1746 		idb_rkey = cpu_to_be32(pd->unsafe_global_rkey);
1747 	}
1748 
1749 	indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1750 	indirect_hdr->table_desc.key = idb_rkey;
1751 	indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1752 	indirect_hdr->len = cpu_to_be32(state.total_len);
1753 
1754 	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1755 		cmd->data_out_desc_cnt = count;
1756 	else
1757 		cmd->data_in_desc_cnt = count;
1758 
1759 	ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1760 				      DMA_TO_DEVICE);
1761 
1762 map_complete:
1763 	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1764 		cmd->buf_fmt = fmt << 4;
1765 	else
1766 		cmd->buf_fmt = fmt;
1767 
1768 	return len;
1769 
1770 unmap:
1771 	srp_unmap_data(scmnd, ch, req);
1772 	if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
1773 		ret = -E2BIG;
1774 	return ret;
1775 }
1776 
1777 /*
1778  * Return an IU to the free list; unless it carried a response, also return a credit.
1779  */
1780 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1781 			  enum srp_iu_type iu_type)
1782 {
1783 	unsigned long flags;
1784 
1785 	spin_lock_irqsave(&ch->lock, flags);
1786 	list_add(&iu->list, &ch->free_tx);
1787 	if (iu_type != SRP_IU_RSP)
1788 		++ch->req_lim;
1789 	spin_unlock_irqrestore(&ch->lock, flags);
1790 }
1791 
1792 /*
1793  * Must be called with ch->lock held to protect req_lim and free_tx.
1794  * If IU is not sent, it must be returned using srp_put_tx_iu().
1795  *
1796  * Note:
1797  * An upper limit for the number of allocated information units for each
1798  * request type is:
1799  * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1800  *   more than Scsi_Host.can_queue requests.
1801  * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1802  * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1803  *   one unanswered SRP request to an initiator.
1804  */
1805 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1806 				      enum srp_iu_type iu_type)
1807 {
1808 	struct srp_target_port *target = ch->target;
1809 	s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1810 	struct srp_iu *iu;
1811 
1812 	lockdep_assert_held(&ch->lock);
1813 
1814 	ib_process_cq_direct(ch->send_cq, -1);
1815 
1816 	if (list_empty(&ch->free_tx))
1817 		return NULL;
1818 
1819 	/* Initiator responses to target requests do not consume credits */
1820 	if (iu_type != SRP_IU_RSP) {
1821 		if (ch->req_lim <= rsv) {
1822 			++target->zero_req_lim;
1823 			return NULL;
1824 		}
1825 
1826 		--ch->req_lim;
1827 	}
1828 
1829 	iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1830 	list_del(&iu->list);
1831 	return iu;
1832 }
1833 
1834 /*
1835  * Note: if this function is called from inside ib_drain_sq() then it will
1836  * be called without ch->lock being held. If ib_drain_sq() dequeues a WQE
1837  * with status IB_WC_SUCCESS then that's a bug.
1838  */
1839 static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
1840 {
1841 	struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
1842 	struct srp_rdma_ch *ch = cq->cq_context;
1843 
1844 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
1845 		srp_handle_qp_err(cq, wc, "SEND");
1846 		return;
1847 	}
1848 
1849 	lockdep_assert_held(&ch->lock);
1850 
1851 	list_add(&iu->list, &ch->free_tx);
1852 }
1853 
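/*
 * Post a single-SGE, signaled SEND work request for @iu on the RC queue
 * pair of @ch. The send completion is reported through srp_send_done()
 * via iu->cqe.
 */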
1854 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1855 {
1856 	struct srp_target_port *target = ch->target;
1857 	struct ib_sge list;
1858 	struct ib_send_wr wr, *bad_wr;
1859 
1860 	list.addr   = iu->dma;
1861 	list.length = len;
1862 	list.lkey   = target->lkey;
1863 
1864 	iu->cqe.done = srp_send_done;
1865 
1866 	wr.next       = NULL;
1867 	wr.wr_cqe     = &iu->cqe;
1868 	wr.sg_list    = &list;
1869 	wr.num_sge    = 1;
1870 	wr.opcode     = IB_WR_SEND;
1871 	wr.send_flags = IB_SEND_SIGNALED;
1872 
1873 	return ib_post_send(ch->qp, &wr, &bad_wr);
1874 }
1875 
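/*
 * Post a receive work request for @iu so that the target can send the
 * initiator another information unit.
 */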
1876 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1877 {
1878 	struct srp_target_port *target = ch->target;
1879 	struct ib_recv_wr wr, *bad_wr;
1880 	struct ib_sge list;
1881 
1882 	list.addr   = iu->dma;
1883 	list.length = iu->size;
1884 	list.lkey   = target->lkey;
1885 
1886 	iu->cqe.done = srp_recv_done;
1887 
1888 	wr.next     = NULL;
1889 	wr.wr_cqe   = &iu->cqe;
1890 	wr.sg_list  = &list;
1891 	wr.num_sge  = 1;
1892 
1893 	return ib_post_recv(ch->qp, &wr, &bad_wr);
1894 }
1895 
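/*
 * Process an SRP_RSP information unit. Responses to task management
 * functions complete ch->tsk_mgmt_done if their tag matches the outstanding
 * TMF. Responses to regular SCSI commands are matched to the command via the
 * response tag, after which status, sense data and residual counts are
 * copied and the command is completed.
 */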
1896 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
1897 {
1898 	struct srp_target_port *target = ch->target;
1899 	struct srp_request *req;
1900 	struct scsi_cmnd *scmnd;
1901 	unsigned long flags;
1902 
1903 	if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
1904 		spin_lock_irqsave(&ch->lock, flags);
1905 		ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1906 		if (rsp->tag == ch->tsk_mgmt_tag) {
1907 			ch->tsk_mgmt_status = -1;
1908 			if (be32_to_cpu(rsp->resp_data_len) >= 4)
1909 				ch->tsk_mgmt_status = rsp->data[3];
1910 			complete(&ch->tsk_mgmt_done);
1911 		} else {
1912 			shost_printk(KERN_ERR, target->scsi_host,
1913 				     "Received tsk mgmt response too late for tag %#llx\n",
1914 				     rsp->tag);
1915 		}
1916 		spin_unlock_irqrestore(&ch->lock, flags);
1917 	} else {
1918 		scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
1919 		if (scmnd && scmnd->host_scribble) {
1920 			req = (void *)scmnd->host_scribble;
1921 			scmnd = srp_claim_req(ch, req, NULL, scmnd);
1922 		} else {
1923 			scmnd = NULL;
1924 		}
1925 		if (!scmnd) {
1926 			shost_printk(KERN_ERR, target->scsi_host,
1927 				     "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
1928 				     rsp->tag, ch - target->ch, ch->qp->qp_num);
1929 
1930 			spin_lock_irqsave(&ch->lock, flags);
1931 			ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1932 			spin_unlock_irqrestore(&ch->lock, flags);
1933 
1934 			return;
1935 		}
1936 		scmnd->result = rsp->status;
1937 
1938 		if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
1939 			memcpy(scmnd->sense_buffer, rsp->data +
1940 			       be32_to_cpu(rsp->resp_data_len),
1941 			       min_t(int, be32_to_cpu(rsp->sense_data_len),
1942 				     SCSI_SENSE_BUFFERSIZE));
1943 		}
1944 
1945 		if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
1946 			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1947 		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
1948 			scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
1949 		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
1950 			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1951 		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
1952 			scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
1953 
1954 		srp_free_req(ch, req, scmnd,
1955 			     be32_to_cpu(rsp->req_lim_delta));
1956 
1957 		scmnd->host_scribble = NULL;
1958 		scmnd->scsi_done(scmnd);
1959 	}
1960 }
1961 
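/*
 * Allocate an SRP_IU_RSP information unit, copy @rsp into it and post it.
 * Returns zero upon success and a nonzero value upon failure.
 */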
1962 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
1963 			       void *rsp, int len)
1964 {
1965 	struct srp_target_port *target = ch->target;
1966 	struct ib_device *dev = target->srp_host->srp_dev->dev;
1967 	unsigned long flags;
1968 	struct srp_iu *iu;
1969 	int err;
1970 
1971 	spin_lock_irqsave(&ch->lock, flags);
1972 	ch->req_lim += req_delta;
1973 	iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
1974 	spin_unlock_irqrestore(&ch->lock, flags);
1975 
1976 	if (!iu) {
1977 		shost_printk(KERN_ERR, target->scsi_host, PFX
1978 			     "no IU available to send response\n");
1979 		return 1;
1980 	}
1981 
1982 	ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
1983 	memcpy(iu->buf, rsp, len);
1984 	ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
1985 
1986 	err = srp_post_send(ch, iu, len);
1987 	if (err) {
1988 		shost_printk(KERN_ERR, target->scsi_host, PFX
1989 			     "unable to post response: %d\n", err);
1990 		srp_put_tx_iu(ch, iu, SRP_IU_RSP);
1991 	}
1992 
1993 	return err;
1994 }
1995 
1996 static void srp_process_cred_req(struct srp_rdma_ch *ch,
1997 				 struct srp_cred_req *req)
1998 {
1999 	struct srp_cred_rsp rsp = {
2000 		.opcode = SRP_CRED_RSP,
2001 		.tag = req->tag,
2002 	};
2003 	s32 delta = be32_to_cpu(req->req_lim_delta);
2004 
2005 	if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2006 		shost_printk(KERN_ERR, ch->target->scsi_host, PFX
2007 			     "problems processing SRP_CRED_REQ\n");
2008 }
2009 
2010 static void srp_process_aer_req(struct srp_rdma_ch *ch,
2011 				struct srp_aer_req *req)
2012 {
2013 	struct srp_target_port *target = ch->target;
2014 	struct srp_aer_rsp rsp = {
2015 		.opcode = SRP_AER_RSP,
2016 		.tag = req->tag,
2017 	};
2018 	s32 delta = be32_to_cpu(req->req_lim_delta);
2019 
2020 	shost_printk(KERN_ERR, target->scsi_host, PFX
2021 		     "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
2022 
2023 	if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2024 		shost_printk(KERN_ERR, target->scsi_host, PFX
2025 			     "problems processing SRP_AER_REQ\n");
2026 }
2027 
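/*
 * Receive completion handler: dispatch the received information unit based
 * on its SRP opcode and repost the receive buffer afterwards.
 */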
2028 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
2029 {
2030 	struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
2031 	struct srp_rdma_ch *ch = cq->cq_context;
2032 	struct srp_target_port *target = ch->target;
2033 	struct ib_device *dev = target->srp_host->srp_dev->dev;
2034 	int res;
2035 	u8 opcode;
2036 
2037 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
2038 		srp_handle_qp_err(cq, wc, "RECV");
2039 		return;
2040 	}
2041 
2042 	ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
2043 				   DMA_FROM_DEVICE);
2044 
2045 	opcode = *(u8 *) iu->buf;
2046 
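	/* Change "if (0)" below into "if (1)" to dump every received IU. */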
2047 	if (0) {
2048 		shost_printk(KERN_ERR, target->scsi_host,
2049 			     PFX "recv completion, opcode 0x%02x\n", opcode);
2050 		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
2051 			       iu->buf, wc->byte_len, true);
2052 	}
2053 
2054 	switch (opcode) {
2055 	case SRP_RSP:
2056 		srp_process_rsp(ch, iu->buf);
2057 		break;
2058 
2059 	case SRP_CRED_REQ:
2060 		srp_process_cred_req(ch, iu->buf);
2061 		break;
2062 
2063 	case SRP_AER_REQ:
2064 		srp_process_aer_req(ch, iu->buf);
2065 		break;
2066 
2067 	case SRP_T_LOGOUT:
2068 		/* XXX Handle target logout */
2069 		shost_printk(KERN_WARNING, target->scsi_host,
2070 			     PFX "Got target logout request\n");
2071 		break;
2072 
2073 	default:
2074 		shost_printk(KERN_WARNING, target->scsi_host,
2075 			     PFX "Unhandled SRP opcode 0x%02x\n", opcode);
2076 		break;
2077 	}
2078 
2079 	ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
2080 				      DMA_FROM_DEVICE);
2081 
2082 	res = srp_post_recv(ch, iu);
2083 	if (res != 0)
2084 		shost_printk(KERN_ERR, target->scsi_host,
2085 			     PFX "Recv failed with error code %d\n", res);
2086 }
2087 
2088 /**
2089  * srp_tl_err_work() - handle a transport layer error
2090  * @work: Work structure embedded in an SRP target port.
2091  *
2092  * Note: This function may get invoked before the rport has been created,
2093  * hence the target->rport test.
2094  */
2095 static void srp_tl_err_work(struct work_struct *work)
2096 {
2097 	struct srp_target_port *target;
2098 
2099 	target = container_of(work, struct srp_target_port, tl_err_work);
2100 	if (target->rport)
2101 		srp_start_tl_fail_timers(target->rport);
2102 }
2103 
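/*
 * Report a work completion error once per connection, schedule transport
 * layer error handling and set target->qp_in_error.
 */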
2104 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
2105 		const char *opname)
2106 {
2107 	struct srp_rdma_ch *ch = cq->cq_context;
2108 	struct srp_target_port *target = ch->target;
2109 
2110 	if (ch->connected && !target->qp_in_error) {
2111 		shost_printk(KERN_ERR, target->scsi_host,
2112 			     PFX "failed %s status %s (%d) for CQE %p\n",
2113 			     opname, ib_wc_status_msg(wc->status), wc->status,
2114 			     wc->wr_cqe);
2115 		queue_work(system_long_wq, &target->tl_err_work);
2116 	}
2117 	target->qp_in_error = true;
2118 }
2119 
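/*
 * SCSI .queuecommand callback: build an SRP_CMD information unit for @scmnd,
 * map its data buffer and post the IU on the RDMA channel that corresponds
 * to the blk-mq hardware queue encoded in the command tag.
 */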
2120 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2121 {
2122 	struct srp_target_port *target = host_to_target(shost);
2123 	struct srp_rport *rport = target->rport;
2124 	struct srp_rdma_ch *ch;
2125 	struct srp_request *req;
2126 	struct srp_iu *iu;
2127 	struct srp_cmd *cmd;
2128 	struct ib_device *dev;
2129 	unsigned long flags;
2130 	u32 tag;
2131 	u16 idx;
2132 	int len, ret;
2133 	const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
2134 
2135 	/*
2136 	 * The SCSI EH thread is the only context from which srp_queuecommand()
2137 	 * can get invoked for blocked devices (SDEV_BLOCK /
2138 	 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
2139 	 * locking the rport mutex if invoked from inside the SCSI EH.
2140 	 */
2141 	if (in_scsi_eh)
2142 		mutex_lock(&rport->mutex);
2143 
2144 	scmnd->result = srp_chkready(target->rport);
2145 	if (unlikely(scmnd->result))
2146 		goto err;
2147 
2148 	WARN_ON_ONCE(scmnd->request->tag < 0);
2149 	tag = blk_mq_unique_tag(scmnd->request);
2150 	ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2151 	idx = blk_mq_unique_tag_to_tag(tag);
2152 	WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2153 		  dev_name(&shost->shost_gendev), tag, idx,
2154 		  target->req_ring_size);
2155 
2156 	spin_lock_irqsave(&ch->lock, flags);
2157 	iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2158 	spin_unlock_irqrestore(&ch->lock, flags);
2159 
2160 	if (!iu)
2161 		goto err;
2162 
2163 	req = &ch->req_ring[idx];
2164 	dev = target->srp_host->srp_dev->dev;
2165 	ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
2166 				   DMA_TO_DEVICE);
2167 
2168 	scmnd->host_scribble = (void *) req;
2169 
2170 	cmd = iu->buf;
2171 	memset(cmd, 0, sizeof *cmd);
2172 
2173 	cmd->opcode = SRP_CMD;
2174 	int_to_scsilun(scmnd->device->lun, &cmd->lun);
2175 	cmd->tag    = tag;
2176 	memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2177 
2178 	req->scmnd    = scmnd;
2179 	req->cmd      = iu;
2180 
2181 	len = srp_map_data(scmnd, ch, req);
2182 	if (len < 0) {
2183 		shost_printk(KERN_ERR, target->scsi_host,
2184 			     PFX "Failed to map data (%d)\n", len);
2185 		/*
2186 		 * If we ran out of memory descriptors (-ENOMEM) because an
2187 		 * application is queuing many requests with more than
2188 		 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2189 		 * to reduce queue depth temporarily.
2190 		 */
2191 		scmnd->result = len == -ENOMEM ?
2192 			DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2193 		goto err_iu;
2194 	}
2195 
2196 	ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2197 				      DMA_TO_DEVICE);
2198 
2199 	if (srp_post_send(ch, iu, len)) {
2200 		shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2201 		goto err_unmap;
2202 	}
2203 
2204 	ret = 0;
2205 
2206 unlock_rport:
2207 	if (in_scsi_eh)
2208 		mutex_unlock(&rport->mutex);
2209 
2210 	return ret;
2211 
2212 err_unmap:
2213 	srp_unmap_data(scmnd, ch, req);
2214 
2215 err_iu:
2216 	srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2217 
2218 	/*
2219 	 * Avoid that the loops that iterate over the request ring can
2220 	 * encounter a dangling SCSI command pointer.
2221 	 */
2222 	req->scmnd = NULL;
2223 
2224 err:
2225 	if (scmnd->result) {
2226 		scmnd->scsi_done(scmnd);
2227 		ret = 0;
2228 	} else {
2229 		ret = SCSI_MLQUEUE_HOST_BUSY;
2230 	}
2231 
2232 	goto unlock_rport;
2233 }
2234 
2235 /*
2236  * Note: the resources allocated in this function are freed in
2237  * srp_free_ch_ib().
2238  */
2239 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2240 {
2241 	struct srp_target_port *target = ch->target;
2242 	int i;
2243 
2244 	ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2245 			      GFP_KERNEL);
2246 	if (!ch->rx_ring)
2247 		goto err_no_ring;
2248 	ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2249 			      GFP_KERNEL);
2250 	if (!ch->tx_ring)
2251 		goto err_no_ring;
2252 
2253 	for (i = 0; i < target->queue_size; ++i) {
2254 		ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2255 					      ch->max_ti_iu_len,
2256 					      GFP_KERNEL, DMA_FROM_DEVICE);
2257 		if (!ch->rx_ring[i])
2258 			goto err;
2259 	}
2260 
2261 	for (i = 0; i < target->queue_size; ++i) {
2262 		ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2263 					      target->max_iu_len,
2264 					      GFP_KERNEL, DMA_TO_DEVICE);
2265 		if (!ch->tx_ring[i])
2266 			goto err;
2267 
2268 		list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2269 	}
2270 
2271 	return 0;
2272 
2273 err:
2274 	for (i = 0; i < target->queue_size; ++i) {
2275 		srp_free_iu(target->srp_host, ch->rx_ring[i]);
2276 		srp_free_iu(target->srp_host, ch->tx_ring[i]);
2277 	}
2278 
2279 
2280 err_no_ring:
2281 	kfree(ch->tx_ring);
2282 	ch->tx_ring = NULL;
2283 	kfree(ch->rx_ring);
2284 	ch->rx_ring = NULL;
2285 
2286 	return -ENOMEM;
2287 }
2288 
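/*
 * Compute the SCSI request queue timeout (in jiffies) that corresponds to
 * the QP local ACK timeout and retry count in @qp_attr.
 */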
2289 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2290 {
2291 	uint64_t T_tr_ns, max_compl_time_ms;
2292 	uint32_t rq_tmo_jiffies;
2293 
2294 	/*
2295 	 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2296 	 * table 91), both the QP timeout and the retry count have to be set
2297 	 * for RC QP's during the RTR to RTS transition.
2298 	 */
2299 	WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2300 		     (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2301 
2302 	/*
2303 	 * Set target->rq_tmo_jiffies to one second more than the largest time
2304 	 * it can take before an error completion is generated. See also
2305 	 * C9-140..142 in the IBTA spec for more information about how to
2306 	 * convert the QP Local ACK Timeout value to nanoseconds.
2307 	 */
2308 	T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2309 	max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2310 	do_div(max_compl_time_ms, NSEC_PER_MSEC);
2311 	rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2312 
2313 	return rq_tmo_jiffies;
2314 }
2315 
2316 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2317 			       const struct srp_login_rsp *lrsp,
2318 			       struct srp_rdma_ch *ch)
2319 {
2320 	struct srp_target_port *target = ch->target;
2321 	struct ib_qp_attr *qp_attr = NULL;
2322 	int attr_mask = 0;
2323 	int ret;
2324 	int i;
2325 
2326 	if (lrsp->opcode == SRP_LOGIN_RSP) {
2327 		ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2328 		ch->req_lim       = be32_to_cpu(lrsp->req_lim_delta);
2329 
2330 		/*
2331 		 * Reserve credits for task management so we don't
2332 		 * bounce requests back to the SCSI mid-layer.
2333 		 */
2334 		target->scsi_host->can_queue
2335 			= min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2336 			      target->scsi_host->can_queue);
2337 		target->scsi_host->cmd_per_lun
2338 			= min_t(int, target->scsi_host->can_queue,
2339 				target->scsi_host->cmd_per_lun);
2340 	} else {
2341 		shost_printk(KERN_WARNING, target->scsi_host,
2342 			     PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2343 		ret = -ECONNRESET;
2344 		goto error;
2345 	}
2346 
2347 	if (!ch->rx_ring) {
2348 		ret = srp_alloc_iu_bufs(ch);
2349 		if (ret)
2350 			goto error;
2351 	}
2352 
2353 	ret = -ENOMEM;
2354 	qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
2355 	if (!qp_attr)
2356 		goto error;
2357 
2358 	qp_attr->qp_state = IB_QPS_RTR;
2359 	ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2360 	if (ret)
2361 		goto error_free;
2362 
2363 	ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2364 	if (ret)
2365 		goto error_free;
2366 
2367 	for (i = 0; i < target->queue_size; i++) {
2368 		struct srp_iu *iu = ch->rx_ring[i];
2369 
2370 		ret = srp_post_recv(ch, iu);
2371 		if (ret)
2372 			goto error_free;
2373 	}
2374 
2375 	qp_attr->qp_state = IB_QPS_RTS;
2376 	ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2377 	if (ret)
2378 		goto error_free;
2379 
2380 	target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2381 
2382 	ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2383 	if (ret)
2384 		goto error_free;
2385 
2386 	ret = ib_send_cm_rtu(cm_id, NULL, 0);
2387 
2388 error_free:
2389 	kfree(qp_attr);
2390 
2391 error:
2392 	ch->status = ret;
2393 }
2394 
2395 static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
2396 			       struct ib_cm_event *event,
2397 			       struct srp_rdma_ch *ch)
2398 {
2399 	struct srp_target_port *target = ch->target;
2400 	struct Scsi_Host *shost = target->scsi_host;
2401 	struct ib_class_port_info *cpi;
2402 	int opcode;
2403 
2404 	switch (event->param.rej_rcvd.reason) {
2405 	case IB_CM_REJ_PORT_CM_REDIRECT:
2406 		cpi = event->param.rej_rcvd.ari;
2407 		sa_path_set_dlid(&ch->path, htonl(ntohs(cpi->redirect_lid)));
2408 		ch->path.pkey = cpi->redirect_pkey;
2409 		cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2410 		memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
2411 
2412 		ch->status = sa_path_get_dlid(&ch->path) ?
2413 			SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2414 		break;
2415 
2416 	case IB_CM_REJ_PORT_REDIRECT:
2417 		if (srp_target_is_topspin(target)) {
2418 			/*
2419 			 * Topspin/Cisco SRP gateways incorrectly send
2420 			 * reject reason code 25 when they mean 24
2421 			 * (port redirect).
2422 			 */
2423 			memcpy(ch->path.dgid.raw,
2424 			       event->param.rej_rcvd.ari, 16);
2425 
2426 			shost_printk(KERN_DEBUG, shost,
2427 				     PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2428 				     be64_to_cpu(ch->path.dgid.global.subnet_prefix),
2429 				     be64_to_cpu(ch->path.dgid.global.interface_id));
2430 
2431 			ch->status = SRP_PORT_REDIRECT;
2432 		} else {
2433 			shost_printk(KERN_WARNING, shost,
2434 				     "  REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2435 			ch->status = -ECONNRESET;
2436 		}
2437 		break;
2438 
2439 	case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2440 		shost_printk(KERN_WARNING, shost,
2441 			    "  REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2442 		ch->status = -ECONNRESET;
2443 		break;
2444 
2445 	case IB_CM_REJ_CONSUMER_DEFINED:
2446 		opcode = *(u8 *) event->private_data;
2447 		if (opcode == SRP_LOGIN_REJ) {
2448 			struct srp_login_rej *rej = event->private_data;
2449 			u32 reason = be32_to_cpu(rej->reason);
2450 
2451 			if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2452 				shost_printk(KERN_WARNING, shost,
2453 					     PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2454 			else
2455 				shost_printk(KERN_WARNING, shost, PFX
2456 					     "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2457 					     target->sgid.raw,
2458 					     target->orig_dgid.raw, reason);
		} else {
			shost_printk(KERN_WARNING, shost,
				     "  REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n",
				     opcode);
		}
2463 		ch->status = -ECONNRESET;
2464 		break;
2465 
2466 	case IB_CM_REJ_STALE_CONN:
2467 		shost_printk(KERN_WARNING, shost, "  REJ reason: stale connection\n");
2468 		ch->status = SRP_STALE_CONN;
2469 		break;
2470 
2471 	default:
2472 		shost_printk(KERN_WARNING, shost, "  REJ reason 0x%x\n",
2473 			     event->param.rej_rcvd.reason);
2474 		ch->status = -ECONNRESET;
2475 	}
2476 }
2477 
2478 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2479 {
2480 	struct srp_rdma_ch *ch = cm_id->context;
2481 	struct srp_target_port *target = ch->target;
2482 	int comp = 0;
2483 
2484 	switch (event->event) {
2485 	case IB_CM_REQ_ERROR:
2486 		shost_printk(KERN_DEBUG, target->scsi_host,
2487 			     PFX "Sending CM REQ failed\n");
2488 		comp = 1;
2489 		ch->status = -ECONNRESET;
2490 		break;
2491 
2492 	case IB_CM_REP_RECEIVED:
2493 		comp = 1;
2494 		srp_cm_rep_handler(cm_id, event->private_data, ch);
2495 		break;
2496 
2497 	case IB_CM_REJ_RECEIVED:
2498 		shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2499 		comp = 1;
2500 
2501 		srp_cm_rej_handler(cm_id, event, ch);
2502 		break;
2503 
2504 	case IB_CM_DREQ_RECEIVED:
2505 		shost_printk(KERN_WARNING, target->scsi_host,
2506 			     PFX "DREQ received - connection closed\n");
2507 		ch->connected = false;
2508 		if (ib_send_cm_drep(cm_id, NULL, 0))
2509 			shost_printk(KERN_ERR, target->scsi_host,
2510 				     PFX "Sending CM DREP failed\n");
2511 		queue_work(system_long_wq, &target->tl_err_work);
2512 		break;
2513 
2514 	case IB_CM_TIMEWAIT_EXIT:
2515 		shost_printk(KERN_ERR, target->scsi_host,
2516 			     PFX "connection closed\n");
2517 		comp = 1;
2518 
2519 		ch->status = 0;
2520 		break;
2521 
2522 	case IB_CM_MRA_RECEIVED:
2523 	case IB_CM_DREQ_ERROR:
2524 	case IB_CM_DREP_RECEIVED:
2525 		break;
2526 
2527 	default:
2528 		shost_printk(KERN_WARNING, target->scsi_host,
2529 			     PFX "Unhandled CM event %d\n", event->event);
2530 		break;
2531 	}
2532 
2533 	if (comp)
2534 		complete(&ch->done);
2535 
2536 	return 0;
2537 }
2538 
2539 /**
2540  * srp_change_queue_depth - setting device queue depth
2541  * @sdev: scsi device struct
2542  * @qdepth: requested queue depth
2543  *
2544  * Returns queue depth.
2545  */
2546 static int
2547 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2548 {
2549 	if (!sdev->tagged_supported)
2550 		qdepth = 1;
2551 	return scsi_change_queue_depth(sdev, qdepth);
2552 }
2553 
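/*
 * Send a task management function for @req_tag / @lun and wait up to
 * SRP_ABORT_TIMEOUT_MS for the response. Returns 0 if a response has been
 * received and -1 otherwise. Upon success, and if @status is non-NULL,
 * *@status is set to the task management status reported by the target.
 */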
2554 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2555 			     u8 func, u8 *status)
2556 {
2557 	struct srp_target_port *target = ch->target;
2558 	struct srp_rport *rport = target->rport;
2559 	struct ib_device *dev = target->srp_host->srp_dev->dev;
2560 	struct srp_iu *iu;
2561 	struct srp_tsk_mgmt *tsk_mgmt;
2562 	int res;
2563 
2564 	if (!ch->connected || target->qp_in_error)
2565 		return -1;
2566 
2567 	/*
2568 	 * Lock the rport mutex to avoid that srp_create_ch_ib() is
2569 	 * invoked while a task management function is being sent.
2570 	 */
2571 	mutex_lock(&rport->mutex);
2572 	spin_lock_irq(&ch->lock);
2573 	iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2574 	spin_unlock_irq(&ch->lock);
2575 
2576 	if (!iu) {
2577 		mutex_unlock(&rport->mutex);
2578 
2579 		return -1;
2580 	}
2581 
2582 	ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2583 				   DMA_TO_DEVICE);
2584 	tsk_mgmt = iu->buf;
2585 	memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2586 
2587 	tsk_mgmt->opcode 	= SRP_TSK_MGMT;
2588 	int_to_scsilun(lun, &tsk_mgmt->lun);
2589 	tsk_mgmt->tsk_mgmt_func = func;
2590 	tsk_mgmt->task_tag	= req_tag;
2591 
2592 	spin_lock_irq(&ch->lock);
2593 	ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
2594 	tsk_mgmt->tag = ch->tsk_mgmt_tag;
2595 	spin_unlock_irq(&ch->lock);
2596 
2597 	init_completion(&ch->tsk_mgmt_done);
2598 
2599 	ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2600 				      DMA_TO_DEVICE);
2601 	if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2602 		srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2603 		mutex_unlock(&rport->mutex);
2604 
2605 		return -1;
2606 	}
2607 	res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
2608 					msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
2609 	if (res > 0 && status)
2610 		*status = ch->tsk_mgmt_status;
2611 	mutex_unlock(&rport->mutex);
2612 
2613 	WARN_ON_ONCE(res < 0);
2614 
2615 	return res > 0 ? 0 : -1;
2616 }
2617 
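/*
 * SCSI EH abort handler: claim the request associated with @scmnd and send
 * an SRP_TSK_ABORT_TASK task management function to the target.
 */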
2618 static int srp_abort(struct scsi_cmnd *scmnd)
2619 {
2620 	struct srp_target_port *target = host_to_target(scmnd->device->host);
2621 	struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2622 	u32 tag;
2623 	u16 ch_idx;
2624 	struct srp_rdma_ch *ch;
2625 	int ret;
2626 
2627 	shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2628 
2629 	if (!req)
2630 		return SUCCESS;
2631 	tag = blk_mq_unique_tag(scmnd->request);
2632 	ch_idx = blk_mq_unique_tag_to_hwq(tag);
2633 	if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2634 		return SUCCESS;
2635 	ch = &target->ch[ch_idx];
2636 	if (!srp_claim_req(ch, req, NULL, scmnd))
2637 		return SUCCESS;
2638 	shost_printk(KERN_ERR, target->scsi_host,
2639 		     "Sending SRP abort for tag %#x\n", tag);
2640 	if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2641 			      SRP_TSK_ABORT_TASK, NULL) == 0)
2642 		ret = SUCCESS;
2643 	else if (target->rport->state == SRP_RPORT_LOST)
2644 		ret = FAST_IO_FAIL;
2645 	else
2646 		ret = FAILED;
2647 	srp_free_req(ch, req, scmnd, 0);
2648 	scmnd->result = DID_ABORT << 16;
2649 	scmnd->scsi_done(scmnd);
2650 
2651 	return ret;
2652 }
2653 
2654 static int srp_reset_device(struct scsi_cmnd *scmnd)
2655 {
2656 	struct srp_target_port *target = host_to_target(scmnd->device->host);
2657 	struct srp_rdma_ch *ch;
	int i, j;
2659 	u8 status;
2660 
2661 	shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2662 
2663 	ch = &target->ch[0];
2664 	if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2665 			      SRP_TSK_LUN_RESET, &status))
2666 		return FAILED;
2667 	if (status)
2668 		return FAILED;
2669 
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		for (j = 0; j < target->req_ring_size; ++j) {
			struct srp_request *req = &ch->req_ring[j];

			srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
		}
	}
2677 	}
2678 
2679 	return SUCCESS;
2680 }
2681 
2682 static int srp_reset_host(struct scsi_cmnd *scmnd)
2683 {
2684 	struct srp_target_port *target = host_to_target(scmnd->device->host);
2685 
2686 	shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2687 
2688 	return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2689 }
2690 
2691 static int srp_slave_alloc(struct scsi_device *sdev)
2692 {
2693 	struct Scsi_Host *shost = sdev->host;
2694 	struct srp_target_port *target = host_to_target(shost);
2695 	struct srp_device *srp_dev = target->srp_host->srp_dev;
2696 
	blk_queue_virt_boundary(sdev->request_queue, ~srp_dev->mr_page_mask);
2700 
2701 	return 0;
2702 }
2703 
2704 static int srp_slave_configure(struct scsi_device *sdev)
2705 {
2706 	struct Scsi_Host *shost = sdev->host;
2707 	struct srp_target_port *target = host_to_target(shost);
2708 	struct request_queue *q = sdev->request_queue;
2709 	unsigned long timeout;
2710 
2711 	if (sdev->type == TYPE_DISK) {
2712 		timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
2713 		blk_queue_rq_timeout(q, timeout);
2714 	}
2715 
2716 	return 0;
2717 }
2718 
2719 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
2720 			   char *buf)
2721 {
2722 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2723 
2724 	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
2725 }
2726 
2727 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
2728 			     char *buf)
2729 {
2730 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2731 
2732 	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
2733 }
2734 
2735 static ssize_t show_service_id(struct device *dev,
2736 			       struct device_attribute *attr, char *buf)
2737 {
2738 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2739 
2740 	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id));
2741 }
2742 
2743 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
2744 			 char *buf)
2745 {
2746 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2747 
2748 	return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey));
2749 }
2750 
2751 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2752 			 char *buf)
2753 {
2754 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2755 
2756 	return sprintf(buf, "%pI6\n", target->sgid.raw);
2757 }
2758 
2759 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
2760 			 char *buf)
2761 {
2762 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2763 	struct srp_rdma_ch *ch = &target->ch[0];
2764 
2765 	return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
2766 }
2767 
2768 static ssize_t show_orig_dgid(struct device *dev,
2769 			      struct device_attribute *attr, char *buf)
2770 {
2771 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2772 
2773 	return sprintf(buf, "%pI6\n", target->orig_dgid.raw);
2774 }
2775 
2776 static ssize_t show_req_lim(struct device *dev,
2777 			    struct device_attribute *attr, char *buf)
2778 {
2779 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2780 	struct srp_rdma_ch *ch;
2781 	int i, req_lim = INT_MAX;
2782 
2783 	for (i = 0; i < target->ch_count; i++) {
2784 		ch = &target->ch[i];
2785 		req_lim = min(req_lim, ch->req_lim);
2786 	}
2787 	return sprintf(buf, "%d\n", req_lim);
2788 }
2789 
2790 static ssize_t show_zero_req_lim(struct device *dev,
2791 				 struct device_attribute *attr, char *buf)
2792 {
2793 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2794 
2795 	return sprintf(buf, "%d\n", target->zero_req_lim);
2796 }
2797 
2798 static ssize_t show_local_ib_port(struct device *dev,
2799 				  struct device_attribute *attr, char *buf)
2800 {
2801 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2802 
2803 	return sprintf(buf, "%d\n", target->srp_host->port);
2804 }
2805 
2806 static ssize_t show_local_ib_device(struct device *dev,
2807 				    struct device_attribute *attr, char *buf)
2808 {
2809 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2810 
2811 	return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
2812 }
2813 
2814 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
2815 			     char *buf)
2816 {
2817 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2818 
2819 	return sprintf(buf, "%d\n", target->ch_count);
2820 }
2821 
2822 static ssize_t show_comp_vector(struct device *dev,
2823 				struct device_attribute *attr, char *buf)
2824 {
2825 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2826 
2827 	return sprintf(buf, "%d\n", target->comp_vector);
2828 }
2829 
2830 static ssize_t show_tl_retry_count(struct device *dev,
2831 				   struct device_attribute *attr, char *buf)
2832 {
2833 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2834 
2835 	return sprintf(buf, "%d\n", target->tl_retry_count);
2836 }
2837 
2838 static ssize_t show_cmd_sg_entries(struct device *dev,
2839 				   struct device_attribute *attr, char *buf)
2840 {
2841 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2842 
2843 	return sprintf(buf, "%u\n", target->cmd_sg_cnt);
2844 }
2845 
2846 static ssize_t show_allow_ext_sg(struct device *dev,
2847 				 struct device_attribute *attr, char *buf)
2848 {
2849 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2850 
2851 	return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
2852 }
2853 
2854 static DEVICE_ATTR(id_ext,	    S_IRUGO, show_id_ext,	   NULL);
2855 static DEVICE_ATTR(ioc_guid,	    S_IRUGO, show_ioc_guid,	   NULL);
2856 static DEVICE_ATTR(service_id,	    S_IRUGO, show_service_id,	   NULL);
2857 static DEVICE_ATTR(pkey,	    S_IRUGO, show_pkey,		   NULL);
2858 static DEVICE_ATTR(sgid,	    S_IRUGO, show_sgid,		   NULL);
2859 static DEVICE_ATTR(dgid,	    S_IRUGO, show_dgid,		   NULL);
2860 static DEVICE_ATTR(orig_dgid,	    S_IRUGO, show_orig_dgid,	   NULL);
2861 static DEVICE_ATTR(req_lim,         S_IRUGO, show_req_lim,         NULL);
2862 static DEVICE_ATTR(zero_req_lim,    S_IRUGO, show_zero_req_lim,	   NULL);
2863 static DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,   NULL);
2864 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
2865 static DEVICE_ATTR(ch_count,        S_IRUGO, show_ch_count,        NULL);
2866 static DEVICE_ATTR(comp_vector,     S_IRUGO, show_comp_vector,     NULL);
2867 static DEVICE_ATTR(tl_retry_count,  S_IRUGO, show_tl_retry_count,  NULL);
2868 static DEVICE_ATTR(cmd_sg_entries,  S_IRUGO, show_cmd_sg_entries,  NULL);
2869 static DEVICE_ATTR(allow_ext_sg,    S_IRUGO, show_allow_ext_sg,    NULL);
2870 
2871 static struct device_attribute *srp_host_attrs[] = {
2872 	&dev_attr_id_ext,
2873 	&dev_attr_ioc_guid,
2874 	&dev_attr_service_id,
2875 	&dev_attr_pkey,
2876 	&dev_attr_sgid,
2877 	&dev_attr_dgid,
2878 	&dev_attr_orig_dgid,
2879 	&dev_attr_req_lim,
2880 	&dev_attr_zero_req_lim,
2881 	&dev_attr_local_ib_port,
2882 	&dev_attr_local_ib_device,
2883 	&dev_attr_ch_count,
2884 	&dev_attr_comp_vector,
2885 	&dev_attr_tl_retry_count,
2886 	&dev_attr_cmd_sg_entries,
2887 	&dev_attr_allow_ext_sg,
2888 	NULL
2889 };
2890 
2891 static struct scsi_host_template srp_template = {
2892 	.module				= THIS_MODULE,
2893 	.name				= "InfiniBand SRP initiator",
2894 	.proc_name			= DRV_NAME,
2895 	.slave_alloc			= srp_slave_alloc,
2896 	.slave_configure		= srp_slave_configure,
2897 	.info				= srp_target_info,
2898 	.queuecommand			= srp_queuecommand,
2899 	.change_queue_depth             = srp_change_queue_depth,
2900 	.eh_timed_out			= srp_timed_out,
2901 	.eh_abort_handler		= srp_abort,
2902 	.eh_device_reset_handler	= srp_reset_device,
2903 	.eh_host_reset_handler		= srp_reset_host,
2904 	.skip_settle_delay		= true,
2905 	.sg_tablesize			= SRP_DEF_SG_TABLESIZE,
2906 	.can_queue			= SRP_DEFAULT_CMD_SQ_SIZE,
2907 	.this_id			= -1,
2908 	.cmd_per_lun			= SRP_DEFAULT_CMD_SQ_SIZE,
2909 	.use_clustering			= ENABLE_CLUSTERING,
2910 	.shost_attrs			= srp_host_attrs,
2911 	.track_queue_depth		= 1,
2912 };
2913 
2914 static int srp_sdev_count(struct Scsi_Host *host)
2915 {
2916 	struct scsi_device *sdev;
2917 	int c = 0;
2918 
2919 	shost_for_each_device(sdev, host)
2920 		c++;
2921 
2922 	return c;
2923 }
2924 
2925 /*
2926  * Return values:
2927  * < 0 upon failure. Caller is responsible for SRP target port cleanup.
2928  * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
2929  *    removal has been scheduled.
2930  * 0 and target->state != SRP_TARGET_REMOVED upon success.
2931  */
2932 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
2933 {
2934 	struct srp_rport_identifiers ids;
2935 	struct srp_rport *rport;
2936 
2937 	target->state = SRP_TARGET_SCANNING;
2938 	sprintf(target->target_name, "SRP.T10:%016llX",
2939 		be64_to_cpu(target->id_ext));
2940 
2941 	if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dev.parent))
2942 		return -ENODEV;
2943 
2944 	memcpy(ids.port_id, &target->id_ext, 8);
2945 	memcpy(ids.port_id + 8, &target->ioc_guid, 8);
2946 	ids.roles = SRP_RPORT_ROLE_TARGET;
2947 	rport = srp_rport_add(target->scsi_host, &ids);
2948 	if (IS_ERR(rport)) {
2949 		scsi_remove_host(target->scsi_host);
2950 		return PTR_ERR(rport);
2951 	}
2952 
2953 	rport->lld_data = target;
2954 	target->rport = rport;
2955 
2956 	spin_lock(&host->target_lock);
2957 	list_add_tail(&target->list, &host->target_list);
2958 	spin_unlock(&host->target_lock);
2959 
2960 	scsi_scan_target(&target->scsi_host->shost_gendev,
2961 			 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
2962 
2963 	if (srp_connected_ch(target) < target->ch_count ||
2964 	    target->qp_in_error) {
2965 		shost_printk(KERN_INFO, target->scsi_host,
2966 			     PFX "SCSI scan failed - removing SCSI host\n");
2967 		srp_queue_remove_work(target);
2968 		goto out;
2969 	}
2970 
2971 	pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
2972 		 dev_name(&target->scsi_host->shost_gendev),
2973 		 srp_sdev_count(target->scsi_host));
2974 
2975 	spin_lock_irq(&target->lock);
2976 	if (target->state == SRP_TARGET_SCANNING)
2977 		target->state = SRP_TARGET_LIVE;
2978 	spin_unlock_irq(&target->lock);
2979 
2980 out:
2981 	return 0;
2982 }
2983 
2984 static void srp_release_dev(struct device *dev)
2985 {
2986 	struct srp_host *host =
2987 		container_of(dev, struct srp_host, dev);
2988 
2989 	complete(&host->released);
2990 }
2991 
2992 static struct class srp_class = {
2993 	.name    = "infiniband_srp",
2994 	.dev_release = srp_release_dev
2995 };
2996 
2997 /**
2998  * srp_conn_unique() - check whether the connection to a target is unique
2999  * @host:   SRP host.
3000  * @target: SRP target port.
3001  */
3002 static bool srp_conn_unique(struct srp_host *host,
3003 			    struct srp_target_port *target)
3004 {
3005 	struct srp_target_port *t;
3006 	bool ret = false;
3007 
3008 	if (target->state == SRP_TARGET_REMOVED)
3009 		goto out;
3010 
3011 	ret = true;
3012 
3013 	spin_lock(&host->target_lock);
3014 	list_for_each_entry(t, &host->target_list, list) {
3015 		if (t != target &&
3016 		    target->id_ext == t->id_ext &&
3017 		    target->ioc_guid == t->ioc_guid &&
3018 		    target->initiator_ext == t->initiator_ext) {
3019 			ret = false;
3020 			break;
3021 		}
3022 	}
3023 	spin_unlock(&host->target_lock);
3024 
3025 out:
3026 	return ret;
3027 }
3028 
3029 /*
3030  * Target ports are added by writing
3031  *
3032  *     id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
3033  *     pkey=<P_Key>,service_id=<service ID>
3034  *
3035  * to the add_target sysfs attribute.
3036  */
3037 enum {
3038 	SRP_OPT_ERR		= 0,
3039 	SRP_OPT_ID_EXT		= 1 << 0,
3040 	SRP_OPT_IOC_GUID	= 1 << 1,
3041 	SRP_OPT_DGID		= 1 << 2,
3042 	SRP_OPT_PKEY		= 1 << 3,
3043 	SRP_OPT_SERVICE_ID	= 1 << 4,
3044 	SRP_OPT_MAX_SECT	= 1 << 5,
3045 	SRP_OPT_MAX_CMD_PER_LUN	= 1 << 6,
3046 	SRP_OPT_IO_CLASS	= 1 << 7,
3047 	SRP_OPT_INITIATOR_EXT	= 1 << 8,
3048 	SRP_OPT_CMD_SG_ENTRIES	= 1 << 9,
3049 	SRP_OPT_ALLOW_EXT_SG	= 1 << 10,
3050 	SRP_OPT_SG_TABLESIZE	= 1 << 11,
3051 	SRP_OPT_COMP_VECTOR	= 1 << 12,
3052 	SRP_OPT_TL_RETRY_COUNT	= 1 << 13,
3053 	SRP_OPT_QUEUE_SIZE	= 1 << 14,
3054 	SRP_OPT_ALL		= (SRP_OPT_ID_EXT	|
3055 				   SRP_OPT_IOC_GUID	|
3056 				   SRP_OPT_DGID		|
3057 				   SRP_OPT_PKEY		|
3058 				   SRP_OPT_SERVICE_ID),
3059 };
3060 
3061 static const match_table_t srp_opt_tokens = {
3062 	{ SRP_OPT_ID_EXT,		"id_ext=%s" 		},
3063 	{ SRP_OPT_IOC_GUID,		"ioc_guid=%s" 		},
3064 	{ SRP_OPT_DGID,			"dgid=%s" 		},
3065 	{ SRP_OPT_PKEY,			"pkey=%x" 		},
3066 	{ SRP_OPT_SERVICE_ID,		"service_id=%s"		},
3067 	{ SRP_OPT_MAX_SECT,		"max_sect=%d" 		},
3068 	{ SRP_OPT_MAX_CMD_PER_LUN,	"max_cmd_per_lun=%d" 	},
3069 	{ SRP_OPT_IO_CLASS,		"io_class=%x"		},
3070 	{ SRP_OPT_INITIATOR_EXT,	"initiator_ext=%s"	},
3071 	{ SRP_OPT_CMD_SG_ENTRIES,	"cmd_sg_entries=%u"	},
3072 	{ SRP_OPT_ALLOW_EXT_SG,		"allow_ext_sg=%u"	},
3073 	{ SRP_OPT_SG_TABLESIZE,		"sg_tablesize=%u"	},
3074 	{ SRP_OPT_COMP_VECTOR,		"comp_vector=%u"	},
3075 	{ SRP_OPT_TL_RETRY_COUNT,	"tl_retry_count=%u"	},
3076 	{ SRP_OPT_QUEUE_SIZE,		"queue_size=%d"		},
3077 	{ SRP_OPT_ERR,			NULL 			}
3078 };
3079 
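/*
 * Parse the comma-separated option string written to the add_target
 * attribute and fill in @target accordingly. Returns zero only if all
 * mandatory options (id_ext, ioc_guid, dgid, pkey and service_id) have
 * been specified.
 */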
3080 static int srp_parse_options(const char *buf, struct srp_target_port *target)
3081 {
3082 	char *options, *sep_opt;
3083 	char *p;
3084 	char dgid[3];
3085 	substring_t args[MAX_OPT_ARGS];
3086 	int opt_mask = 0;
3087 	int token;
3088 	int ret = -EINVAL;
3089 	int i;
3090 
3091 	options = kstrdup(buf, GFP_KERNEL);
3092 	if (!options)
3093 		return -ENOMEM;
3094 
3095 	sep_opt = options;
3096 	while ((p = strsep(&sep_opt, ",\n")) != NULL) {
3097 		if (!*p)
3098 			continue;
3099 
3100 		token = match_token(p, srp_opt_tokens, args);
3101 		opt_mask |= token;
3102 
3103 		switch (token) {
3104 		case SRP_OPT_ID_EXT:
3105 			p = match_strdup(args);
3106 			if (!p) {
3107 				ret = -ENOMEM;
3108 				goto out;
3109 			}
3110 			target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3111 			kfree(p);
3112 			break;
3113 
3114 		case SRP_OPT_IOC_GUID:
3115 			p = match_strdup(args);
3116 			if (!p) {
3117 				ret = -ENOMEM;
3118 				goto out;
3119 			}
3120 			target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
3121 			kfree(p);
3122 			break;
3123 
3124 		case SRP_OPT_DGID:
3125 			p = match_strdup(args);
3126 			if (!p) {
3127 				ret = -ENOMEM;
3128 				goto out;
3129 			}
3130 			if (strlen(p) != 32) {
3131 				pr_warn("bad dest GID parameter '%s'\n", p);
3132 				kfree(p);
3133 				goto out;
3134 			}
3135 
3136 			for (i = 0; i < 16; ++i) {
3137 				strlcpy(dgid, p + i * 2, sizeof(dgid));
3138 				if (sscanf(dgid, "%hhx",
3139 					   &target->orig_dgid.raw[i]) < 1) {
3140 					ret = -EINVAL;
3141 					kfree(p);
3142 					goto out;
3143 				}
3144 			}
3145 			kfree(p);
3146 			break;
3147 
3148 		case SRP_OPT_PKEY:
3149 			if (match_hex(args, &token)) {
3150 				pr_warn("bad P_Key parameter '%s'\n", p);
3151 				goto out;
3152 			}
3153 			target->pkey = cpu_to_be16(token);
3154 			break;
3155 
3156 		case SRP_OPT_SERVICE_ID:
3157 			p = match_strdup(args);
3158 			if (!p) {
3159 				ret = -ENOMEM;
3160 				goto out;
3161 			}
3162 			target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
3163 			kfree(p);
3164 			break;
3165 
3166 		case SRP_OPT_MAX_SECT:
3167 			if (match_int(args, &token)) {
3168 				pr_warn("bad max sect parameter '%s'\n", p);
3169 				goto out;
3170 			}
3171 			target->scsi_host->max_sectors = token;
3172 			break;
3173 
3174 		case SRP_OPT_QUEUE_SIZE:
3175 			if (match_int(args, &token) || token < 1) {
3176 				pr_warn("bad queue_size parameter '%s'\n", p);
3177 				goto out;
3178 			}
3179 			target->scsi_host->can_queue = token;
3180 			target->queue_size = token + SRP_RSP_SQ_SIZE +
3181 					     SRP_TSK_MGMT_SQ_SIZE;
3182 			if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3183 				target->scsi_host->cmd_per_lun = token;
3184 			break;
3185 
3186 		case SRP_OPT_MAX_CMD_PER_LUN:
3187 			if (match_int(args, &token) || token < 1) {
3188 				pr_warn("bad max cmd_per_lun parameter '%s'\n",
3189 					p);
3190 				goto out;
3191 			}
3192 			target->scsi_host->cmd_per_lun = token;
3193 			break;
3194 
3195 		case SRP_OPT_IO_CLASS:
3196 			if (match_hex(args, &token)) {
3197 				pr_warn("bad IO class parameter '%s'\n", p);
3198 				goto out;
3199 			}
3200 			if (token != SRP_REV10_IB_IO_CLASS &&
3201 			    token != SRP_REV16A_IB_IO_CLASS) {
3202 				pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3203 					token, SRP_REV10_IB_IO_CLASS,
3204 					SRP_REV16A_IB_IO_CLASS);
3205 				goto out;
3206 			}
3207 			target->io_class = token;
3208 			break;
3209 
3210 		case SRP_OPT_INITIATOR_EXT:
3211 			p = match_strdup(args);
3212 			if (!p) {
3213 				ret = -ENOMEM;
3214 				goto out;
3215 			}
3216 			target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3217 			kfree(p);
3218 			break;
3219 
3220 		case SRP_OPT_CMD_SG_ENTRIES:
3221 			if (match_int(args, &token) || token < 1 || token > 255) {
3222 				pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3223 					p);
3224 				goto out;
3225 			}
3226 			target->cmd_sg_cnt = token;
3227 			break;
3228 
3229 		case SRP_OPT_ALLOW_EXT_SG:
3230 			if (match_int(args, &token)) {
3231 				pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3232 				goto out;
3233 			}
3234 			target->allow_ext_sg = !!token;
3235 			break;
3236 
3237 		case SRP_OPT_SG_TABLESIZE:
3238 			if (match_int(args, &token) || token < 1 ||
3239 					token > SG_MAX_SEGMENTS) {
3240 				pr_warn("bad max sg_tablesize parameter '%s'\n",
3241 					p);
3242 				goto out;
3243 			}
3244 			target->sg_tablesize = token;
3245 			break;
3246 
3247 		case SRP_OPT_COMP_VECTOR:
3248 			if (match_int(args, &token) || token < 0) {
3249 				pr_warn("bad comp_vector parameter '%s'\n", p);
3250 				goto out;
3251 			}
3252 			target->comp_vector = token;
3253 			break;
3254 
3255 		case SRP_OPT_TL_RETRY_COUNT:
3256 			if (match_int(args, &token) || token < 2 || token > 7) {
3257 				pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3258 					p);
3259 				goto out;
3260 			}
3261 			target->tl_retry_count = token;
3262 			break;
3263 
3264 		default:
3265 			pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3266 				p);
3267 			goto out;
3268 		}
3269 	}
3270 
3271 	if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
3272 		ret = 0;
3273 	else
3274 		for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
3275 			if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
3276 			    !(srp_opt_tokens[i].token & opt_mask))
3277 				pr_warn("target creation request is missing parameter '%s'\n",
3278 					srp_opt_tokens[i].pattern);
3279 
3280 	if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3281 	    && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3282 		pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3283 			target->scsi_host->cmd_per_lun,
3284 			target->scsi_host->can_queue);
3285 
3286 out:
3287 	kfree(options);
3288 	return ret;
3289 }
3290 
3291 static ssize_t srp_create_target(struct device *dev,
3292 				 struct device_attribute *attr,
3293 				 const char *buf, size_t count)
3294 {
3295 	struct srp_host *host =
3296 		container_of(dev, struct srp_host, dev);
3297 	struct Scsi_Host *target_host;
3298 	struct srp_target_port *target;
3299 	struct srp_rdma_ch *ch;
3300 	struct srp_device *srp_dev = host->srp_dev;
3301 	struct ib_device *ibdev = srp_dev->dev;
3302 	int ret, node_idx, node, cpu, i;
3303 	unsigned int max_sectors_per_mr, mr_per_cmd = 0;
3304 	bool multich = false;
3305 
3306 	target_host = scsi_host_alloc(&srp_template,
3307 				      sizeof (struct srp_target_port));
3308 	if (!target_host)
3309 		return -ENOMEM;
3310 
3311 	target_host->transportt  = ib_srp_transport_template;
3312 	target_host->max_channel = 0;
3313 	target_host->max_id      = 1;
3314 	target_host->max_lun     = -1LL;
3315 	target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3316 
3317 	target = host_to_target(target_host);
3318 
3319 	target->io_class	= SRP_REV16A_IB_IO_CLASS;
3320 	target->scsi_host	= target_host;
3321 	target->srp_host	= host;
3322 	target->pd		= host->srp_dev->pd;
3323 	target->lkey		= host->srp_dev->pd->local_dma_lkey;
3324 	target->cmd_sg_cnt	= cmd_sg_entries;
3325 	target->sg_tablesize	= indirect_sg_entries ? : cmd_sg_entries;
3326 	target->allow_ext_sg	= allow_ext_sg;
3327 	target->tl_retry_count	= 7;
3328 	target->queue_size	= SRP_DEFAULT_QUEUE_SIZE;
3329 
3330 	/*
3331 	 * Avoid that the SCSI host can be removed by srp_remove_target()
3332 	 * before this function returns.
3333 	 */
3334 	scsi_host_get(target->scsi_host);
3335 
3336 	ret = mutex_lock_interruptible(&host->add_target_mutex);
3337 	if (ret < 0)
3338 		goto put;
3339 
3340 	ret = srp_parse_options(buf, target);
3341 	if (ret)
3342 		goto out;
3343 
3344 	target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3345 
3346 	if (!srp_conn_unique(target->srp_host, target)) {
3347 		shost_printk(KERN_INFO, target->scsi_host,
3348 			     PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3349 			     be64_to_cpu(target->id_ext),
3350 			     be64_to_cpu(target->ioc_guid),
3351 			     be64_to_cpu(target->initiator_ext));
3352 		ret = -EEXIST;
3353 		goto out;
3354 	}
3355 
3356 	if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3357 	    target->cmd_sg_cnt < target->sg_tablesize) {
3358 		pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3359 		target->sg_tablesize = target->cmd_sg_cnt;
3360 	}
3361 
3362 	if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
3363 		/*
3364 		 * FR and FMR can only map one HCA page per entry. If the
		 * start address is not aligned on an HCA page boundary, two
3366 		 * entries will be used for the head and the tail although
3367 		 * these two entries combined contain at most one HCA page of
3368 		 * data. Hence the "+ 1" in the calculation below.
3369 		 *
3370 		 * The indirect data buffer descriptor is contiguous so the
3371 		 * memory for that buffer will only be registered if
3372 		 * register_always is true. Hence add one to mr_per_cmd if
3373 		 * register_always has been set.
3374 		 */
3375 		max_sectors_per_mr = srp_dev->max_pages_per_mr <<
3376 				  (ilog2(srp_dev->mr_page_size) - 9);
3377 		mr_per_cmd = register_always +
3378 			(target->scsi_host->max_sectors + 1 +
3379 			 max_sectors_per_mr - 1) / max_sectors_per_mr;
3380 		pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
3381 			 target->scsi_host->max_sectors,
3382 			 srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
3383 			 max_sectors_per_mr, mr_per_cmd);
3384 	}
3385 
3386 	target_host->sg_tablesize = target->sg_tablesize;
3387 	target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
3388 	target->mr_per_cmd = mr_per_cmd;
3389 	target->indirect_size = target->sg_tablesize *
3390 				sizeof (struct srp_direct_buf);
3391 	target->max_iu_len = sizeof (struct srp_cmd) +
3392 			     sizeof (struct srp_indirect_buf) +
3393 			     target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
3394 
3395 	INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3396 	INIT_WORK(&target->remove_work, srp_remove_work);
3397 	spin_lock_init(&target->lock);
3398 	ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
3399 	if (ret)
3400 		goto out;
3401 
3402 	ret = -ENOMEM;
3403 	target->ch_count = max_t(unsigned, num_online_nodes(),
3404 				 min(ch_count ? :
3405 				     min(4 * num_online_nodes(),
3406 					 ibdev->num_comp_vectors),
3407 				     num_online_cpus()));
3408 	target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3409 			     GFP_KERNEL);
3410 	if (!target->ch)
3411 		goto out;
3412 
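	/*
	 * Distribute the RDMA channels over the online NUMA nodes and map
	 * each node's channels onto a contiguous range of completion vectors.
	 */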
3413 	node_idx = 0;
3414 	for_each_online_node(node) {
3415 		const int ch_start = (node_idx * target->ch_count /
3416 				      num_online_nodes());
3417 		const int ch_end = ((node_idx + 1) * target->ch_count /
3418 				    num_online_nodes());
3419 		const int cv_start = (node_idx * ibdev->num_comp_vectors /
3420 				      num_online_nodes() + target->comp_vector)
3421 				     % ibdev->num_comp_vectors;
3422 		const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
3423 				    num_online_nodes() + target->comp_vector)
3424 				   % ibdev->num_comp_vectors;
3425 		int cpu_idx = 0;
3426 
3427 		for_each_online_cpu(cpu) {
3428 			if (cpu_to_node(cpu) != node)
3429 				continue;
3430 			if (ch_start + cpu_idx >= ch_end)
3431 				continue;
3432 			ch = &target->ch[ch_start + cpu_idx];
3433 			ch->target = target;
3434 			ch->comp_vector = cv_start == cv_end ? cv_start :
3435 				cv_start + cpu_idx % (cv_end - cv_start);
3436 			spin_lock_init(&ch->lock);
3437 			INIT_LIST_HEAD(&ch->free_tx);
3438 			ret = srp_new_cm_id(ch);
3439 			if (ret)
3440 				goto err_disconnect;
3441 
3442 			ret = srp_create_ch_ib(ch);
3443 			if (ret)
3444 				goto err_disconnect;
3445 
3446 			ret = srp_alloc_req_data(ch);
3447 			if (ret)
3448 				goto err_disconnect;
3449 
3450 			ret = srp_connect_ch(ch, multich);
3451 			if (ret) {
3452 				shost_printk(KERN_ERR, target->scsi_host,
3453 					     PFX "Connection %d/%d to %pI6 failed\n",
3454 					     ch_start + cpu_idx,
3455 					     target->ch_count,
3456 					     ch->target->orig_dgid.raw);
3457 				if (node_idx == 0 && cpu_idx == 0) {
3458 					goto free_ch;
3459 				} else {
3460 					srp_free_ch_ib(target, ch);
3461 					srp_free_req_data(target, ch);
3462 					target->ch_count = ch - target->ch;
3463 					goto connected;
3464 				}
3465 			}
3466 
3467 			multich = true;
3468 			cpu_idx++;
3469 		}
3470 		node_idx++;
3471 	}
3472 
3473 connected:
3474 	target->scsi_host->nr_hw_queues = target->ch_count;
3475 
3476 	ret = srp_add_target(host, target);
3477 	if (ret)
3478 		goto err_disconnect;
3479 
3480 	if (target->state != SRP_TARGET_REMOVED) {
3481 		shost_printk(KERN_DEBUG, target->scsi_host, PFX
3482 			     "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3483 			     be64_to_cpu(target->id_ext),
3484 			     be64_to_cpu(target->ioc_guid),
3485 			     be16_to_cpu(target->pkey),
3486 			     be64_to_cpu(target->service_id),
3487 			     target->sgid.raw, target->orig_dgid.raw);
3488 	}
3489 
3490 	ret = count;
3491 
3492 out:
3493 	mutex_unlock(&host->add_target_mutex);
3494 
3495 put:
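	/*
	 * Drop the reference taken by scsi_host_get() above. Upon failure
	 * also drop the reference obtained by scsi_host_alloc() since
	 * srp_remove_target() will never be invoked for this target.
	 */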
3496 	scsi_host_put(target->scsi_host);
3497 	if (ret < 0)
3498 		scsi_host_put(target->scsi_host);
3499 
3500 	return ret;
3501 
3502 err_disconnect:
3503 	srp_disconnect_target(target);
3504 
3505 free_ch:
3506 	for (i = 0; i < target->ch_count; i++) {
3507 		ch = &target->ch[i];
3508 		srp_free_ch_ib(target, ch);
3509 		srp_free_req_data(target, ch);
3510 	}
3511 
3512 	kfree(target->ch);
3513 	goto out;
3514 }
3515 
3516 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
3517 
3518 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
3519 			  char *buf)
3520 {
3521 	struct srp_host *host = container_of(dev, struct srp_host, dev);
3522 
3523 	return sprintf(buf, "%s\n", host->srp_dev->dev->name);
3524 }
3525 
3526 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
3527 
3528 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
3529 			 char *buf)
3530 {
3531 	struct srp_host *host = container_of(dev, struct srp_host, dev);
3532 
3533 	return sprintf(buf, "%d\n", host->port);
3534 }
3535 
3536 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
3537 
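/*
 * Allocate an srp_host for port @port of @device, register it under the
 * infiniband_srp class and create its add_target, ibdev and port attributes.
 */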
3538 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
3539 {
3540 	struct srp_host *host;
3541 
3542 	host = kzalloc(sizeof *host, GFP_KERNEL);
3543 	if (!host)
3544 		return NULL;
3545 
3546 	INIT_LIST_HEAD(&host->target_list);
3547 	spin_lock_init(&host->target_lock);
3548 	init_completion(&host->released);
3549 	mutex_init(&host->add_target_mutex);
3550 	host->srp_dev = device;
3551 	host->port = port;
3552 
3553 	host->dev.class = &srp_class;
3554 	host->dev.parent = device->dev->dev.parent;
3555 	dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
3556 
3557 	if (device_register(&host->dev))
3558 		goto free_host;
3559 	if (device_create_file(&host->dev, &dev_attr_add_target))
3560 		goto err_class;
3561 	if (device_create_file(&host->dev, &dev_attr_ibdev))
3562 		goto err_class;
3563 	if (device_create_file(&host->dev, &dev_attr_port))
3564 		goto err_class;
3565 
3566 	return host;
3567 
3568 err_class:
3569 	device_unregister(&host->dev);
3570 
3571 free_host:
3572 	kfree(host);
3573 
3574 	return NULL;
3575 }
3576 
3577 static void srp_add_one(struct ib_device *device)
3578 {
3579 	struct srp_device *srp_dev;
3580 	struct ib_device_attr *attr = &device->attrs;
3581 	struct srp_host *host;
3582 	int mr_page_shift, p;
3583 	u64 max_pages_per_mr;
3584 	unsigned int flags = 0;
3585 
3586 	srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
3587 	if (!srp_dev)
3588 		return;
3589 
3590 	/*
3591 	 * Use the smallest page size supported by the HCA, down to a
3592 	 * minimum of 4096 bytes. We're unlikely to build large sglists
3593 	 * out of smaller entries.
3594 	 */
3595 	mr_page_shift		= max(12, ffs(attr->page_size_cap) - 1);
3596 	srp_dev->mr_page_size	= 1 << mr_page_shift;
3597 	srp_dev->mr_page_mask	= ~((u64) srp_dev->mr_page_size - 1);
3598 	max_pages_per_mr	= attr->max_mr_size;
3599 	do_div(max_pages_per_mr, srp_dev->mr_page_size);
3600 	pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
3601 		 attr->max_mr_size, srp_dev->mr_page_size,
3602 		 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
3603 	srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
3604 					  max_pages_per_mr);
3605 
	srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
			    device->map_phys_fmr && device->unmap_fmr);
	srp_dev->has_fr = (attr->device_cap_flags &
			   IB_DEVICE_MEM_MGT_EXTENSIONS);
	if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
		dev_warn(&device->dev, "neither FMR nor FR is supported\n");
	} else if (!never_register &&
		   attr->max_mr_size >= 2 * srp_dev->mr_page_size) {
		srp_dev->use_fast_reg = (srp_dev->has_fr &&
					 (!srp_dev->has_fmr || prefer_fr));
		srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
	}

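	/*
	 * If memory will not be registered for every request, the target
	 * must reach initiator memory through the PD-wide rkey instead.
	 * That rkey grants remote access to all of the initiator's memory,
	 * hence the "unsafe" flag.
	 */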
	if (never_register || !register_always ||
	    (!srp_dev->has_fmr && !srp_dev->has_fr))
		flags |= IB_PD_UNSAFE_GLOBAL_RKEY;

	if (srp_dev->use_fast_reg) {
		srp_dev->max_pages_per_mr =
			min_t(u32, srp_dev->max_pages_per_mr,
			      attr->max_fast_reg_page_list_len);
	}
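	/*
	 * E.g. with 4 KiB MR pages and 512 pages per MR this caps a single
	 * memory registration at 2 MiB (the actual values depend on the HCA
	 * and on SRP_MAX_PAGES_PER_MR).
	 */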
	srp_dev->mr_max_size	= srp_dev->mr_page_size *
				   srp_dev->max_pages_per_mr;
	pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
		 device->name, mr_page_shift, attr->max_mr_size,
		 attr->max_fast_reg_page_list_len,
		 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);

	INIT_LIST_HEAD(&srp_dev->dev_list);

	srp_dev->dev = device;
	srp_dev->pd  = ib_alloc_pd(device, flags);
	if (IS_ERR(srp_dev->pd))
		goto free_dev;

	for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
		host = srp_add_port(srp_dev, p);
		if (host)
			list_add_tail(&host->list, &srp_dev->dev_list);
	}

	ib_set_client_data(device, &srp_client, srp_dev);
	return;

free_dev:
	kfree(srp_dev);
}

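/*
 * Counterpart of srp_add_one(): invoked when an RDMA device disappears or
 * when this driver unregisters as an IB client.  Removes every srp_host and
 * target port created for the device and releases its protection domain.
 */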
static void srp_remove_one(struct ib_device *device, void *client_data)
{
	struct srp_device *srp_dev;
	struct srp_host *host, *tmp_host;
	struct srp_target_port *target;

	srp_dev = client_data;
	if (!srp_dev)
		return;

	list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
		device_unregister(&host->dev);
		/*
		 * Wait for the sysfs entry to go away, so that no new
		 * target ports can be created.
		 */
		wait_for_completion(&host->released);

		/*
		 * Remove all target ports.
		 */
		spin_lock(&host->target_lock);
		list_for_each_entry(target, &host->target_list, list)
			srp_queue_remove_work(target);
		spin_unlock(&host->target_lock);

		/*
		 * Wait for tl_err and target port removal tasks.
		 */
		flush_workqueue(system_long_wq);
		flush_workqueue(srp_remove_wq);

		kfree(host);
	}

	ib_dealloc_pd(srp_dev->pd);

	kfree(srp_dev);
}

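/*
 * Callbacks and timeout knobs handed to the SCSI SRP transport class.  The
 * reconnect_delay, fast_io_fail_tmo and dev_loss_tmo pointers refer to this
 * driver's module parameters so that the transport class uses the values
 * configured for ib_srp.
 */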
static struct srp_function_template ib_srp_transport_functions = {
	.has_rport_state	 = true,
	.reset_timer_if_blocked	 = true,
	.reconnect_delay	 = &srp_reconnect_delay,
	.fast_io_fail_tmo	 = &srp_fast_io_fail_tmo,
	.dev_loss_tmo		 = &srp_dev_loss_tmo,
	.reconnect		 = srp_rport_reconnect,
	.rport_delete		 = srp_rport_delete,
	.terminate_rport_io	 = srp_terminate_io,
};

static int __init srp_init_module(void)
{
	int ret;

	if (srp_sg_tablesize) {
		pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
		if (!cmd_sg_entries)
			cmd_sg_entries = srp_sg_tablesize;
	}

	if (!cmd_sg_entries)
		cmd_sg_entries = SRP_DEF_SG_TABLESIZE;

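	/*
	 * The SRP_CMD information unit encodes the data buffer descriptor
	 * count in a single byte (see struct srp_cmd), so more than 255
	 * gather/scatter entries per command cannot be expressed.
	 */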
	if (cmd_sg_entries > 255) {
		pr_warn("Clamping cmd_sg_entries to 255\n");
		cmd_sg_entries = 255;
	}

	if (!indirect_sg_entries)
		indirect_sg_entries = cmd_sg_entries;
	else if (indirect_sg_entries < cmd_sg_entries) {
		pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
			cmd_sg_entries);
		indirect_sg_entries = cmd_sg_entries;
	}

	if (indirect_sg_entries > SG_MAX_SEGMENTS) {
		pr_warn("Clamping indirect_sg_entries to %u\n",
			SG_MAX_SEGMENTS);
		indirect_sg_entries = SG_MAX_SEGMENTS;
	}

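	/*
	 * Dedicated workqueue on which srp_queue_remove_work() schedules
	 * target port removal, presumably kept separate from the system
	 * workqueues so that srp_remove_one() can flush it safely.
	 */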
	srp_remove_wq = create_workqueue("srp_remove");
	if (!srp_remove_wq) {
		ret = -ENOMEM;
		goto out;
	}

	ret = -ENOMEM;
	ib_srp_transport_template =
		srp_attach_transport(&ib_srp_transport_functions);
	if (!ib_srp_transport_template)
		goto destroy_wq;

	ret = class_register(&srp_class);
	if (ret) {
		pr_err("couldn't register class infiniband_srp\n");
		goto release_tr;
	}

	ib_sa_register_client(&srp_sa_client);

	ret = ib_register_client(&srp_client);
	if (ret) {
		pr_err("couldn't register IB client\n");
		goto unreg_sa;
	}

out:
	return ret;

unreg_sa:
	ib_sa_unregister_client(&srp_sa_client);
	class_unregister(&srp_class);

release_tr:
	srp_release_transport(ib_srp_transport_template);

destroy_wq:
	destroy_workqueue(srp_remove_wq);
	goto out;
}

static void __exit srp_cleanup_module(void)
{
	ib_unregister_client(&srp_client);
	ib_sa_unregister_client(&srp_sa_client);
	class_unregister(&srp_class);
	srp_release_transport(ib_srp_transport_template);
	destroy_workqueue(srp_remove_wq);
}

module_init(srp_init_module);
module_exit(srp_cleanup_module);