1 /*
2  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
34 
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/err.h>
39 #include <linux/string.h>
40 #include <linux/parser.h>
41 #include <linux/random.h>
42 #include <linux/jiffies.h>
43 #include <rdma/ib_cache.h>
44 
45 #include <linux/atomic.h>
46 
47 #include <scsi/scsi.h>
48 #include <scsi/scsi_device.h>
49 #include <scsi/scsi_dbg.h>
50 #include <scsi/scsi_tcq.h>
51 #include <scsi/srp.h>
52 #include <scsi/scsi_transport_srp.h>
53 
54 #include "ib_srp.h"
55 
56 #define DRV_NAME	"ib_srp"
57 #define PFX		DRV_NAME ": "
58 #define DRV_VERSION	"2.0"
59 #define DRV_RELDATE	"July 26, 2015"
60 
61 MODULE_AUTHOR("Roland Dreier");
62 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
63 MODULE_LICENSE("Dual BSD/GPL");
64 MODULE_VERSION(DRV_VERSION);
65 MODULE_INFO(release_date, DRV_RELDATE);
66 
67 static unsigned int srp_sg_tablesize;
68 static unsigned int cmd_sg_entries;
69 static unsigned int indirect_sg_entries;
70 static bool allow_ext_sg;
71 static bool prefer_fr = true;
72 static bool register_always = true;
73 static bool never_register;
74 static int topspin_workarounds = 1;
75 
76 module_param(srp_sg_tablesize, uint, 0444);
77 MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
78 
79 module_param(cmd_sg_entries, uint, 0444);
80 MODULE_PARM_DESC(cmd_sg_entries,
81 		 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
82 
83 module_param(indirect_sg_entries, uint, 0444);
84 MODULE_PARM_DESC(indirect_sg_entries,
85 		 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");
86 
87 module_param(allow_ext_sg, bool, 0444);
88 MODULE_PARM_DESC(allow_ext_sg,
89 		  "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
90 
91 module_param(topspin_workarounds, int, 0444);
92 MODULE_PARM_DESC(topspin_workarounds,
93 		 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
94 
95 module_param(prefer_fr, bool, 0444);
96 MODULE_PARM_DESC(prefer_fr,
97 "Whether to use fast registration if both FMR and fast registration are supported");
98 
99 module_param(register_always, bool, 0444);
100 MODULE_PARM_DESC(register_always,
101 		 "Use memory registration even for contiguous memory regions");
102 
103 module_param(never_register, bool, 0444);
104 MODULE_PARM_DESC(never_register, "Never register memory");
105 
106 static const struct kernel_param_ops srp_tmo_ops;
107 
108 static int srp_reconnect_delay = 10;
109 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
110 		S_IRUGO | S_IWUSR);
111 MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
112 
113 static int srp_fast_io_fail_tmo = 15;
114 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
115 		S_IRUGO | S_IWUSR);
116 MODULE_PARM_DESC(fast_io_fail_tmo,
117 		 "Number of seconds between the observation of a transport"
118 		 " layer error and failing all I/O. \"off\" means that this"
119 		 " functionality is disabled.");
120 
121 static int srp_dev_loss_tmo = 600;
122 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
123 		S_IRUGO | S_IWUSR);
124 MODULE_PARM_DESC(dev_loss_tmo,
125 		 "Maximum number of seconds that the SRP transport should"
126 		 " insulate transport layer errors. After this time has been"
127 		 " exceeded the SCSI host is removed. Should be"
128 		 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
129 		 " if fast_io_fail_tmo has not been set. \"off\" means that"
130 		 " this functionality is disabled.");
131 
132 static unsigned ch_count;
133 module_param(ch_count, uint, 0444);
134 MODULE_PARM_DESC(ch_count,
135 		 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
136 
137 static void srp_add_one(struct ib_device *device);
138 static void srp_remove_one(struct ib_device *device, void *client_data);
139 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
140 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
141 		const char *opname);
142 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
143 
144 static struct scsi_transport_template *ib_srp_transport_template;
145 static struct workqueue_struct *srp_remove_wq;
146 
147 static struct ib_client srp_client = {
148 	.name   = "srp",
149 	.add    = srp_add_one,
150 	.remove = srp_remove_one
151 };
152 
153 static struct ib_sa_client srp_sa_client;
154 
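/*
 * Getter and setter callbacks for the reconnect_delay, fast_io_fail_tmo
 * and dev_loss_tmo module parameters. A negative timeout is reported as
 * "off"; new values are cross-checked against the other two timeouts via
 * srp_tmo_valid() before they are accepted.
 */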
155 static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
156 {
157 	int tmo = *(int *)kp->arg;
158 
159 	if (tmo >= 0)
160 		return sprintf(buffer, "%d", tmo);
161 	else
162 		return sprintf(buffer, "off");
163 }
164 
165 static int srp_tmo_set(const char *val, const struct kernel_param *kp)
166 {
167 	int tmo, res;
168 
169 	res = srp_parse_tmo(&tmo, val);
170 	if (res)
171 		goto out;
172 
173 	if (kp->arg == &srp_reconnect_delay)
174 		res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
175 				    srp_dev_loss_tmo);
176 	else if (kp->arg == &srp_fast_io_fail_tmo)
177 		res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
178 	else
179 		res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
180 				    tmo);
181 	if (res)
182 		goto out;
183 	*(int *)kp->arg = tmo;
184 
185 out:
186 	return res;
187 }
188 
189 static const struct kernel_param_ops srp_tmo_ops = {
190 	.get = srp_tmo_get,
191 	.set = srp_tmo_set,
192 };
193 
194 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
195 {
196 	return (struct srp_target_port *) host->hostdata;
197 }
198 
199 static const char *srp_target_info(struct Scsi_Host *host)
200 {
201 	return host_to_target(host)->target_name;
202 }
203 
204 static int srp_target_is_topspin(struct srp_target_port *target)
205 {
206 	static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
207 	static const u8 cisco_oui[3]   = { 0x00, 0x1b, 0x0d };
208 
209 	return topspin_workarounds &&
210 		(!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
211 		 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
212 }
213 
214 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
215 				   gfp_t gfp_mask,
216 				   enum dma_data_direction direction)
217 {
218 	struct srp_iu *iu;
219 
220 	iu = kmalloc(sizeof *iu, gfp_mask);
221 	if (!iu)
222 		goto out;
223 
224 	iu->buf = kzalloc(size, gfp_mask);
225 	if (!iu->buf)
226 		goto out_free_iu;
227 
228 	iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
229 				    direction);
230 	if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
231 		goto out_free_buf;
232 
233 	iu->size      = size;
234 	iu->direction = direction;
235 
236 	return iu;
237 
238 out_free_buf:
239 	kfree(iu->buf);
240 out_free_iu:
241 	kfree(iu);
242 out:
243 	return NULL;
244 }
245 
246 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
247 {
248 	if (!iu)
249 		return;
250 
251 	ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
252 			    iu->direction);
253 	kfree(iu->buf);
254 	kfree(iu);
255 }
256 
257 static void srp_qp_event(struct ib_event *event, void *context)
258 {
259 	pr_debug("QP event %s (%d)\n",
260 		 ib_event_msg(event->event), event->event);
261 }
262 
263 static int srp_init_qp(struct srp_target_port *target,
264 		       struct ib_qp *qp)
265 {
266 	struct ib_qp_attr *attr;
267 	int ret;
268 
269 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
270 	if (!attr)
271 		return -ENOMEM;
272 
273 	ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
274 				  target->srp_host->port,
275 				  be16_to_cpu(target->pkey),
276 				  &attr->pkey_index);
277 	if (ret)
278 		goto out;
279 
280 	attr->qp_state        = IB_QPS_INIT;
281 	attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
282 				    IB_ACCESS_REMOTE_WRITE);
283 	attr->port_num        = target->srp_host->port;
284 
285 	ret = ib_modify_qp(qp, attr,
286 			   IB_QP_STATE		|
287 			   IB_QP_PKEY_INDEX	|
288 			   IB_QP_ACCESS_FLAGS	|
289 			   IB_QP_PORT);
290 
291 out:
292 	kfree(attr);
293 	return ret;
294 }
295 
296 static int srp_new_cm_id(struct srp_rdma_ch *ch)
297 {
298 	struct srp_target_port *target = ch->target;
299 	struct ib_cm_id *new_cm_id;
300 
301 	new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
302 				    srp_cm_handler, ch);
303 	if (IS_ERR(new_cm_id))
304 		return PTR_ERR(new_cm_id);
305 
306 	if (ch->cm_id)
307 		ib_destroy_cm_id(ch->cm_id);
308 	ch->cm_id = new_cm_id;
309 	ch->path.sgid = target->sgid;
310 	ch->path.dgid = target->orig_dgid;
311 	ch->path.pkey = target->pkey;
312 	ch->path.service_id = target->service_id;
313 
314 	return 0;
315 }
316 
317 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
318 {
319 	struct srp_device *dev = target->srp_host->srp_dev;
320 	struct ib_fmr_pool_param fmr_param;
321 
322 	memset(&fmr_param, 0, sizeof(fmr_param));
323 	fmr_param.pool_size	    = target->mr_pool_size;
324 	fmr_param.dirty_watermark   = fmr_param.pool_size / 4;
325 	fmr_param.cache		    = 1;
326 	fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
327 	fmr_param.page_shift	    = ilog2(dev->mr_page_size);
328 	fmr_param.access	    = (IB_ACCESS_LOCAL_WRITE |
329 				       IB_ACCESS_REMOTE_WRITE |
330 				       IB_ACCESS_REMOTE_READ);
331 
332 	return ib_create_fmr_pool(dev->pd, &fmr_param);
333 }
334 
335 /**
336  * srp_destroy_fr_pool() - free the resources owned by a pool
337  * @pool: Fast registration pool to be destroyed.
338  */
339 static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
340 {
341 	int i;
342 	struct srp_fr_desc *d;
343 
344 	if (!pool)
345 		return;
346 
347 	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
348 		if (d->mr)
349 			ib_dereg_mr(d->mr);
350 	}
351 	kfree(pool);
352 }
353 
354 /**
355  * srp_create_fr_pool() - allocate and initialize a pool for fast registration
356  * @device:            IB device to allocate fast registration descriptors for.
357  * @pd:                Protection domain associated with the FR descriptors.
358  * @pool_size:         Number of descriptors to allocate.
359  * @max_page_list_len: Maximum fast registration work request page list length.
360  */
361 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
362 					      struct ib_pd *pd, int pool_size,
363 					      int max_page_list_len)
364 {
365 	struct srp_fr_pool *pool;
366 	struct srp_fr_desc *d;
367 	struct ib_mr *mr;
368 	int i, ret = -EINVAL;
369 
370 	if (pool_size <= 0)
371 		goto err;
372 	ret = -ENOMEM;
373 	pool = kzalloc(sizeof(struct srp_fr_pool) +
374 		       pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
375 	if (!pool)
376 		goto err;
377 	pool->size = pool_size;
378 	pool->max_page_list_len = max_page_list_len;
379 	spin_lock_init(&pool->lock);
380 	INIT_LIST_HEAD(&pool->free_list);
381 
382 	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
383 		mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
384 				 max_page_list_len);
385 		if (IS_ERR(mr)) {
386 			ret = PTR_ERR(mr);
387 			goto destroy_pool;
388 		}
389 		d->mr = mr;
390 		list_add_tail(&d->entry, &pool->free_list);
391 	}
392 
393 out:
394 	return pool;
395 
396 destroy_pool:
397 	srp_destroy_fr_pool(pool);
398 
399 err:
400 	pool = ERR_PTR(ret);
401 	goto out;
402 }
403 
404 /**
405  * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
406  * @pool: Pool to obtain descriptor from.
407  */
408 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
409 {
410 	struct srp_fr_desc *d = NULL;
411 	unsigned long flags;
412 
413 	spin_lock_irqsave(&pool->lock, flags);
414 	if (!list_empty(&pool->free_list)) {
415 		d = list_first_entry(&pool->free_list, typeof(*d), entry);
416 		list_del(&d->entry);
417 	}
418 	spin_unlock_irqrestore(&pool->lock, flags);
419 
420 	return d;
421 }
422 
423 /**
424  * srp_fr_pool_put() - put an FR descriptor back in the free list
425  * @pool: Pool the descriptor was allocated from.
426  * @desc: Pointer to an array of fast registration descriptor pointers.
427  * @n:    Number of descriptors to put back.
428  *
429  * Note: The caller must already have queued an invalidation request for
430  * desc->mr->rkey before calling this function.
431  */
432 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
433 			    int n)
434 {
435 	unsigned long flags;
436 	int i;
437 
438 	spin_lock_irqsave(&pool->lock, flags);
439 	for (i = 0; i < n; i++)
440 		list_add(&desc[i]->entry, &pool->free_list);
441 	spin_unlock_irqrestore(&pool->lock, flags);
442 }
443 
444 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
445 {
446 	struct srp_device *dev = target->srp_host->srp_dev;
447 
448 	return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
449 				  dev->max_pages_per_mr);
450 }
451 
452 /**
453  * srp_destroy_qp() - destroy an RDMA queue pair
454  * @qp: RDMA queue pair.
455  *
456  * Drain the qp before destroying it.  This prevents the receive
457  * completion handler from accessing the queue pair while it is
458  * being destroyed.
459  */
460 static void srp_destroy_qp(struct ib_qp *qp)
461 {
462 	ib_drain_rq(qp);
463 	ib_destroy_qp(qp);
464 }
465 
466 static int srp_create_ch_ib(struct srp_rdma_ch *ch)
467 {
468 	struct srp_target_port *target = ch->target;
469 	struct srp_device *dev = target->srp_host->srp_dev;
470 	struct ib_qp_init_attr *init_attr;
471 	struct ib_cq *recv_cq, *send_cq;
472 	struct ib_qp *qp;
473 	struct ib_fmr_pool *fmr_pool = NULL;
474 	struct srp_fr_pool *fr_pool = NULL;
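	/*
	 * Send queue depth multiplier: one work request per SRP command plus,
	 * when fast registration is used, a REG_MR and a LOCAL_INV work
	 * request for each memory region a single command may use.
	 */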
475 	const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
476 	int ret;
477 
478 	init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
479 	if (!init_attr)
480 		return -ENOMEM;
481 
482 	/* queue_size + 1 for ib_drain_rq() */
483 	recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
484 				ch->comp_vector, IB_POLL_SOFTIRQ);
485 	if (IS_ERR(recv_cq)) {
486 		ret = PTR_ERR(recv_cq);
487 		goto err;
488 	}
489 
490 	send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
491 				ch->comp_vector, IB_POLL_DIRECT);
492 	if (IS_ERR(send_cq)) {
493 		ret = PTR_ERR(send_cq);
494 		goto err_recv_cq;
495 	}
496 
497 	init_attr->event_handler       = srp_qp_event;
498 	init_attr->cap.max_send_wr     = m * target->queue_size;
499 	init_attr->cap.max_recv_wr     = target->queue_size + 1;
500 	init_attr->cap.max_recv_sge    = 1;
501 	init_attr->cap.max_send_sge    = 1;
502 	init_attr->sq_sig_type         = IB_SIGNAL_REQ_WR;
503 	init_attr->qp_type             = IB_QPT_RC;
504 	init_attr->send_cq             = send_cq;
505 	init_attr->recv_cq             = recv_cq;
506 
507 	qp = ib_create_qp(dev->pd, init_attr);
508 	if (IS_ERR(qp)) {
509 		ret = PTR_ERR(qp);
510 		goto err_send_cq;
511 	}
512 
513 	ret = srp_init_qp(target, qp);
514 	if (ret)
515 		goto err_qp;
516 
517 	if (dev->use_fast_reg) {
518 		fr_pool = srp_alloc_fr_pool(target);
519 		if (IS_ERR(fr_pool)) {
520 			ret = PTR_ERR(fr_pool);
521 			shost_printk(KERN_WARNING, target->scsi_host, PFX
522 				     "FR pool allocation failed (%d)\n", ret);
523 			goto err_qp;
524 		}
525 	} else if (dev->use_fmr) {
526 		fmr_pool = srp_alloc_fmr_pool(target);
527 		if (IS_ERR(fmr_pool)) {
528 			ret = PTR_ERR(fmr_pool);
529 			shost_printk(KERN_WARNING, target->scsi_host, PFX
530 				     "FMR pool allocation failed (%d)\n", ret);
531 			goto err_qp;
532 		}
533 	}
534 
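	/*
	 * This function is also called when reconnecting. Release any
	 * resources left over from a previous invocation before installing
	 * the newly allocated QP, CQs and MR pool.
	 */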
535 	if (ch->qp)
536 		srp_destroy_qp(ch->qp);
537 	if (ch->recv_cq)
538 		ib_free_cq(ch->recv_cq);
539 	if (ch->send_cq)
540 		ib_free_cq(ch->send_cq);
541 
542 	ch->qp = qp;
543 	ch->recv_cq = recv_cq;
544 	ch->send_cq = send_cq;
545 
546 	if (dev->use_fast_reg) {
547 		if (ch->fr_pool)
548 			srp_destroy_fr_pool(ch->fr_pool);
549 		ch->fr_pool = fr_pool;
550 	} else if (dev->use_fmr) {
551 		if (ch->fmr_pool)
552 			ib_destroy_fmr_pool(ch->fmr_pool);
553 		ch->fmr_pool = fmr_pool;
554 	}
555 
556 	kfree(init_attr);
557 	return 0;
558 
559 err_qp:
560 	srp_destroy_qp(qp);
561 
562 err_send_cq:
563 	ib_free_cq(send_cq);
564 
565 err_recv_cq:
566 	ib_free_cq(recv_cq);
567 
568 err:
569 	kfree(init_attr);
570 	return ret;
571 }
572 
573 /*
574  * Note: this function may be called without srp_alloc_iu_bufs() having been
575  * invoked. Hence the ch->[rt]x_ring checks.
576  */
577 static void srp_free_ch_ib(struct srp_target_port *target,
578 			   struct srp_rdma_ch *ch)
579 {
580 	struct srp_device *dev = target->srp_host->srp_dev;
581 	int i;
582 
583 	if (!ch->target)
584 		return;
585 
586 	if (ch->cm_id) {
587 		ib_destroy_cm_id(ch->cm_id);
588 		ch->cm_id = NULL;
589 	}
590 
591 	/* If srp_new_cm_id() succeeded but srp_create_ch_ib() failed, return. */
592 	if (!ch->qp)
593 		return;
594 
595 	if (dev->use_fast_reg) {
596 		if (ch->fr_pool)
597 			srp_destroy_fr_pool(ch->fr_pool);
598 	} else if (dev->use_fmr) {
599 		if (ch->fmr_pool)
600 			ib_destroy_fmr_pool(ch->fmr_pool);
601 	}
602 
603 	srp_destroy_qp(ch->qp);
604 	ib_free_cq(ch->send_cq);
605 	ib_free_cq(ch->recv_cq);
606 
607 	/*
608 	 * Prevent the SCSI error handler from using this channel after it
609 	 * has been freed. The SCSI error handler may keep trying to
610 	 * perform recovery actions even after scsi_remove_host() has
611 	 * returned.
612 	 */
613 	ch->target = NULL;
614 
615 	ch->qp = NULL;
616 	ch->send_cq = ch->recv_cq = NULL;
617 
618 	if (ch->rx_ring) {
619 		for (i = 0; i < target->queue_size; ++i)
620 			srp_free_iu(target->srp_host, ch->rx_ring[i]);
621 		kfree(ch->rx_ring);
622 		ch->rx_ring = NULL;
623 	}
624 	if (ch->tx_ring) {
625 		for (i = 0; i < target->queue_size; ++i)
626 			srp_free_iu(target->srp_host, ch->tx_ring[i]);
627 		kfree(ch->tx_ring);
628 		ch->tx_ring = NULL;
629 	}
630 }
631 
632 static void srp_path_rec_completion(int status,
633 				    struct ib_sa_path_rec *pathrec,
634 				    void *ch_ptr)
635 {
636 	struct srp_rdma_ch *ch = ch_ptr;
637 	struct srp_target_port *target = ch->target;
638 
639 	ch->status = status;
640 	if (status)
641 		shost_printk(KERN_ERR, target->scsi_host,
642 			     PFX "Got failed path rec status %d\n", status);
643 	else
644 		ch->path = *pathrec;
645 	complete(&ch->done);
646 }
647 
648 static int srp_lookup_path(struct srp_rdma_ch *ch)
649 {
650 	struct srp_target_port *target = ch->target;
651 	int ret;
652 
653 	ch->path.numb_path = 1;
654 
655 	init_completion(&ch->done);
656 
657 	ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
658 					       target->srp_host->srp_dev->dev,
659 					       target->srp_host->port,
660 					       &ch->path,
661 					       IB_SA_PATH_REC_SERVICE_ID |
662 					       IB_SA_PATH_REC_DGID	 |
663 					       IB_SA_PATH_REC_SGID	 |
664 					       IB_SA_PATH_REC_NUMB_PATH	 |
665 					       IB_SA_PATH_REC_PKEY,
666 					       SRP_PATH_REC_TIMEOUT_MS,
667 					       GFP_KERNEL,
668 					       srp_path_rec_completion,
669 					       ch, &ch->path_query);
670 	if (ch->path_query_id < 0)
671 		return ch->path_query_id;
672 
673 	ret = wait_for_completion_interruptible(&ch->done);
674 	if (ret < 0)
675 		return ret;
676 
677 	if (ch->status < 0)
678 		shost_printk(KERN_WARNING, target->scsi_host,
679 			     PFX "Path record query failed\n");
680 
681 	return ch->status;
682 }
683 
684 static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
685 {
686 	struct srp_target_port *target = ch->target;
687 	struct {
688 		struct ib_cm_req_param param;
689 		struct srp_login_req   priv;
690 	} *req = NULL;
691 	int status;
692 
693 	req = kzalloc(sizeof *req, GFP_KERNEL);
694 	if (!req)
695 		return -ENOMEM;
696 
697 	req->param.primary_path		      = &ch->path;
698 	req->param.alternate_path 	      = NULL;
699 	req->param.service_id 		      = target->service_id;
700 	req->param.qp_num		      = ch->qp->qp_num;
701 	req->param.qp_type		      = ch->qp->qp_type;
702 	req->param.private_data 	      = &req->priv;
703 	req->param.private_data_len 	      = sizeof req->priv;
704 	req->param.flow_control 	      = 1;
705 
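	/* The starting PSN is a random 24-bit value. */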
706 	get_random_bytes(&req->param.starting_psn, 4);
707 	req->param.starting_psn 	     &= 0xffffff;
708 
709 	/*
710 	 * Pick some arbitrary defaults here; we could make these
711 	 * module parameters if anyone cared about setting them.
712 	 */
713 	req->param.responder_resources	      = 4;
714 	req->param.remote_cm_response_timeout = 20;
715 	req->param.local_cm_response_timeout  = 20;
716 	req->param.retry_count                = target->tl_retry_count;
717 	req->param.rnr_retry_count 	      = 7;
718 	req->param.max_cm_retries 	      = 15;
719 
720 	req->priv.opcode     	= SRP_LOGIN_REQ;
721 	req->priv.tag        	= 0;
722 	req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
723 	req->priv.req_buf_fmt 	= cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
724 					      SRP_BUF_FORMAT_INDIRECT);
725 	req->priv.req_flags	= (multich ? SRP_MULTICHAN_MULTI :
726 				   SRP_MULTICHAN_SINGLE);
727 	/*
728 	 * In the published SRP specification (draft rev. 16a), the
729 	 * port identifier format is 8 bytes of ID extension followed
730 	 * by 8 bytes of GUID.  Older drafts put the two halves in the
731 	 * opposite order, so that the GUID comes first.
732 	 *
733 	 * Targets conforming to these obsolete drafts can be
734 	 * recognized by the I/O Class they report.
735 	 */
736 	if (target->io_class == SRP_REV10_IB_IO_CLASS) {
737 		memcpy(req->priv.initiator_port_id,
738 		       &target->sgid.global.interface_id, 8);
739 		memcpy(req->priv.initiator_port_id + 8,
740 		       &target->initiator_ext, 8);
741 		memcpy(req->priv.target_port_id,     &target->ioc_guid, 8);
742 		memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
743 	} else {
744 		memcpy(req->priv.initiator_port_id,
745 		       &target->initiator_ext, 8);
746 		memcpy(req->priv.initiator_port_id + 8,
747 		       &target->sgid.global.interface_id, 8);
748 		memcpy(req->priv.target_port_id,     &target->id_ext, 8);
749 		memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
750 	}
751 
752 	/*
753 	 * Topspin/Cisco SRP targets will reject our login unless we
754 	 * zero out the first 8 bytes of our initiator port ID and set
755 	 * the second 8 bytes to the local node GUID.
756 	 */
757 	if (srp_target_is_topspin(target)) {
758 		shost_printk(KERN_DEBUG, target->scsi_host,
759 			     PFX "Topspin/Cisco initiator port ID workaround "
760 			     "activated for target GUID %016llx\n",
761 			     be64_to_cpu(target->ioc_guid));
762 		memset(req->priv.initiator_port_id, 0, 8);
763 		memcpy(req->priv.initiator_port_id + 8,
764 		       &target->srp_host->srp_dev->dev->node_guid, 8);
765 	}
766 
767 	status = ib_send_cm_req(ch->cm_id, &req->param);
768 
769 	kfree(req);
770 
771 	return status;
772 }
773 
774 static bool srp_queue_remove_work(struct srp_target_port *target)
775 {
776 	bool changed = false;
777 
778 	spin_lock_irq(&target->lock);
779 	if (target->state != SRP_TARGET_REMOVED) {
780 		target->state = SRP_TARGET_REMOVED;
781 		changed = true;
782 	}
783 	spin_unlock_irq(&target->lock);
784 
785 	if (changed)
786 		queue_work(srp_remove_wq, &target->remove_work);
787 
788 	return changed;
789 }
790 
791 static void srp_disconnect_target(struct srp_target_port *target)
792 {
793 	struct srp_rdma_ch *ch;
794 	int i;
795 
796 	/* XXX should send SRP_I_LOGOUT request */
797 
798 	for (i = 0; i < target->ch_count; i++) {
799 		ch = &target->ch[i];
800 		ch->connected = false;
801 		if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
802 			shost_printk(KERN_DEBUG, target->scsi_host,
803 				     PFX "Sending CM DREQ failed\n");
804 		}
805 	}
806 }
807 
808 static void srp_free_req_data(struct srp_target_port *target,
809 			      struct srp_rdma_ch *ch)
810 {
811 	struct srp_device *dev = target->srp_host->srp_dev;
812 	struct ib_device *ibdev = dev->dev;
813 	struct srp_request *req;
814 	int i;
815 
816 	if (!ch->req_ring)
817 		return;
818 
819 	for (i = 0; i < target->req_ring_size; ++i) {
820 		req = &ch->req_ring[i];
821 		if (dev->use_fast_reg) {
822 			kfree(req->fr_list);
823 		} else {
824 			kfree(req->fmr_list);
825 			kfree(req->map_page);
826 		}
827 		if (req->indirect_dma_addr) {
828 			ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
829 					    target->indirect_size,
830 					    DMA_TO_DEVICE);
831 		}
832 		kfree(req->indirect_desc);
833 	}
834 
835 	kfree(ch->req_ring);
836 	ch->req_ring = NULL;
837 }
838 
839 static int srp_alloc_req_data(struct srp_rdma_ch *ch)
840 {
841 	struct srp_target_port *target = ch->target;
842 	struct srp_device *srp_dev = target->srp_host->srp_dev;
843 	struct ib_device *ibdev = srp_dev->dev;
844 	struct srp_request *req;
845 	void *mr_list;
846 	dma_addr_t dma_addr;
847 	int i, ret = -ENOMEM;
848 
849 	ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
850 			       GFP_KERNEL);
851 	if (!ch->req_ring)
852 		goto out;
853 
854 	for (i = 0; i < target->req_ring_size; ++i) {
855 		req = &ch->req_ring[i];
856 		mr_list = kmalloc(target->mr_per_cmd * sizeof(void *),
857 				  GFP_KERNEL);
858 		if (!mr_list)
859 			goto out;
860 		if (srp_dev->use_fast_reg) {
861 			req->fr_list = mr_list;
862 		} else {
863 			req->fmr_list = mr_list;
864 			req->map_page = kmalloc(srp_dev->max_pages_per_mr *
865 						sizeof(void *), GFP_KERNEL);
866 			if (!req->map_page)
867 				goto out;
868 		}
869 		req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
870 		if (!req->indirect_desc)
871 			goto out;
872 
873 		dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
874 					     target->indirect_size,
875 					     DMA_TO_DEVICE);
876 		if (ib_dma_mapping_error(ibdev, dma_addr))
877 			goto out;
878 
879 		req->indirect_dma_addr = dma_addr;
880 	}
881 	ret = 0;
882 
883 out:
884 	return ret;
885 }
886 
887 /**
888  * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
889  * @shost: SCSI host whose attributes to remove from sysfs.
890  *
891  * Note: Any attributes defined in the host template that did not exist
892  * before this function was invoked will be ignored.
893  */
894 static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
895 {
896 	struct device_attribute **attr;
897 
898 	for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
899 		device_remove_file(&shost->shost_dev, *attr);
900 }
901 
902 static void srp_remove_target(struct srp_target_port *target)
903 {
904 	struct srp_rdma_ch *ch;
905 	int i;
906 
907 	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
908 
909 	srp_del_scsi_host_attr(target->scsi_host);
910 	srp_rport_get(target->rport);
911 	srp_remove_host(target->scsi_host);
912 	scsi_remove_host(target->scsi_host);
913 	srp_stop_rport_timers(target->rport);
914 	srp_disconnect_target(target);
915 	for (i = 0; i < target->ch_count; i++) {
916 		ch = &target->ch[i];
917 		srp_free_ch_ib(target, ch);
918 	}
919 	cancel_work_sync(&target->tl_err_work);
920 	srp_rport_put(target->rport);
921 	for (i = 0; i < target->ch_count; i++) {
922 		ch = &target->ch[i];
923 		srp_free_req_data(target, ch);
924 	}
925 	kfree(target->ch);
926 	target->ch = NULL;
927 
928 	spin_lock(&target->srp_host->target_lock);
929 	list_del(&target->list);
930 	spin_unlock(&target->srp_host->target_lock);
931 
932 	scsi_host_put(target->scsi_host);
933 }
934 
935 static void srp_remove_work(struct work_struct *work)
936 {
937 	struct srp_target_port *target =
938 		container_of(work, struct srp_target_port, remove_work);
939 
940 	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
941 
942 	srp_remove_target(target);
943 }
944 
945 static void srp_rport_delete(struct srp_rport *rport)
946 {
947 	struct srp_target_port *target = rport->lld_data;
948 
949 	srp_queue_remove_work(target);
950 }
951 
952 /**
953  * srp_connected_ch() - number of connected channels
954  * @target: SRP target port.
955  */
956 static int srp_connected_ch(struct srp_target_port *target)
957 {
958 	int i, c = 0;
959 
960 	for (i = 0; i < target->ch_count; i++)
961 		c += target->ch[i].connected;
962 
963 	return c;
964 }
965 
966 static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
967 {
968 	struct srp_target_port *target = ch->target;
969 	int ret;
970 
971 	WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
972 
973 	ret = srp_lookup_path(ch);
974 	if (ret)
975 		goto out;
976 
977 	while (1) {
978 		init_completion(&ch->done);
979 		ret = srp_send_req(ch, multich);
980 		if (ret)
981 			goto out;
982 		ret = wait_for_completion_interruptible(&ch->done);
983 		if (ret < 0)
984 			goto out;
985 
986 		/*
987 		 * The CM event handling code will set status to
988 		 * SRP_PORT_REDIRECT if we get a port redirect REJ
989 		 * back, or SRP_DLID_REDIRECT if we get a lid/qp
990 		 * redirect REJ back.
991 		 */
992 		ret = ch->status;
993 		switch (ret) {
994 		case 0:
995 			ch->connected = true;
996 			goto out;
997 
998 		case SRP_PORT_REDIRECT:
999 			ret = srp_lookup_path(ch);
1000 			if (ret)
1001 				goto out;
1002 			break;
1003 
1004 		case SRP_DLID_REDIRECT:
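			/* The CM event handler updated ch->path; just retry the login. */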
1005 			break;
1006 
1007 		case SRP_STALE_CONN:
1008 			shost_printk(KERN_ERR, target->scsi_host, PFX
1009 				     "giving up on stale connection\n");
1010 			ret = -ECONNRESET;
1011 			goto out;
1012 
1013 		default:
1014 			goto out;
1015 		}
1016 	}
1017 
1018 out:
1019 	return ret <= 0 ? ret : -ENODEV;
1020 }
1021 
1022 static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
1023 {
1024 	srp_handle_qp_err(cq, wc, "INV RKEY");
1025 }
1026 
1027 static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
1028 		u32 rkey)
1029 {
1030 	struct ib_send_wr *bad_wr;
1031 	struct ib_send_wr wr = {
1032 		.opcode		    = IB_WR_LOCAL_INV,
1033 		.next		    = NULL,
1034 		.num_sge	    = 0,
1035 		.send_flags	    = 0,
1036 		.ex.invalidate_rkey = rkey,
1037 	};
1038 
1039 	wr.wr_cqe = &req->reg_cqe;
1040 	req->reg_cqe.done = srp_inv_rkey_err_done;
1041 	return ib_post_send(ch->qp, &wr, &bad_wr);
1042 }
1043 
1044 static void srp_unmap_data(struct scsi_cmnd *scmnd,
1045 			   struct srp_rdma_ch *ch,
1046 			   struct srp_request *req)
1047 {
1048 	struct srp_target_port *target = ch->target;
1049 	struct srp_device *dev = target->srp_host->srp_dev;
1050 	struct ib_device *ibdev = dev->dev;
1051 	int i, res;
1052 
1053 	if (!scsi_sglist(scmnd) ||
1054 	    (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1055 	     scmnd->sc_data_direction != DMA_FROM_DEVICE))
1056 		return;
1057 
1058 	if (dev->use_fast_reg) {
1059 		struct srp_fr_desc **pfr;
1060 
1061 		for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1062 			res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
1063 			if (res < 0) {
1064 				shost_printk(KERN_ERR, target->scsi_host, PFX
1065 				  "Queueing INV WR for rkey %#x failed (%d)\n",
1066 				  (*pfr)->mr->rkey, res);
1067 				queue_work(system_long_wq,
1068 					   &target->tl_err_work);
1069 			}
1070 		}
1071 		if (req->nmdesc)
1072 			srp_fr_pool_put(ch->fr_pool, req->fr_list,
1073 					req->nmdesc);
1074 	} else if (dev->use_fmr) {
1075 		struct ib_pool_fmr **pfmr;
1076 
1077 		for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
1078 			ib_fmr_pool_unmap(*pfmr);
1079 	}
1080 
1081 	ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1082 			scmnd->sc_data_direction);
1083 }
1084 
1085 /**
1086  * srp_claim_req() - Take ownership of the scmnd associated with a request.
1087  * @ch: SRP RDMA channel.
1088  * @req: SRP request.
1089  * @sdev: If not NULL, only take ownership for this SCSI device.
1090  * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1091  *         ownership of @req->scmnd if it equals @scmnd.
1092  *
1093  * Return value:
1094  * Either NULL or a pointer to the SCSI command the caller became owner of.
1095  */
1096 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1097 				       struct srp_request *req,
1098 				       struct scsi_device *sdev,
1099 				       struct scsi_cmnd *scmnd)
1100 {
1101 	unsigned long flags;
1102 
1103 	spin_lock_irqsave(&ch->lock, flags);
1104 	if (req->scmnd &&
1105 	    (!sdev || req->scmnd->device == sdev) &&
1106 	    (!scmnd || req->scmnd == scmnd)) {
1107 		scmnd = req->scmnd;
1108 		req->scmnd = NULL;
1109 	} else {
1110 		scmnd = NULL;
1111 	}
1112 	spin_unlock_irqrestore(&ch->lock, flags);
1113 
1114 	return scmnd;
1115 }
1116 
1117 /**
1118  * srp_free_req() - Unmap data and adjust ch->req_lim.
1119  * @ch:     SRP RDMA channel.
1120  * @req:    Request to be freed.
1121  * @scmnd:  SCSI command associated with @req.
1122  * @req_lim_delta: Amount to be added to @ch->req_lim.
1123  */
1124 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1125 			 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1126 {
1127 	unsigned long flags;
1128 
1129 	srp_unmap_data(scmnd, ch, req);
1130 
1131 	spin_lock_irqsave(&ch->lock, flags);
1132 	ch->req_lim += req_lim_delta;
1133 	spin_unlock_irqrestore(&ch->lock, flags);
1134 }
1135 
1136 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1137 			   struct scsi_device *sdev, int result)
1138 {
1139 	struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1140 
1141 	if (scmnd) {
1142 		srp_free_req(ch, req, scmnd, 0);
1143 		scmnd->result = result;
1144 		scmnd->scsi_done(scmnd);
1145 	}
1146 }
1147 
1148 static void srp_terminate_io(struct srp_rport *rport)
1149 {
1150 	struct srp_target_port *target = rport->lld_data;
1151 	struct srp_rdma_ch *ch;
1152 	struct Scsi_Host *shost = target->scsi_host;
1153 	struct scsi_device *sdev;
1154 	int i, j;
1155 
1156 	/*
1157 	 * Invoking srp_terminate_io() while srp_queuecommand() is running
1158 	 * is not safe. Hence the warning statement below.
1159 	 */
1160 	shost_for_each_device(sdev, shost)
1161 		WARN_ON_ONCE(sdev->request_queue->request_fn_active);
1162 
1163 	for (i = 0; i < target->ch_count; i++) {
1164 		ch = &target->ch[i];
1165 
1166 		for (j = 0; j < target->req_ring_size; ++j) {
1167 			struct srp_request *req = &ch->req_ring[j];
1168 
1169 			srp_finish_req(ch, req, NULL,
1170 				       DID_TRANSPORT_FAILFAST << 16);
1171 		}
1172 	}
1173 }
1174 
1175 /*
1176  * It is up to the caller to ensure that srp_rport_reconnect() calls are
1177  * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1178  * srp_reset_device() or srp_reset_host() calls will occur while this function
1179  * is in progress. One way to achieve that is to call srp_reconnect_rport()
1180  * instead of calling this function directly, since srp_reconnect_rport()
1181  * serializes calls of this function via rport->mutex and also blocks
1182  * srp_queuecommand() calls before invoking this function.
1183  */
1184 static int srp_rport_reconnect(struct srp_rport *rport)
1185 {
1186 	struct srp_target_port *target = rport->lld_data;
1187 	struct srp_rdma_ch *ch;
1188 	int i, j, ret = 0;
1189 	bool multich = false;
1190 
1191 	srp_disconnect_target(target);
1192 
1193 	if (target->state == SRP_TARGET_SCANNING)
1194 		return -ENODEV;
1195 
1196 	/*
1197 	 * Now get a new local CM ID so that we avoid confusing the target in
1198 	 * case things are really fouled up. Doing so also ensures that all CM
1199 	 * callbacks will have finished before a new QP is allocated.
1200 	 */
1201 	for (i = 0; i < target->ch_count; i++) {
1202 		ch = &target->ch[i];
1203 		ret += srp_new_cm_id(ch);
1204 	}
1205 	for (i = 0; i < target->ch_count; i++) {
1206 		ch = &target->ch[i];
1207 		for (j = 0; j < target->req_ring_size; ++j) {
1208 			struct srp_request *req = &ch->req_ring[j];
1209 
1210 			srp_finish_req(ch, req, NULL, DID_RESET << 16);
1211 		}
1212 	}
1213 	for (i = 0; i < target->ch_count; i++) {
1214 		ch = &target->ch[i];
1215 		/*
1216 		 * Whether or not creating a new CM ID succeeded, create a new
1217 		 * QP. This guarantees that all completion callback function
1218 		 * invocations have finished before request resetting starts.
1219 		 */
1220 		ret += srp_create_ch_ib(ch);
1221 
1222 		INIT_LIST_HEAD(&ch->free_tx);
1223 		for (j = 0; j < target->queue_size; ++j)
1224 			list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1225 	}
1226 
1227 	target->qp_in_error = false;
1228 
1229 	for (i = 0; i < target->ch_count; i++) {
1230 		ch = &target->ch[i];
1231 		if (ret)
1232 			break;
1233 		ret = srp_connect_ch(ch, multich);
1234 		multich = true;
1235 	}
1236 
1237 	if (ret == 0)
1238 		shost_printk(KERN_INFO, target->scsi_host,
1239 			     PFX "reconnect succeeded\n");
1240 
1241 	return ret;
1242 }
1243 
1244 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1245 			 unsigned int dma_len, u32 rkey)
1246 {
1247 	struct srp_direct_buf *desc = state->desc;
1248 
1249 	WARN_ON_ONCE(!dma_len);
1250 
1251 	desc->va = cpu_to_be64(dma_addr);
1252 	desc->key = cpu_to_be32(rkey);
1253 	desc->len = cpu_to_be32(dma_len);
1254 
1255 	state->total_len += dma_len;
1256 	state->desc++;
1257 	state->ndesc++;
1258 }
1259 
1260 static int srp_map_finish_fmr(struct srp_map_state *state,
1261 			      struct srp_rdma_ch *ch)
1262 {
1263 	struct srp_target_port *target = ch->target;
1264 	struct srp_device *dev = target->srp_host->srp_dev;
1265 	struct ib_pd *pd = target->pd;
1266 	struct ib_pool_fmr *fmr;
1267 	u64 io_addr = 0;
1268 
1269 	if (state->fmr.next >= state->fmr.end)
1270 		return -ENOMEM;
1271 
1272 	WARN_ON_ONCE(!dev->use_fmr);
1273 
1274 	if (state->npages == 0)
1275 		return 0;
1276 
1277 	if (state->npages == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1278 		srp_map_desc(state, state->base_dma_addr, state->dma_len,
1279 			     pd->unsafe_global_rkey);
1280 		goto reset_state;
1281 	}
1282 
1283 	fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1284 				   state->npages, io_addr);
1285 	if (IS_ERR(fmr))
1286 		return PTR_ERR(fmr);
1287 
1288 	*state->fmr.next++ = fmr;
1289 	state->nmdesc++;
1290 
1291 	srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
1292 		     state->dma_len, fmr->fmr->rkey);
1293 
1294 reset_state:
1295 	state->npages = 0;
1296 	state->dma_len = 0;
1297 
1298 	return 0;
1299 }
1300 
1301 static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
1302 {
1303 	srp_handle_qp_err(cq, wc, "FAST REG");
1304 }
1305 
1306 /*
1307  * Map up to sg_nents elements of state->sg, where *sg_offset_p is the offset
1308  * in the first element at which to start. If sg_offset_p != NULL then
1309  * *sg_offset_p is updated to the offset in state->sg[retval] of the first
1310  * byte that has not yet been mapped.
1311  */
1312 static int srp_map_finish_fr(struct srp_map_state *state,
1313 			     struct srp_request *req,
1314 			     struct srp_rdma_ch *ch, int sg_nents,
1315 			     unsigned int *sg_offset_p)
1316 {
1317 	struct srp_target_port *target = ch->target;
1318 	struct srp_device *dev = target->srp_host->srp_dev;
1319 	struct ib_pd *pd = target->pd;
1320 	struct ib_send_wr *bad_wr;
1321 	struct ib_reg_wr wr;
1322 	struct srp_fr_desc *desc;
1323 	u32 rkey;
1324 	int n, err;
1325 
1326 	if (state->fr.next >= state->fr.end)
1327 		return -ENOMEM;
1328 
1329 	WARN_ON_ONCE(!dev->use_fast_reg);
1330 
1331 	if (sg_nents == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1332 		unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1333 
1334 		srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
1335 			     sg_dma_len(state->sg) - sg_offset,
1336 			     pd->unsafe_global_rkey);
1337 		if (sg_offset_p)
1338 			*sg_offset_p = 0;
1339 		return 1;
1340 	}
1341 
1342 	desc = srp_fr_pool_get(ch->fr_pool);
1343 	if (!desc)
1344 		return -ENOMEM;
1345 
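	/*
	 * Bump the key portion of the rkey so that work requests that still
	 * refer to the previous registration of this MR are rejected.
	 */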
1346 	rkey = ib_inc_rkey(desc->mr->rkey);
1347 	ib_update_fast_reg_key(desc->mr, rkey);
1348 
1349 	n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
1350 			 dev->mr_page_size);
1351 	if (unlikely(n < 0)) {
1352 		srp_fr_pool_put(ch->fr_pool, &desc, 1);
1353 		pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
1354 			 dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
1355 			 sg_offset_p ? *sg_offset_p : -1, n);
1356 		return n;
1357 	}
1358 
1359 	WARN_ON_ONCE(desc->mr->length == 0);
1360 
1361 	req->reg_cqe.done = srp_reg_mr_err_done;
1362 
1363 	wr.wr.next = NULL;
1364 	wr.wr.opcode = IB_WR_REG_MR;
1365 	wr.wr.wr_cqe = &req->reg_cqe;
1366 	wr.wr.num_sge = 0;
1367 	wr.wr.send_flags = 0;
1368 	wr.mr = desc->mr;
1369 	wr.key = desc->mr->rkey;
1370 	wr.access = (IB_ACCESS_LOCAL_WRITE |
1371 		     IB_ACCESS_REMOTE_READ |
1372 		     IB_ACCESS_REMOTE_WRITE);
1373 
1374 	*state->fr.next++ = desc;
1375 	state->nmdesc++;
1376 
1377 	srp_map_desc(state, desc->mr->iova,
1378 		     desc->mr->length, desc->mr->rkey);
1379 
1380 	err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
1381 	if (unlikely(err)) {
1382 		WARN_ON_ONCE(err == -ENOMEM);
1383 		return err;
1384 	}
1385 
1386 	return n;
1387 }
1388 
1389 static int srp_map_sg_entry(struct srp_map_state *state,
1390 			    struct srp_rdma_ch *ch,
1391 			    struct scatterlist *sg)
1392 {
1393 	struct srp_target_port *target = ch->target;
1394 	struct srp_device *dev = target->srp_host->srp_dev;
1395 	struct ib_device *ibdev = dev->dev;
1396 	dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
1397 	unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
1398 	unsigned int len = 0;
1399 	int ret;
1400 
1401 	WARN_ON_ONCE(!dma_len);
1402 
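	/*
	 * Split the S/G entry into mr_page_size chunks and close out the
	 * current FMR mapping whenever it is full or a chunk does not start
	 * on a page boundary.
	 */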
1403 	while (dma_len) {
1404 		unsigned offset = dma_addr & ~dev->mr_page_mask;
1405 
1406 		if (state->npages == dev->max_pages_per_mr ||
1407 		    (state->npages > 0 && offset != 0)) {
1408 			ret = srp_map_finish_fmr(state, ch);
1409 			if (ret)
1410 				return ret;
1411 		}
1412 
1413 		len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1414 
1415 		if (!state->npages)
1416 			state->base_dma_addr = dma_addr;
1417 		state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1418 		state->dma_len += len;
1419 		dma_addr += len;
1420 		dma_len -= len;
1421 	}
1422 
1423 	/*
1424 	 * If the end of the MR is not on a page boundary then we need to
1425 	 * close it out and start a new one -- we can only merge at page
1426 	 * boundaries.
1427 	 */
1428 	ret = 0;
1429 	if ((dma_addr & ~dev->mr_page_mask) != 0)
1430 		ret = srp_map_finish_fmr(state, ch);
1431 	return ret;
1432 }
1433 
1434 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1435 			  struct srp_request *req, struct scatterlist *scat,
1436 			  int count)
1437 {
1438 	struct scatterlist *sg;
1439 	int i, ret;
1440 
1441 	state->pages = req->map_page;
1442 	state->fmr.next = req->fmr_list;
1443 	state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;
1444 
1445 	for_each_sg(scat, sg, count, i) {
1446 		ret = srp_map_sg_entry(state, ch, sg);
1447 		if (ret)
1448 			return ret;
1449 	}
1450 
1451 	ret = srp_map_finish_fmr(state, ch);
1452 	if (ret)
1453 		return ret;
1454 
1455 	return 0;
1456 }
1457 
1458 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1459 			 struct srp_request *req, struct scatterlist *scat,
1460 			 int count)
1461 {
1462 	unsigned int sg_offset = 0;
1463 
1464 	state->fr.next = req->fr_list;
1465 	state->fr.end = req->fr_list + ch->target->mr_per_cmd;
1466 	state->sg = scat;
1467 
1468 	if (count == 0)
1469 		return 0;
1470 
1471 	while (count) {
1472 		int i, n;
1473 
1474 		n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
1475 		if (unlikely(n < 0))
1476 			return n;
1477 
1478 		count -= n;
1479 		for (i = 0; i < n; i++)
1480 			state->sg = sg_next(state->sg);
1481 	}
1482 
1483 	return 0;
1484 }
1485 
1486 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1487 			  struct srp_request *req, struct scatterlist *scat,
1488 			  int count)
1489 {
1490 	struct srp_target_port *target = ch->target;
1491 	struct srp_device *dev = target->srp_host->srp_dev;
1492 	struct scatterlist *sg;
1493 	int i;
1494 
1495 	for_each_sg(scat, sg, count, i) {
1496 		srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
1497 			     ib_sg_dma_len(dev->dev, sg),
1498 			     target->pd->unsafe_global_rkey);
1499 	}
1500 
1501 	return 0;
1502 }
1503 
1504 /*
1505  * Register the indirect data buffer descriptor with the HCA.
1506  *
1507  * Note: since the indirect data buffer descriptor has been allocated with
1508  * kmalloc() it is guaranteed that this buffer is a physically contiguous
1509  * memory buffer.
1510  */
1511 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1512 		       void **next_mr, void **end_mr, u32 idb_len,
1513 		       __be32 *idb_rkey)
1514 {
1515 	struct srp_target_port *target = ch->target;
1516 	struct srp_device *dev = target->srp_host->srp_dev;
1517 	struct srp_map_state state;
1518 	struct srp_direct_buf idb_desc;
1519 	u64 idb_pages[1];
1520 	struct scatterlist idb_sg[1];
1521 	int ret;
1522 
1523 	memset(&state, 0, sizeof(state));
1524 	memset(&idb_desc, 0, sizeof(idb_desc));
1525 	state.gen.next = next_mr;
1526 	state.gen.end = end_mr;
1527 	state.desc = &idb_desc;
1528 	state.base_dma_addr = req->indirect_dma_addr;
1529 	state.dma_len = idb_len;
1530 
1531 	if (dev->use_fast_reg) {
1532 		state.sg = idb_sg;
1533 		sg_init_one(idb_sg, req->indirect_desc, idb_len);
1534 		idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1535 #ifdef CONFIG_NEED_SG_DMA_LENGTH
1536 		idb_sg->dma_length = idb_sg->length;	      /* hack^2 */
1537 #endif
1538 		ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
1539 		if (ret < 0)
1540 			return ret;
1541 		WARN_ON_ONCE(ret < 1);
1542 	} else if (dev->use_fmr) {
1543 		state.pages = idb_pages;
1544 		state.pages[0] = (req->indirect_dma_addr &
1545 				  dev->mr_page_mask);
1546 		state.npages = 1;
1547 		ret = srp_map_finish_fmr(&state, ch);
1548 		if (ret < 0)
1549 			return ret;
1550 	} else {
1551 		return -EINVAL;
1552 	}
1553 
1554 	*idb_rkey = idb_desc.key;
1555 
1556 	return 0;
1557 }
1558 
1559 #if defined(DYNAMIC_DEBUG)
1560 static void srp_check_mapping(struct srp_map_state *state,
1561 			      struct srp_rdma_ch *ch, struct srp_request *req,
1562 			      struct scatterlist *scat, int count)
1563 {
1564 	struct srp_device *dev = ch->target->srp_host->srp_dev;
1565 	struct srp_fr_desc **pfr;
1566 	u64 desc_len = 0, mr_len = 0;
1567 	int i;
1568 
1569 	for (i = 0; i < state->ndesc; i++)
1570 		desc_len += be32_to_cpu(req->indirect_desc[i].len);
1571 	if (dev->use_fast_reg)
1572 		for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
1573 			mr_len += (*pfr)->mr->length;
1574 	else if (dev->use_fmr)
1575 		for (i = 0; i < state->nmdesc; i++)
1576 			mr_len += be32_to_cpu(req->indirect_desc[i].len);
1577 	if (desc_len != scsi_bufflen(req->scmnd) ||
1578 	    mr_len > scsi_bufflen(req->scmnd))
1579 		pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
1580 		       scsi_bufflen(req->scmnd), desc_len, mr_len,
1581 		       state->ndesc, state->nmdesc);
1582 }
1583 #endif
1584 
1585 /**
1586  * srp_map_data() - map SCSI data buffer onto an SRP request
1587  * @scmnd: SCSI command to map
1588  * @ch: SRP RDMA channel
1589  * @req: SRP request
1590  *
1591  * Returns the length in bytes of the SRP_CMD IU or a negative value if
1592  * mapping failed.
1593  */
1594 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1595 			struct srp_request *req)
1596 {
1597 	struct srp_target_port *target = ch->target;
1598 	struct ib_pd *pd = target->pd;
1599 	struct scatterlist *scat;
1600 	struct srp_cmd *cmd = req->cmd->buf;
1601 	int len, nents, count, ret;
1602 	struct srp_device *dev;
1603 	struct ib_device *ibdev;
1604 	struct srp_map_state state;
1605 	struct srp_indirect_buf *indirect_hdr;
1606 	u32 idb_len, table_len;
1607 	__be32 idb_rkey;
1608 	u8 fmt;
1609 
1610 	if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1611 		return sizeof (struct srp_cmd);
1612 
1613 	if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1614 	    scmnd->sc_data_direction != DMA_TO_DEVICE) {
1615 		shost_printk(KERN_WARNING, target->scsi_host,
1616 			     PFX "Unhandled data direction %d\n",
1617 			     scmnd->sc_data_direction);
1618 		return -EINVAL;
1619 	}
1620 
1621 	nents = scsi_sg_count(scmnd);
1622 	scat  = scsi_sglist(scmnd);
1623 
1624 	dev = target->srp_host->srp_dev;
1625 	ibdev = dev->dev;
1626 
1627 	count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1628 	if (unlikely(count == 0))
1629 		return -EIO;
1630 
1631 	fmt = SRP_DATA_DESC_DIRECT;
1632 	len = sizeof (struct srp_cmd) +	sizeof (struct srp_direct_buf);
1633 
1634 	if (count == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1635 		/*
1636 		 * The midlayer only generated a single gather/scatter
1637 		 * entry, or DMA mapping coalesced everything to a
1638 		 * single entry.  So a direct descriptor along with
1639 		 * the DMA MR suffices.
1640 		 */
1641 		struct srp_direct_buf *buf = (void *) cmd->add_data;
1642 
1643 		buf->va  = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1644 		buf->key = cpu_to_be32(pd->unsafe_global_rkey);
1645 		buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
1646 
1647 		req->nmdesc = 0;
1648 		goto map_complete;
1649 	}
1650 
1651 	/*
1652 	 * We have more than one scatter/gather entry, so build our indirect
1653 	 * descriptor table, trying to merge as many entries as we can.
1654 	 */
1655 	indirect_hdr = (void *) cmd->add_data;
1656 
1657 	ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1658 				   target->indirect_size, DMA_TO_DEVICE);
1659 
1660 	memset(&state, 0, sizeof(state));
1661 	state.desc = req->indirect_desc;
1662 	if (dev->use_fast_reg)
1663 		ret = srp_map_sg_fr(&state, ch, req, scat, count);
1664 	else if (dev->use_fmr)
1665 		ret = srp_map_sg_fmr(&state, ch, req, scat, count);
1666 	else
1667 		ret = srp_map_sg_dma(&state, ch, req, scat, count);
1668 	req->nmdesc = state.nmdesc;
1669 	if (ret < 0)
1670 		goto unmap;
1671 
1672 #if defined(DYNAMIC_DEBUG)
1673 	{
1674 		DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
1675 			"Memory mapping consistency check");
1676 		if (unlikely(ddm.flags & _DPRINTK_FLAGS_PRINT))
1677 			srp_check_mapping(&state, ch, req, scat, count);
1678 	}
1679 #endif
1680 
1681 	/* We've mapped the request, now pull as much of the indirect
1682 	 * descriptor table as we can into the command buffer. If this
1683 	 * target is not using an external indirect table, we are
1684 	 * guaranteed to fit into the command, as the SCSI layer won't
1685 	 * give us more S/G entries than we allow.
1686 	 */
1687 	if (state.ndesc == 1) {
1688 		/*
1689 		 * Memory registration collapsed the sg-list into one entry,
1690 		 * so use a direct descriptor.
1691 		 */
1692 		struct srp_direct_buf *buf = (void *) cmd->add_data;
1693 
1694 		*buf = req->indirect_desc[0];
1695 		goto map_complete;
1696 	}
1697 
1698 	if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1699 						!target->allow_ext_sg)) {
1700 		shost_printk(KERN_ERR, target->scsi_host,
1701 			     "Could not fit S/G list into SRP_CMD\n");
1702 		ret = -EIO;
1703 		goto unmap;
1704 	}
1705 
1706 	count = min(state.ndesc, target->cmd_sg_cnt);
1707 	table_len = state.ndesc * sizeof (struct srp_direct_buf);
1708 	idb_len = sizeof(struct srp_indirect_buf) + table_len;
1709 
1710 	fmt = SRP_DATA_DESC_INDIRECT;
1711 	len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
1712 	len += count * sizeof (struct srp_direct_buf);
1713 
1714 	memcpy(indirect_hdr->desc_list, req->indirect_desc,
1715 	       count * sizeof (struct srp_direct_buf));
1716 
1717 	if (!(pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1718 		ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
1719 				  idb_len, &idb_rkey);
1720 		if (ret < 0)
1721 			goto unmap;
1722 		req->nmdesc++;
1723 	} else {
1724 		idb_rkey = cpu_to_be32(pd->unsafe_global_rkey);
1725 	}
1726 
1727 	indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1728 	indirect_hdr->table_desc.key = idb_rkey;
1729 	indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1730 	indirect_hdr->len = cpu_to_be32(state.total_len);
1731 
1732 	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1733 		cmd->data_out_desc_cnt = count;
1734 	else
1735 		cmd->data_in_desc_cnt = count;
1736 
1737 	ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1738 				      DMA_TO_DEVICE);
1739 
1740 map_complete:
1741 	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1742 		cmd->buf_fmt = fmt << 4;
1743 	else
1744 		cmd->buf_fmt = fmt;
1745 
1746 	return len;
1747 
1748 unmap:
1749 	srp_unmap_data(scmnd, ch, req);
1750 	if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
1751 		ret = -E2BIG;
1752 	return ret;
1753 }
1754 
1755 /*
1756  * Return an IU and possible credit to the free pool
1757  */
1758 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1759 			  enum srp_iu_type iu_type)
1760 {
1761 	unsigned long flags;
1762 
1763 	spin_lock_irqsave(&ch->lock, flags);
1764 	list_add(&iu->list, &ch->free_tx);
1765 	if (iu_type != SRP_IU_RSP)
1766 		++ch->req_lim;
1767 	spin_unlock_irqrestore(&ch->lock, flags);
1768 }
1769 
1770 /*
1771  * Must be called with ch->lock held to protect req_lim and free_tx.
1772  * If IU is not sent, it must be returned using srp_put_tx_iu().
1773  *
1774  * Note:
1775  * An upper limit for the number of allocated information units for each
1776  * request type is:
1777  * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1778  *   more than Scsi_Host.can_queue requests.
1779  * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1780  * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1781  *   one unanswered SRP request to an initiator.
1782  */
1783 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1784 				      enum srp_iu_type iu_type)
1785 {
1786 	struct srp_target_port *target = ch->target;
1787 	s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1788 	struct srp_iu *iu;
1789 
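	/* Reclaim IUs whose send has completed by polling the send CQ. */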
1790 	ib_process_cq_direct(ch->send_cq, -1);
1791 
1792 	if (list_empty(&ch->free_tx))
1793 		return NULL;
1794 
1795 	/* Initiator responses to target requests do not consume credits */
1796 	if (iu_type != SRP_IU_RSP) {
1797 		if (ch->req_lim <= rsv) {
1798 			++target->zero_req_lim;
1799 			return NULL;
1800 		}
1801 
1802 		--ch->req_lim;
1803 	}
1804 
1805 	iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1806 	list_del(&iu->list);
1807 	return iu;
1808 }
1809 
1810 static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
1811 {
1812 	struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
1813 	struct srp_rdma_ch *ch = cq->cq_context;
1814 
1815 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
1816 		srp_handle_qp_err(cq, wc, "SEND");
1817 		return;
1818 	}
1819 
1820 	list_add(&iu->list, &ch->free_tx);
1821 }
1822 
1823 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1824 {
1825 	struct srp_target_port *target = ch->target;
1826 	struct ib_sge list;
1827 	struct ib_send_wr wr, *bad_wr;
1828 
1829 	list.addr   = iu->dma;
1830 	list.length = len;
1831 	list.lkey   = target->lkey;
1832 
1833 	iu->cqe.done = srp_send_done;
1834 
1835 	wr.next       = NULL;
1836 	wr.wr_cqe     = &iu->cqe;
1837 	wr.sg_list    = &list;
1838 	wr.num_sge    = 1;
1839 	wr.opcode     = IB_WR_SEND;
1840 	wr.send_flags = IB_SEND_SIGNALED;
1841 
1842 	return ib_post_send(ch->qp, &wr, &bad_wr);
1843 }
1844 
1845 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1846 {
1847 	struct srp_target_port *target = ch->target;
1848 	struct ib_recv_wr wr, *bad_wr;
1849 	struct ib_sge list;
1850 
1851 	list.addr   = iu->dma;
1852 	list.length = iu->size;
1853 	list.lkey   = target->lkey;
1854 
1855 	iu->cqe.done = srp_recv_done;
1856 
1857 	wr.next     = NULL;
1858 	wr.wr_cqe   = &iu->cqe;
1859 	wr.sg_list  = &list;
1860 	wr.num_sge  = 1;
1861 
1862 	return ib_post_recv(ch->qp, &wr, &bad_wr);
1863 }
1864 
1865 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
1866 {
1867 	struct srp_target_port *target = ch->target;
1868 	struct srp_request *req;
1869 	struct scsi_cmnd *scmnd;
1870 	unsigned long flags;
1871 
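	/* Task management responses carry SRP_TAG_TSK_MGMT in their tag. */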
1872 	if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
1873 		spin_lock_irqsave(&ch->lock, flags);
1874 		ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1875 		spin_unlock_irqrestore(&ch->lock, flags);
1876 
1877 		ch->tsk_mgmt_status = -1;
1878 		if (be32_to_cpu(rsp->resp_data_len) >= 4)
1879 			ch->tsk_mgmt_status = rsp->data[3];
1880 		complete(&ch->tsk_mgmt_done);
1881 	} else {
1882 		scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
1883 		if (scmnd) {
1884 			req = (void *)scmnd->host_scribble;
1885 			scmnd = srp_claim_req(ch, req, NULL, scmnd);
1886 		}
1887 		if (!scmnd) {
1888 			shost_printk(KERN_ERR, target->scsi_host,
1889 				     "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
1890 				     rsp->tag, ch - target->ch, ch->qp->qp_num);
1891 
1892 			spin_lock_irqsave(&ch->lock, flags);
1893 			ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1894 			spin_unlock_irqrestore(&ch->lock, flags);
1895 
1896 			return;
1897 		}
1898 		scmnd->result = rsp->status;
1899 
1900 		if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
1901 			memcpy(scmnd->sense_buffer, rsp->data +
1902 			       be32_to_cpu(rsp->resp_data_len),
1903 			       min_t(int, be32_to_cpu(rsp->sense_data_len),
1904 				     SCSI_SENSE_BUFFERSIZE));
1905 		}
1906 
1907 		if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
1908 			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1909 		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
1910 			scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
1911 		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
1912 			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1913 		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
1914 			scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
1915 
1916 		srp_free_req(ch, req, scmnd,
1917 			     be32_to_cpu(rsp->req_lim_delta));
1918 
1919 		scmnd->host_scribble = NULL;
1920 		scmnd->scsi_done(scmnd);
1921 	}
1922 }
1923 
1924 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
1925 			       void *rsp, int len)
1926 {
1927 	struct srp_target_port *target = ch->target;
1928 	struct ib_device *dev = target->srp_host->srp_dev->dev;
1929 	unsigned long flags;
1930 	struct srp_iu *iu;
1931 	int err;
1932 
1933 	spin_lock_irqsave(&ch->lock, flags);
1934 	ch->req_lim += req_delta;
1935 	iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
1936 	spin_unlock_irqrestore(&ch->lock, flags);
1937 
1938 	if (!iu) {
1939 		shost_printk(KERN_ERR, target->scsi_host, PFX
1940 			     "no IU available to send response\n");
1941 		return 1;
1942 	}
1943 
1944 	ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
1945 	memcpy(iu->buf, rsp, len);
1946 	ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
1947 
1948 	err = srp_post_send(ch, iu, len);
1949 	if (err) {
1950 		shost_printk(KERN_ERR, target->scsi_host, PFX
1951 			     "unable to post response: %d\n", err);
1952 		srp_put_tx_iu(ch, iu, SRP_IU_RSP);
1953 	}
1954 
1955 	return err;
1956 }
1957 
1958 static void srp_process_cred_req(struct srp_rdma_ch *ch,
1959 				 struct srp_cred_req *req)
1960 {
1961 	struct srp_cred_rsp rsp = {
1962 		.opcode = SRP_CRED_RSP,
1963 		.tag = req->tag,
1964 	};
1965 	s32 delta = be32_to_cpu(req->req_lim_delta);
1966 
1967 	if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1968 		shost_printk(KERN_ERR, ch->target->scsi_host, PFX
1969 			     "problems processing SRP_CRED_REQ\n");
1970 }
1971 
1972 static void srp_process_aer_req(struct srp_rdma_ch *ch,
1973 				struct srp_aer_req *req)
1974 {
1975 	struct srp_target_port *target = ch->target;
1976 	struct srp_aer_rsp rsp = {
1977 		.opcode = SRP_AER_RSP,
1978 		.tag = req->tag,
1979 	};
1980 	s32 delta = be32_to_cpu(req->req_lim_delta);
1981 
1982 	shost_printk(KERN_ERR, target->scsi_host, PFX
1983 		     "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
1984 
1985 	if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1986 		shost_printk(KERN_ERR, target->scsi_host, PFX
1987 			     "problems processing SRP_AER_REQ\n");
1988 }
1989 
1990 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
1991 {
1992 	struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
1993 	struct srp_rdma_ch *ch = cq->cq_context;
1994 	struct srp_target_port *target = ch->target;
1995 	struct ib_device *dev = target->srp_host->srp_dev->dev;
1996 	int res;
1997 	u8 opcode;
1998 
1999 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
2000 		srp_handle_qp_err(cq, wc, "RECV");
2001 		return;
2002 	}
2003 
2004 	ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
2005 				   DMA_FROM_DEVICE);
2006 
2007 	opcode = *(u8 *) iu->buf;
2008 
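	/* Change "if (0)" into "if (1)" below to dump every received IU. */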
2009 	if (0) {
2010 		shost_printk(KERN_ERR, target->scsi_host,
2011 			     PFX "recv completion, opcode 0x%02x\n", opcode);
2012 		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
2013 			       iu->buf, wc->byte_len, true);
2014 	}
2015 
2016 	switch (opcode) {
2017 	case SRP_RSP:
2018 		srp_process_rsp(ch, iu->buf);
2019 		break;
2020 
2021 	case SRP_CRED_REQ:
2022 		srp_process_cred_req(ch, iu->buf);
2023 		break;
2024 
2025 	case SRP_AER_REQ:
2026 		srp_process_aer_req(ch, iu->buf);
2027 		break;
2028 
2029 	case SRP_T_LOGOUT:
2030 		/* XXX Handle target logout */
2031 		shost_printk(KERN_WARNING, target->scsi_host,
2032 			     PFX "Got target logout request\n");
2033 		break;
2034 
2035 	default:
2036 		shost_printk(KERN_WARNING, target->scsi_host,
2037 			     PFX "Unhandled SRP opcode 0x%02x\n", opcode);
2038 		break;
2039 	}
2040 
2041 	ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
2042 				      DMA_FROM_DEVICE);
2043 
2044 	res = srp_post_recv(ch, iu);
2045 	if (res != 0)
2046 		shost_printk(KERN_ERR, target->scsi_host,
2047 			     PFX "Recv failed with error code %d\n", res);
2048 }
2049 
2050 /**
2051  * srp_tl_err_work() - handle a transport layer error
2052  * @work: Work structure embedded in an SRP target port.
2053  *
2054  * Note: This function may get invoked before the rport has been created,
2055  * hence the target->rport test.
2056  */
2057 static void srp_tl_err_work(struct work_struct *work)
2058 {
2059 	struct srp_target_port *target;
2060 
2061 	target = container_of(work, struct srp_target_port, tl_err_work);
2062 	if (target->rport)
2063 		srp_start_tl_fail_timers(target->rport);
2064 }
2065 
2066 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
2067 		const char *opname)
2068 {
2069 	struct srp_rdma_ch *ch = cq->cq_context;
2070 	struct srp_target_port *target = ch->target;
2071 
2072 	if (ch->connected && !target->qp_in_error) {
2073 		shost_printk(KERN_ERR, target->scsi_host,
2074 			     PFX "failed %s status %s (%d) for CQE %p\n",
2075 			     opname, ib_wc_status_msg(wc->status), wc->status,
2076 			     wc->wr_cqe);
2077 		queue_work(system_long_wq, &target->tl_err_work);
2078 	}
2079 	target->qp_in_error = true;
2080 }
2081 
2082 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2083 {
2084 	struct srp_target_port *target = host_to_target(shost);
2085 	struct srp_rport *rport = target->rport;
2086 	struct srp_rdma_ch *ch;
2087 	struct srp_request *req;
2088 	struct srp_iu *iu;
2089 	struct srp_cmd *cmd;
2090 	struct ib_device *dev;
2091 	unsigned long flags;
2092 	u32 tag;
2093 	u16 idx;
2094 	int len, ret;
2095 	const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
2096 
2097 	/*
2098 	 * The SCSI EH thread is the only context from which srp_queuecommand()
2099 	 * can get invoked for blocked devices (SDEV_BLOCK /
2100 	 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
2101 	 * locking the rport mutex if invoked from inside the SCSI EH.
2102 	 */
2103 	if (in_scsi_eh)
2104 		mutex_lock(&rport->mutex);
2105 
2106 	scmnd->result = srp_chkready(target->rport);
2107 	if (unlikely(scmnd->result))
2108 		goto err;
2109 
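	/*
	 * The block layer tag encodes both the hardware queue number, which
	 * selects the RDMA channel, and the per-queue tag, which is used as
	 * an index into that channel's request ring.
	 */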
2110 	WARN_ON_ONCE(scmnd->request->tag < 0);
2111 	tag = blk_mq_unique_tag(scmnd->request);
2112 	ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2113 	idx = blk_mq_unique_tag_to_tag(tag);
2114 	WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2115 		  dev_name(&shost->shost_gendev), tag, idx,
2116 		  target->req_ring_size);
2117 
2118 	spin_lock_irqsave(&ch->lock, flags);
2119 	iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2120 	spin_unlock_irqrestore(&ch->lock, flags);
2121 
2122 	if (!iu)
2123 		goto err;
2124 
2125 	req = &ch->req_ring[idx];
2126 	dev = target->srp_host->srp_dev->dev;
2127 	ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
2128 				   DMA_TO_DEVICE);
2129 
2130 	scmnd->host_scribble = (void *) req;
2131 
2132 	cmd = iu->buf;
2133 	memset(cmd, 0, sizeof *cmd);
2134 
2135 	cmd->opcode = SRP_CMD;
2136 	int_to_scsilun(scmnd->device->lun, &cmd->lun);
2137 	cmd->tag    = tag;
2138 	memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2139 
2140 	req->scmnd    = scmnd;
2141 	req->cmd      = iu;
2142 
2143 	len = srp_map_data(scmnd, ch, req);
2144 	if (len < 0) {
2145 		shost_printk(KERN_ERR, target->scsi_host,
2146 			     PFX "Failed to map data (%d)\n", len);
2147 		/*
2148 		 * If we ran out of memory descriptors (-ENOMEM) because an
2149 		 * application is queuing many requests with more than
2150 		 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2151 		 * to reduce queue depth temporarily.
2152 		 */
2153 		scmnd->result = len == -ENOMEM ?
2154 			DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2155 		goto err_iu;
2156 	}
2157 
2158 	ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2159 				      DMA_TO_DEVICE);
2160 
2161 	if (srp_post_send(ch, iu, len)) {
2162 		shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2163 		goto err_unmap;
2164 	}
2165 
2166 	ret = 0;
2167 
2168 unlock_rport:
2169 	if (in_scsi_eh)
2170 		mutex_unlock(&rport->mutex);
2171 
2172 	return ret;
2173 
2174 err_unmap:
2175 	srp_unmap_data(scmnd, ch, req);
2176 
2177 err_iu:
2178 	srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2179 
2180 	/*
	 * Prevent the loops that iterate over the request ring from
	 * encountering a dangling SCSI command pointer.
2183 	 */
2184 	req->scmnd = NULL;
2185 
2186 err:
2187 	if (scmnd->result) {
2188 		scmnd->scsi_done(scmnd);
2189 		ret = 0;
2190 	} else {
2191 		ret = SCSI_MLQUEUE_HOST_BUSY;
2192 	}
2193 
2194 	goto unlock_rport;
2195 }
2196 
2197 /*
2198  * Note: the resources allocated in this function are freed in
2199  * srp_free_ch_ib().
2200  */
2201 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2202 {
2203 	struct srp_target_port *target = ch->target;
2204 	int i;
2205 
2206 	ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2207 			      GFP_KERNEL);
2208 	if (!ch->rx_ring)
2209 		goto err_no_ring;
2210 	ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2211 			      GFP_KERNEL);
2212 	if (!ch->tx_ring)
2213 		goto err_no_ring;
2214 
2215 	for (i = 0; i < target->queue_size; ++i) {
2216 		ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2217 					      ch->max_ti_iu_len,
2218 					      GFP_KERNEL, DMA_FROM_DEVICE);
2219 		if (!ch->rx_ring[i])
2220 			goto err;
2221 	}
2222 
2223 	for (i = 0; i < target->queue_size; ++i) {
2224 		ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2225 					      target->max_iu_len,
2226 					      GFP_KERNEL, DMA_TO_DEVICE);
2227 		if (!ch->tx_ring[i])
2228 			goto err;
2229 
2230 		list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2231 	}
2232 
2233 	return 0;
2234 
2235 err:
2236 	for (i = 0; i < target->queue_size; ++i) {
2237 		srp_free_iu(target->srp_host, ch->rx_ring[i]);
2238 		srp_free_iu(target->srp_host, ch->tx_ring[i]);
2239 	}
2240 
2241 
2242 err_no_ring:
2243 	kfree(ch->tx_ring);
2244 	ch->tx_ring = NULL;
2245 	kfree(ch->rx_ring);
2246 	ch->rx_ring = NULL;
2247 
2248 	return -ENOMEM;
2249 }
2250 
2251 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2252 {
2253 	uint64_t T_tr_ns, max_compl_time_ms;
2254 	uint32_t rq_tmo_jiffies;
2255 
2256 	/*
2257 	 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2258 	 * table 91), both the QP timeout and the retry count have to be set
2259 	 * for RC QP's during the RTR to RTS transition.
2260 	 */
2261 	WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2262 		     (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2263 
2264 	/*
2265 	 * Set target->rq_tmo_jiffies to one second more than the largest time
2266 	 * it can take before an error completion is generated. See also
2267 	 * C9-140..142 in the IBTA spec for more information about how to
2268 	 * convert the QP Local ACK Timeout value to nanoseconds.
2269 	 */
2270 	T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2271 	max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2272 	do_div(max_compl_time_ms, NSEC_PER_MSEC);
2273 	rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2274 
2275 	return rq_tmo_jiffies;
2276 }
2277 
2278 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2279 			       const struct srp_login_rsp *lrsp,
2280 			       struct srp_rdma_ch *ch)
2281 {
2282 	struct srp_target_port *target = ch->target;
2283 	struct ib_qp_attr *qp_attr = NULL;
2284 	int attr_mask = 0;
2285 	int ret;
2286 	int i;
2287 
2288 	if (lrsp->opcode == SRP_LOGIN_RSP) {
2289 		ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2290 		ch->req_lim       = be32_to_cpu(lrsp->req_lim_delta);
2291 
2292 		/*
2293 		 * Reserve credits for task management so we don't
2294 		 * bounce requests back to the SCSI mid-layer.
2295 		 */
2296 		target->scsi_host->can_queue
2297 			= min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2298 			      target->scsi_host->can_queue);
2299 		target->scsi_host->cmd_per_lun
2300 			= min_t(int, target->scsi_host->can_queue,
2301 				target->scsi_host->cmd_per_lun);
2302 	} else {
2303 		shost_printk(KERN_WARNING, target->scsi_host,
2304 			     PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2305 		ret = -ECONNRESET;
2306 		goto error;
2307 	}
2308 
2309 	if (!ch->rx_ring) {
2310 		ret = srp_alloc_iu_bufs(ch);
2311 		if (ret)
2312 			goto error;
2313 	}
2314 
2315 	ret = -ENOMEM;
2316 	qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
2317 	if (!qp_attr)
2318 		goto error;
2319 
2320 	qp_attr->qp_state = IB_QPS_RTR;
2321 	ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2322 	if (ret)
2323 		goto error_free;
2324 
2325 	ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2326 	if (ret)
2327 		goto error_free;
2328 
2329 	for (i = 0; i < target->queue_size; i++) {
2330 		struct srp_iu *iu = ch->rx_ring[i];
2331 
2332 		ret = srp_post_recv(ch, iu);
2333 		if (ret)
2334 			goto error_free;
2335 	}
2336 
2337 	qp_attr->qp_state = IB_QPS_RTS;
2338 	ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2339 	if (ret)
2340 		goto error_free;
2341 
2342 	target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2343 
2344 	ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2345 	if (ret)
2346 		goto error_free;
2347 
2348 	ret = ib_send_cm_rtu(cm_id, NULL, 0);
2349 
2350 error_free:
2351 	kfree(qp_attr);
2352 
2353 error:
2354 	ch->status = ret;
2355 }
2356 
2357 static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
2358 			       struct ib_cm_event *event,
2359 			       struct srp_rdma_ch *ch)
2360 {
2361 	struct srp_target_port *target = ch->target;
2362 	struct Scsi_Host *shost = target->scsi_host;
2363 	struct ib_class_port_info *cpi;
2364 	int opcode;
2365 
2366 	switch (event->param.rej_rcvd.reason) {
2367 	case IB_CM_REJ_PORT_CM_REDIRECT:
2368 		cpi = event->param.rej_rcvd.ari;
2369 		ch->path.dlid = cpi->redirect_lid;
2370 		ch->path.pkey = cpi->redirect_pkey;
2371 		cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2372 		memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
2373 
2374 		ch->status = ch->path.dlid ?
2375 			SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2376 		break;
2377 
2378 	case IB_CM_REJ_PORT_REDIRECT:
2379 		if (srp_target_is_topspin(target)) {
2380 			/*
2381 			 * Topspin/Cisco SRP gateways incorrectly send
2382 			 * reject reason code 25 when they mean 24
2383 			 * (port redirect).
2384 			 */
2385 			memcpy(ch->path.dgid.raw,
2386 			       event->param.rej_rcvd.ari, 16);
2387 
2388 			shost_printk(KERN_DEBUG, shost,
2389 				     PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2390 				     be64_to_cpu(ch->path.dgid.global.subnet_prefix),
2391 				     be64_to_cpu(ch->path.dgid.global.interface_id));
2392 
2393 			ch->status = SRP_PORT_REDIRECT;
2394 		} else {
2395 			shost_printk(KERN_WARNING, shost,
2396 				     "  REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2397 			ch->status = -ECONNRESET;
2398 		}
2399 		break;
2400 
2401 	case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2402 		shost_printk(KERN_WARNING, shost,
2403 			    "  REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2404 		ch->status = -ECONNRESET;
2405 		break;
2406 
2407 	case IB_CM_REJ_CONSUMER_DEFINED:
2408 		opcode = *(u8 *) event->private_data;
2409 		if (opcode == SRP_LOGIN_REJ) {
2410 			struct srp_login_rej *rej = event->private_data;
2411 			u32 reason = be32_to_cpu(rej->reason);
2412 
2413 			if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2414 				shost_printk(KERN_WARNING, shost,
2415 					     PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2416 			else
2417 				shost_printk(KERN_WARNING, shost, PFX
2418 					     "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2419 					     target->sgid.raw,
2420 					     target->orig_dgid.raw, reason);
2421 		} else
			shost_printk(KERN_WARNING, shost,
				     "  REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n",
				     opcode);
2425 		ch->status = -ECONNRESET;
2426 		break;
2427 
2428 	case IB_CM_REJ_STALE_CONN:
2429 		shost_printk(KERN_WARNING, shost, "  REJ reason: stale connection\n");
2430 		ch->status = SRP_STALE_CONN;
2431 		break;
2432 
2433 	default:
2434 		shost_printk(KERN_WARNING, shost, "  REJ reason 0x%x\n",
2435 			     event->param.rej_rcvd.reason);
2436 		ch->status = -ECONNRESET;
2437 	}
2438 }
2439 
2440 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2441 {
2442 	struct srp_rdma_ch *ch = cm_id->context;
2443 	struct srp_target_port *target = ch->target;
2444 	int comp = 0;
2445 
2446 	switch (event->event) {
2447 	case IB_CM_REQ_ERROR:
2448 		shost_printk(KERN_DEBUG, target->scsi_host,
2449 			     PFX "Sending CM REQ failed\n");
2450 		comp = 1;
2451 		ch->status = -ECONNRESET;
2452 		break;
2453 
2454 	case IB_CM_REP_RECEIVED:
2455 		comp = 1;
2456 		srp_cm_rep_handler(cm_id, event->private_data, ch);
2457 		break;
2458 
2459 	case IB_CM_REJ_RECEIVED:
2460 		shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2461 		comp = 1;
2462 
2463 		srp_cm_rej_handler(cm_id, event, ch);
2464 		break;
2465 
2466 	case IB_CM_DREQ_RECEIVED:
2467 		shost_printk(KERN_WARNING, target->scsi_host,
2468 			     PFX "DREQ received - connection closed\n");
2469 		ch->connected = false;
2470 		if (ib_send_cm_drep(cm_id, NULL, 0))
2471 			shost_printk(KERN_ERR, target->scsi_host,
2472 				     PFX "Sending CM DREP failed\n");
2473 		queue_work(system_long_wq, &target->tl_err_work);
2474 		break;
2475 
2476 	case IB_CM_TIMEWAIT_EXIT:
2477 		shost_printk(KERN_ERR, target->scsi_host,
2478 			     PFX "connection closed\n");
2479 		comp = 1;
2480 
2481 		ch->status = 0;
2482 		break;
2483 
2484 	case IB_CM_MRA_RECEIVED:
2485 	case IB_CM_DREQ_ERROR:
2486 	case IB_CM_DREP_RECEIVED:
2487 		break;
2488 
2489 	default:
2490 		shost_printk(KERN_WARNING, target->scsi_host,
2491 			     PFX "Unhandled CM event %d\n", event->event);
2492 		break;
2493 	}
2494 
2495 	if (comp)
2496 		complete(&ch->done);
2497 
2498 	return 0;
2499 }
2500 
2501 /**
 * srp_change_queue_depth() - change the queue depth of a SCSI device
 * @sdev: SCSI device.
 * @qdepth: Requested queue depth.
 *
 * Returns the new queue depth.
2507  */
2508 static int
2509 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2510 {
2511 	if (!sdev->tagged_supported)
2512 		qdepth = 1;
2513 	return scsi_change_queue_depth(sdev, qdepth);
2514 }
2515 
2516 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2517 			     u8 func)
2518 {
2519 	struct srp_target_port *target = ch->target;
2520 	struct srp_rport *rport = target->rport;
2521 	struct ib_device *dev = target->srp_host->srp_dev->dev;
2522 	struct srp_iu *iu;
2523 	struct srp_tsk_mgmt *tsk_mgmt;
2524 
2525 	if (!ch->connected || target->qp_in_error)
2526 		return -1;
2527 
2528 	init_completion(&ch->tsk_mgmt_done);
2529 
2530 	/*
	 * Lock the rport mutex to prevent srp_create_ch_ib() from being
	 * invoked while a task management function is being sent.
2533 	 */
2534 	mutex_lock(&rport->mutex);
2535 	spin_lock_irq(&ch->lock);
2536 	iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2537 	spin_unlock_irq(&ch->lock);
2538 
2539 	if (!iu) {
2540 		mutex_unlock(&rport->mutex);
2541 
2542 		return -1;
2543 	}
2544 
2545 	ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2546 				   DMA_TO_DEVICE);
2547 	tsk_mgmt = iu->buf;
2548 	memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2549 
2550 	tsk_mgmt->opcode 	= SRP_TSK_MGMT;
2551 	int_to_scsilun(lun, &tsk_mgmt->lun);
2552 	tsk_mgmt->tag		= req_tag | SRP_TAG_TSK_MGMT;
2553 	tsk_mgmt->tsk_mgmt_func = func;
2554 	tsk_mgmt->task_tag	= req_tag;
2555 
2556 	ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2557 				      DMA_TO_DEVICE);
2558 	if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2559 		srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2560 		mutex_unlock(&rport->mutex);
2561 
2562 		return -1;
2563 	}
2564 	mutex_unlock(&rport->mutex);
2565 
2566 	if (!wait_for_completion_timeout(&ch->tsk_mgmt_done,
2567 					 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
2568 		return -1;
2569 
2570 	return 0;
2571 }
2572 
2573 static int srp_abort(struct scsi_cmnd *scmnd)
2574 {
2575 	struct srp_target_port *target = host_to_target(scmnd->device->host);
2576 	struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2577 	u32 tag;
2578 	u16 ch_idx;
2579 	struct srp_rdma_ch *ch;
2580 	int ret;
2581 
2582 	shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2583 
2584 	if (!req)
2585 		return SUCCESS;
2586 	tag = blk_mq_unique_tag(scmnd->request);
2587 	ch_idx = blk_mq_unique_tag_to_hwq(tag);
2588 	if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2589 		return SUCCESS;
2590 	ch = &target->ch[ch_idx];
2591 	if (!srp_claim_req(ch, req, NULL, scmnd))
2592 		return SUCCESS;
2593 	shost_printk(KERN_ERR, target->scsi_host,
2594 		     "Sending SRP abort for tag %#x\n", tag);
2595 	if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2596 			      SRP_TSK_ABORT_TASK) == 0)
2597 		ret = SUCCESS;
2598 	else if (target->rport->state == SRP_RPORT_LOST)
2599 		ret = FAST_IO_FAIL;
2600 	else
2601 		ret = FAILED;
2602 	srp_free_req(ch, req, scmnd, 0);
2603 	scmnd->result = DID_ABORT << 16;
2604 	scmnd->scsi_done(scmnd);
2605 
2606 	return ret;
2607 }
2608 
2609 static int srp_reset_device(struct scsi_cmnd *scmnd)
2610 {
2611 	struct srp_target_port *target = host_to_target(scmnd->device->host);
2612 	struct srp_rdma_ch *ch;
	int i, j;
2614 
2615 	shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2616 
2617 	ch = &target->ch[0];
2618 	if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2619 			      SRP_TSK_LUN_RESET))
2620 		return FAILED;
2621 	if (ch->tsk_mgmt_status)
2622 		return FAILED;
2623 
2624 	for (i = 0; i < target->ch_count; i++) {
2625 		ch = &target->ch[i];
		for (j = 0; j < target->req_ring_size; ++j) {
			struct srp_request *req = &ch->req_ring[j];
2628 
2629 			srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
2630 		}
2631 	}
2632 
2633 	return SUCCESS;
2634 }
2635 
2636 static int srp_reset_host(struct scsi_cmnd *scmnd)
2637 {
2638 	struct srp_target_port *target = host_to_target(scmnd->device->host);
2639 
2640 	shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2641 
2642 	return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2643 }
2644 
2645 static int srp_slave_alloc(struct scsi_device *sdev)
2646 {
2647 	struct Scsi_Host *shost = sdev->host;
2648 	struct srp_target_port *target = host_to_target(shost);
2649 	struct srp_device *srp_dev = target->srp_host->srp_dev;
2650 	struct ib_device *ibdev = srp_dev->dev;
2651 
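	/*
	 * If the HCA cannot register S/G lists that contain gaps, ask the
	 * block layer to only build S/G lists whose elements can each be
	 * mapped onto whole MR pages.
	 */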
2652 	if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
2653 		blk_queue_virt_boundary(sdev->request_queue,
2654 					~srp_dev->mr_page_mask);
2655 
2656 	return 0;
2657 }
2658 
2659 static int srp_slave_configure(struct scsi_device *sdev)
2660 {
2661 	struct Scsi_Host *shost = sdev->host;
2662 	struct srp_target_port *target = host_to_target(shost);
2663 	struct request_queue *q = sdev->request_queue;
2664 	unsigned long timeout;
2665 
2666 	if (sdev->type == TYPE_DISK) {
2667 		timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
2668 		blk_queue_rq_timeout(q, timeout);
2669 	}
2670 
2671 	return 0;
2672 }
2673 
2674 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
2675 			   char *buf)
2676 {
2677 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2678 
2679 	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
2680 }
2681 
2682 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
2683 			     char *buf)
2684 {
2685 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2686 
2687 	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
2688 }
2689 
2690 static ssize_t show_service_id(struct device *dev,
2691 			       struct device_attribute *attr, char *buf)
2692 {
2693 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2694 
2695 	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id));
2696 }
2697 
2698 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
2699 			 char *buf)
2700 {
2701 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2702 
2703 	return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey));
2704 }
2705 
2706 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2707 			 char *buf)
2708 {
2709 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2710 
2711 	return sprintf(buf, "%pI6\n", target->sgid.raw);
2712 }
2713 
2714 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
2715 			 char *buf)
2716 {
2717 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2718 	struct srp_rdma_ch *ch = &target->ch[0];
2719 
2720 	return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
2721 }
2722 
2723 static ssize_t show_orig_dgid(struct device *dev,
2724 			      struct device_attribute *attr, char *buf)
2725 {
2726 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2727 
2728 	return sprintf(buf, "%pI6\n", target->orig_dgid.raw);
2729 }
2730 
2731 static ssize_t show_req_lim(struct device *dev,
2732 			    struct device_attribute *attr, char *buf)
2733 {
2734 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2735 	struct srp_rdma_ch *ch;
2736 	int i, req_lim = INT_MAX;
2737 
2738 	for (i = 0; i < target->ch_count; i++) {
2739 		ch = &target->ch[i];
2740 		req_lim = min(req_lim, ch->req_lim);
2741 	}
2742 	return sprintf(buf, "%d\n", req_lim);
2743 }
2744 
2745 static ssize_t show_zero_req_lim(struct device *dev,
2746 				 struct device_attribute *attr, char *buf)
2747 {
2748 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2749 
2750 	return sprintf(buf, "%d\n", target->zero_req_lim);
2751 }
2752 
2753 static ssize_t show_local_ib_port(struct device *dev,
2754 				  struct device_attribute *attr, char *buf)
2755 {
2756 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2757 
2758 	return sprintf(buf, "%d\n", target->srp_host->port);
2759 }
2760 
2761 static ssize_t show_local_ib_device(struct device *dev,
2762 				    struct device_attribute *attr, char *buf)
2763 {
2764 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2765 
2766 	return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
2767 }
2768 
2769 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
2770 			     char *buf)
2771 {
2772 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2773 
2774 	return sprintf(buf, "%d\n", target->ch_count);
2775 }
2776 
2777 static ssize_t show_comp_vector(struct device *dev,
2778 				struct device_attribute *attr, char *buf)
2779 {
2780 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2781 
2782 	return sprintf(buf, "%d\n", target->comp_vector);
2783 }
2784 
2785 static ssize_t show_tl_retry_count(struct device *dev,
2786 				   struct device_attribute *attr, char *buf)
2787 {
2788 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2789 
2790 	return sprintf(buf, "%d\n", target->tl_retry_count);
2791 }
2792 
2793 static ssize_t show_cmd_sg_entries(struct device *dev,
2794 				   struct device_attribute *attr, char *buf)
2795 {
2796 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2797 
2798 	return sprintf(buf, "%u\n", target->cmd_sg_cnt);
2799 }
2800 
2801 static ssize_t show_allow_ext_sg(struct device *dev,
2802 				 struct device_attribute *attr, char *buf)
2803 {
2804 	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2805 
2806 	return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
2807 }
2808 
2809 static DEVICE_ATTR(id_ext,	    S_IRUGO, show_id_ext,	   NULL);
2810 static DEVICE_ATTR(ioc_guid,	    S_IRUGO, show_ioc_guid,	   NULL);
2811 static DEVICE_ATTR(service_id,	    S_IRUGO, show_service_id,	   NULL);
2812 static DEVICE_ATTR(pkey,	    S_IRUGO, show_pkey,		   NULL);
2813 static DEVICE_ATTR(sgid,	    S_IRUGO, show_sgid,		   NULL);
2814 static DEVICE_ATTR(dgid,	    S_IRUGO, show_dgid,		   NULL);
2815 static DEVICE_ATTR(orig_dgid,	    S_IRUGO, show_orig_dgid,	   NULL);
2816 static DEVICE_ATTR(req_lim,         S_IRUGO, show_req_lim,         NULL);
2817 static DEVICE_ATTR(zero_req_lim,    S_IRUGO, show_zero_req_lim,	   NULL);
2818 static DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,   NULL);
2819 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
2820 static DEVICE_ATTR(ch_count,        S_IRUGO, show_ch_count,        NULL);
2821 static DEVICE_ATTR(comp_vector,     S_IRUGO, show_comp_vector,     NULL);
2822 static DEVICE_ATTR(tl_retry_count,  S_IRUGO, show_tl_retry_count,  NULL);
2823 static DEVICE_ATTR(cmd_sg_entries,  S_IRUGO, show_cmd_sg_entries,  NULL);
2824 static DEVICE_ATTR(allow_ext_sg,    S_IRUGO, show_allow_ext_sg,    NULL);
2825 
2826 static struct device_attribute *srp_host_attrs[] = {
2827 	&dev_attr_id_ext,
2828 	&dev_attr_ioc_guid,
2829 	&dev_attr_service_id,
2830 	&dev_attr_pkey,
2831 	&dev_attr_sgid,
2832 	&dev_attr_dgid,
2833 	&dev_attr_orig_dgid,
2834 	&dev_attr_req_lim,
2835 	&dev_attr_zero_req_lim,
2836 	&dev_attr_local_ib_port,
2837 	&dev_attr_local_ib_device,
2838 	&dev_attr_ch_count,
2839 	&dev_attr_comp_vector,
2840 	&dev_attr_tl_retry_count,
2841 	&dev_attr_cmd_sg_entries,
2842 	&dev_attr_allow_ext_sg,
2843 	NULL
2844 };
2845 
2846 static struct scsi_host_template srp_template = {
2847 	.module				= THIS_MODULE,
2848 	.name				= "InfiniBand SRP initiator",
2849 	.proc_name			= DRV_NAME,
2850 	.slave_alloc			= srp_slave_alloc,
2851 	.slave_configure		= srp_slave_configure,
2852 	.info				= srp_target_info,
2853 	.queuecommand			= srp_queuecommand,
2854 	.change_queue_depth             = srp_change_queue_depth,
2855 	.eh_abort_handler		= srp_abort,
2856 	.eh_device_reset_handler	= srp_reset_device,
2857 	.eh_host_reset_handler		= srp_reset_host,
2858 	.skip_settle_delay		= true,
2859 	.sg_tablesize			= SRP_DEF_SG_TABLESIZE,
2860 	.can_queue			= SRP_DEFAULT_CMD_SQ_SIZE,
2861 	.this_id			= -1,
2862 	.cmd_per_lun			= SRP_DEFAULT_CMD_SQ_SIZE,
2863 	.use_clustering			= ENABLE_CLUSTERING,
2864 	.shost_attrs			= srp_host_attrs,
2865 	.track_queue_depth		= 1,
2866 };
2867 
2868 static int srp_sdev_count(struct Scsi_Host *host)
2869 {
2870 	struct scsi_device *sdev;
2871 	int c = 0;
2872 
2873 	shost_for_each_device(sdev, host)
2874 		c++;
2875 
2876 	return c;
2877 }
2878 
2879 /*
2880  * Return values:
2881  * < 0 upon failure. Caller is responsible for SRP target port cleanup.
2882  * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
2883  *    removal has been scheduled.
2884  * 0 and target->state != SRP_TARGET_REMOVED upon success.
2885  */
2886 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
2887 {
2888 	struct srp_rport_identifiers ids;
2889 	struct srp_rport *rport;
2890 
2891 	target->state = SRP_TARGET_SCANNING;
2892 	sprintf(target->target_name, "SRP.T10:%016llX",
2893 		be64_to_cpu(target->id_ext));
2894 
2895 	if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device))
2896 		return -ENODEV;
2897 
2898 	memcpy(ids.port_id, &target->id_ext, 8);
2899 	memcpy(ids.port_id + 8, &target->ioc_guid, 8);
2900 	ids.roles = SRP_RPORT_ROLE_TARGET;
2901 	rport = srp_rport_add(target->scsi_host, &ids);
2902 	if (IS_ERR(rport)) {
2903 		scsi_remove_host(target->scsi_host);
2904 		return PTR_ERR(rport);
2905 	}
2906 
2907 	rport->lld_data = target;
2908 	target->rport = rport;
2909 
2910 	spin_lock(&host->target_lock);
2911 	list_add_tail(&target->list, &host->target_list);
2912 	spin_unlock(&host->target_lock);
2913 
2914 	scsi_scan_target(&target->scsi_host->shost_gendev,
2915 			 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
2916 
2917 	if (srp_connected_ch(target) < target->ch_count ||
2918 	    target->qp_in_error) {
2919 		shost_printk(KERN_INFO, target->scsi_host,
2920 			     PFX "SCSI scan failed - removing SCSI host\n");
2921 		srp_queue_remove_work(target);
2922 		goto out;
2923 	}
2924 
2925 	pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
2926 		 dev_name(&target->scsi_host->shost_gendev),
2927 		 srp_sdev_count(target->scsi_host));
2928 
2929 	spin_lock_irq(&target->lock);
2930 	if (target->state == SRP_TARGET_SCANNING)
2931 		target->state = SRP_TARGET_LIVE;
2932 	spin_unlock_irq(&target->lock);
2933 
2934 out:
2935 	return 0;
2936 }
2937 
2938 static void srp_release_dev(struct device *dev)
2939 {
2940 	struct srp_host *host =
2941 		container_of(dev, struct srp_host, dev);
2942 
2943 	complete(&host->released);
2944 }
2945 
2946 static struct class srp_class = {
2947 	.name    = "infiniband_srp",
2948 	.dev_release = srp_release_dev
2949 };
2950 
2951 /**
2952  * srp_conn_unique() - check whether the connection to a target is unique
2953  * @host:   SRP host.
2954  * @target: SRP target port.
2955  */
2956 static bool srp_conn_unique(struct srp_host *host,
2957 			    struct srp_target_port *target)
2958 {
2959 	struct srp_target_port *t;
2960 	bool ret = false;
2961 
2962 	if (target->state == SRP_TARGET_REMOVED)
2963 		goto out;
2964 
2965 	ret = true;
2966 
2967 	spin_lock(&host->target_lock);
2968 	list_for_each_entry(t, &host->target_list, list) {
2969 		if (t != target &&
2970 		    target->id_ext == t->id_ext &&
2971 		    target->ioc_guid == t->ioc_guid &&
2972 		    target->initiator_ext == t->initiator_ext) {
2973 			ret = false;
2974 			break;
2975 		}
2976 	}
2977 	spin_unlock(&host->target_lock);
2978 
2979 out:
2980 	return ret;
2981 }
2982 
2983 /*
2984  * Target ports are added by writing
2985  *
2986  *     id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
2987  *     pkey=<P_Key>,service_id=<service ID>
2988  *
2989  * to the add_target sysfs attribute.
2990  */
2991 enum {
2992 	SRP_OPT_ERR		= 0,
2993 	SRP_OPT_ID_EXT		= 1 << 0,
2994 	SRP_OPT_IOC_GUID	= 1 << 1,
2995 	SRP_OPT_DGID		= 1 << 2,
2996 	SRP_OPT_PKEY		= 1 << 3,
2997 	SRP_OPT_SERVICE_ID	= 1 << 4,
2998 	SRP_OPT_MAX_SECT	= 1 << 5,
2999 	SRP_OPT_MAX_CMD_PER_LUN	= 1 << 6,
3000 	SRP_OPT_IO_CLASS	= 1 << 7,
3001 	SRP_OPT_INITIATOR_EXT	= 1 << 8,
3002 	SRP_OPT_CMD_SG_ENTRIES	= 1 << 9,
3003 	SRP_OPT_ALLOW_EXT_SG	= 1 << 10,
3004 	SRP_OPT_SG_TABLESIZE	= 1 << 11,
3005 	SRP_OPT_COMP_VECTOR	= 1 << 12,
3006 	SRP_OPT_TL_RETRY_COUNT	= 1 << 13,
3007 	SRP_OPT_QUEUE_SIZE	= 1 << 14,
3008 	SRP_OPT_ALL		= (SRP_OPT_ID_EXT	|
3009 				   SRP_OPT_IOC_GUID	|
3010 				   SRP_OPT_DGID		|
3011 				   SRP_OPT_PKEY		|
3012 				   SRP_OPT_SERVICE_ID),
3013 };
3014 
3015 static const match_table_t srp_opt_tokens = {
3016 	{ SRP_OPT_ID_EXT,		"id_ext=%s" 		},
3017 	{ SRP_OPT_IOC_GUID,		"ioc_guid=%s" 		},
3018 	{ SRP_OPT_DGID,			"dgid=%s" 		},
3019 	{ SRP_OPT_PKEY,			"pkey=%x" 		},
3020 	{ SRP_OPT_SERVICE_ID,		"service_id=%s"		},
3021 	{ SRP_OPT_MAX_SECT,		"max_sect=%d" 		},
3022 	{ SRP_OPT_MAX_CMD_PER_LUN,	"max_cmd_per_lun=%d" 	},
3023 	{ SRP_OPT_IO_CLASS,		"io_class=%x"		},
3024 	{ SRP_OPT_INITIATOR_EXT,	"initiator_ext=%s"	},
3025 	{ SRP_OPT_CMD_SG_ENTRIES,	"cmd_sg_entries=%u"	},
3026 	{ SRP_OPT_ALLOW_EXT_SG,		"allow_ext_sg=%u"	},
3027 	{ SRP_OPT_SG_TABLESIZE,		"sg_tablesize=%u"	},
3028 	{ SRP_OPT_COMP_VECTOR,		"comp_vector=%u"	},
3029 	{ SRP_OPT_TL_RETRY_COUNT,	"tl_retry_count=%u"	},
3030 	{ SRP_OPT_QUEUE_SIZE,		"queue_size=%d"		},
3031 	{ SRP_OPT_ERR,			NULL 			}
3032 };
3033 
3034 static int srp_parse_options(const char *buf, struct srp_target_port *target)
3035 {
3036 	char *options, *sep_opt;
3037 	char *p;
3038 	char dgid[3];
3039 	substring_t args[MAX_OPT_ARGS];
3040 	int opt_mask = 0;
3041 	int token;
3042 	int ret = -EINVAL;
3043 	int i;
3044 
3045 	options = kstrdup(buf, GFP_KERNEL);
3046 	if (!options)
3047 		return -ENOMEM;
3048 
3049 	sep_opt = options;
3050 	while ((p = strsep(&sep_opt, ",\n")) != NULL) {
3051 		if (!*p)
3052 			continue;
3053 
3054 		token = match_token(p, srp_opt_tokens, args);
3055 		opt_mask |= token;
3056 
3057 		switch (token) {
3058 		case SRP_OPT_ID_EXT:
3059 			p = match_strdup(args);
3060 			if (!p) {
3061 				ret = -ENOMEM;
3062 				goto out;
3063 			}
3064 			target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3065 			kfree(p);
3066 			break;
3067 
3068 		case SRP_OPT_IOC_GUID:
3069 			p = match_strdup(args);
3070 			if (!p) {
3071 				ret = -ENOMEM;
3072 				goto out;
3073 			}
3074 			target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
3075 			kfree(p);
3076 			break;
3077 
3078 		case SRP_OPT_DGID:
3079 			p = match_strdup(args);
3080 			if (!p) {
3081 				ret = -ENOMEM;
3082 				goto out;
3083 			}
3084 			if (strlen(p) != 32) {
3085 				pr_warn("bad dest GID parameter '%s'\n", p);
3086 				kfree(p);
3087 				goto out;
3088 			}
3089 
3090 			for (i = 0; i < 16; ++i) {
3091 				strlcpy(dgid, p + i * 2, sizeof(dgid));
3092 				if (sscanf(dgid, "%hhx",
3093 					   &target->orig_dgid.raw[i]) < 1) {
3094 					ret = -EINVAL;
3095 					kfree(p);
3096 					goto out;
3097 				}
3098 			}
3099 			kfree(p);
3100 			break;
3101 
3102 		case SRP_OPT_PKEY:
3103 			if (match_hex(args, &token)) {
3104 				pr_warn("bad P_Key parameter '%s'\n", p);
3105 				goto out;
3106 			}
3107 			target->pkey = cpu_to_be16(token);
3108 			break;
3109 
3110 		case SRP_OPT_SERVICE_ID:
3111 			p = match_strdup(args);
3112 			if (!p) {
3113 				ret = -ENOMEM;
3114 				goto out;
3115 			}
3116 			target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
3117 			kfree(p);
3118 			break;
3119 
3120 		case SRP_OPT_MAX_SECT:
3121 			if (match_int(args, &token)) {
3122 				pr_warn("bad max sect parameter '%s'\n", p);
3123 				goto out;
3124 			}
3125 			target->scsi_host->max_sectors = token;
3126 			break;
3127 
3128 		case SRP_OPT_QUEUE_SIZE:
3129 			if (match_int(args, &token) || token < 1) {
3130 				pr_warn("bad queue_size parameter '%s'\n", p);
3131 				goto out;
3132 			}
3133 			target->scsi_host->can_queue = token;
3134 			target->queue_size = token + SRP_RSP_SQ_SIZE +
3135 					     SRP_TSK_MGMT_SQ_SIZE;
3136 			if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3137 				target->scsi_host->cmd_per_lun = token;
3138 			break;
3139 
3140 		case SRP_OPT_MAX_CMD_PER_LUN:
3141 			if (match_int(args, &token) || token < 1) {
3142 				pr_warn("bad max cmd_per_lun parameter '%s'\n",
3143 					p);
3144 				goto out;
3145 			}
3146 			target->scsi_host->cmd_per_lun = token;
3147 			break;
3148 
3149 		case SRP_OPT_IO_CLASS:
3150 			if (match_hex(args, &token)) {
3151 				pr_warn("bad IO class parameter '%s'\n", p);
3152 				goto out;
3153 			}
3154 			if (token != SRP_REV10_IB_IO_CLASS &&
3155 			    token != SRP_REV16A_IB_IO_CLASS) {
3156 				pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3157 					token, SRP_REV10_IB_IO_CLASS,
3158 					SRP_REV16A_IB_IO_CLASS);
3159 				goto out;
3160 			}
3161 			target->io_class = token;
3162 			break;
3163 
3164 		case SRP_OPT_INITIATOR_EXT:
3165 			p = match_strdup(args);
3166 			if (!p) {
3167 				ret = -ENOMEM;
3168 				goto out;
3169 			}
3170 			target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3171 			kfree(p);
3172 			break;
3173 
3174 		case SRP_OPT_CMD_SG_ENTRIES:
3175 			if (match_int(args, &token) || token < 1 || token > 255) {
3176 				pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3177 					p);
3178 				goto out;
3179 			}
3180 			target->cmd_sg_cnt = token;
3181 			break;
3182 
3183 		case SRP_OPT_ALLOW_EXT_SG:
3184 			if (match_int(args, &token)) {
3185 				pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3186 				goto out;
3187 			}
3188 			target->allow_ext_sg = !!token;
3189 			break;
3190 
3191 		case SRP_OPT_SG_TABLESIZE:
3192 			if (match_int(args, &token) || token < 1 ||
3193 					token > SG_MAX_SEGMENTS) {
3194 				pr_warn("bad max sg_tablesize parameter '%s'\n",
3195 					p);
3196 				goto out;
3197 			}
3198 			target->sg_tablesize = token;
3199 			break;
3200 
3201 		case SRP_OPT_COMP_VECTOR:
3202 			if (match_int(args, &token) || token < 0) {
3203 				pr_warn("bad comp_vector parameter '%s'\n", p);
3204 				goto out;
3205 			}
3206 			target->comp_vector = token;
3207 			break;
3208 
3209 		case SRP_OPT_TL_RETRY_COUNT:
3210 			if (match_int(args, &token) || token < 2 || token > 7) {
3211 				pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3212 					p);
3213 				goto out;
3214 			}
3215 			target->tl_retry_count = token;
3216 			break;
3217 
3218 		default:
3219 			pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3220 				p);
3221 			goto out;
3222 		}
3223 	}
3224 
3225 	if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
3226 		ret = 0;
3227 	else
3228 		for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
3229 			if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
3230 			    !(srp_opt_tokens[i].token & opt_mask))
3231 				pr_warn("target creation request is missing parameter '%s'\n",
3232 					srp_opt_tokens[i].pattern);
3233 
3234 	if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3235 	    && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3236 		pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3237 			target->scsi_host->cmd_per_lun,
3238 			target->scsi_host->can_queue);
3239 
3240 out:
3241 	kfree(options);
3242 	return ret;
3243 }
3244 
3245 static ssize_t srp_create_target(struct device *dev,
3246 				 struct device_attribute *attr,
3247 				 const char *buf, size_t count)
3248 {
3249 	struct srp_host *host =
3250 		container_of(dev, struct srp_host, dev);
3251 	struct Scsi_Host *target_host;
3252 	struct srp_target_port *target;
3253 	struct srp_rdma_ch *ch;
3254 	struct srp_device *srp_dev = host->srp_dev;
3255 	struct ib_device *ibdev = srp_dev->dev;
3256 	int ret, node_idx, node, cpu, i;
3257 	unsigned int max_sectors_per_mr, mr_per_cmd = 0;
3258 	bool multich = false;
3259 
3260 	target_host = scsi_host_alloc(&srp_template,
3261 				      sizeof (struct srp_target_port));
3262 	if (!target_host)
3263 		return -ENOMEM;
3264 
3265 	target_host->transportt  = ib_srp_transport_template;
3266 	target_host->max_channel = 0;
3267 	target_host->max_id      = 1;
3268 	target_host->max_lun     = -1LL;
3269 	target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3270 
3271 	target = host_to_target(target_host);
3272 
3273 	target->io_class	= SRP_REV16A_IB_IO_CLASS;
3274 	target->scsi_host	= target_host;
3275 	target->srp_host	= host;
3276 	target->pd		= host->srp_dev->pd;
3277 	target->lkey		= host->srp_dev->pd->local_dma_lkey;
3278 	target->cmd_sg_cnt	= cmd_sg_entries;
3279 	target->sg_tablesize	= indirect_sg_entries ? : cmd_sg_entries;
3280 	target->allow_ext_sg	= allow_ext_sg;
3281 	target->tl_retry_count	= 7;
3282 	target->queue_size	= SRP_DEFAULT_QUEUE_SIZE;
3283 
3284 	/*
	 * Prevent the SCSI host from being removed by srp_remove_target()
	 * before this function returns.
3287 	 */
3288 	scsi_host_get(target->scsi_host);
3289 
3290 	mutex_lock(&host->add_target_mutex);
3291 
3292 	ret = srp_parse_options(buf, target);
3293 	if (ret)
3294 		goto out;
3295 
3296 	target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3297 
3298 	if (!srp_conn_unique(target->srp_host, target)) {
3299 		shost_printk(KERN_INFO, target->scsi_host,
3300 			     PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3301 			     be64_to_cpu(target->id_ext),
3302 			     be64_to_cpu(target->ioc_guid),
3303 			     be64_to_cpu(target->initiator_ext));
3304 		ret = -EEXIST;
3305 		goto out;
3306 	}
3307 
3308 	if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3309 	    target->cmd_sg_cnt < target->sg_tablesize) {
3310 		pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3311 		target->sg_tablesize = target->cmd_sg_cnt;
3312 	}
3313 
3314 	if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
3315 		/*
3316 		 * FR and FMR can only map one HCA page per entry. If the
3317 		 * start address is not aligned on a HCA page boundary two
3318 		 * entries will be used for the head and the tail although
3319 		 * these two entries combined contain at most one HCA page of
3320 		 * data. Hence the "+ 1" in the calculation below.
3321 		 *
3322 		 * The indirect data buffer descriptor is contiguous so the
3323 		 * memory for that buffer will only be registered if
3324 		 * register_always is true. Hence add one to mr_per_cmd if
3325 		 * register_always has been set.
3326 		 */
3327 		max_sectors_per_mr = srp_dev->max_pages_per_mr <<
3328 				  (ilog2(srp_dev->mr_page_size) - 9);
3329 		mr_per_cmd = register_always +
3330 			(target->scsi_host->max_sectors + 1 +
3331 			 max_sectors_per_mr - 1) / max_sectors_per_mr;
3332 		pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
3333 			 target->scsi_host->max_sectors,
3334 			 srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
3335 			 max_sectors_per_mr, mr_per_cmd);
3336 	}
3337 
3338 	target_host->sg_tablesize = target->sg_tablesize;
3339 	target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
3340 	target->mr_per_cmd = mr_per_cmd;
3341 	target->indirect_size = target->sg_tablesize *
3342 				sizeof (struct srp_direct_buf);
3343 	target->max_iu_len = sizeof (struct srp_cmd) +
3344 			     sizeof (struct srp_indirect_buf) +
3345 			     target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
3346 
3347 	INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3348 	INIT_WORK(&target->remove_work, srp_remove_work);
3349 	spin_lock_init(&target->lock);
3350 	ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
3351 	if (ret)
3352 		goto out;
3353 
3354 	ret = -ENOMEM;
3355 	target->ch_count = max_t(unsigned, num_online_nodes(),
3356 				 min(ch_count ? :
3357 				     min(4 * num_online_nodes(),
3358 					 ibdev->num_comp_vectors),
3359 				     num_online_cpus()));
3360 	target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3361 			     GFP_KERNEL);
3362 	if (!target->ch)
3363 		goto out;
3364 
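	/*
	 * Spread the RDMA channels over the online NUMA nodes and, within
	 * each node, over that node's online CPUs, while distributing the
	 * completion vectors over the channels.
	 */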
3365 	node_idx = 0;
3366 	for_each_online_node(node) {
3367 		const int ch_start = (node_idx * target->ch_count /
3368 				      num_online_nodes());
3369 		const int ch_end = ((node_idx + 1) * target->ch_count /
3370 				    num_online_nodes());
3371 		const int cv_start = (node_idx * ibdev->num_comp_vectors /
3372 				      num_online_nodes() + target->comp_vector)
3373 				     % ibdev->num_comp_vectors;
3374 		const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
3375 				    num_online_nodes() + target->comp_vector)
3376 				   % ibdev->num_comp_vectors;
3377 		int cpu_idx = 0;
3378 
3379 		for_each_online_cpu(cpu) {
3380 			if (cpu_to_node(cpu) != node)
3381 				continue;
3382 			if (ch_start + cpu_idx >= ch_end)
3383 				continue;
3384 			ch = &target->ch[ch_start + cpu_idx];
3385 			ch->target = target;
3386 			ch->comp_vector = cv_start == cv_end ? cv_start :
3387 				cv_start + cpu_idx % (cv_end - cv_start);
3388 			spin_lock_init(&ch->lock);
3389 			INIT_LIST_HEAD(&ch->free_tx);
3390 			ret = srp_new_cm_id(ch);
3391 			if (ret)
3392 				goto err_disconnect;
3393 
3394 			ret = srp_create_ch_ib(ch);
3395 			if (ret)
3396 				goto err_disconnect;
3397 
3398 			ret = srp_alloc_req_data(ch);
3399 			if (ret)
3400 				goto err_disconnect;
3401 
3402 			ret = srp_connect_ch(ch, multich);
3403 			if (ret) {
3404 				shost_printk(KERN_ERR, target->scsi_host,
3405 					     PFX "Connection %d/%d failed\n",
3406 					     ch_start + cpu_idx,
3407 					     target->ch_count);
3408 				if (node_idx == 0 && cpu_idx == 0) {
3409 					goto err_disconnect;
3410 				} else {
3411 					srp_free_ch_ib(target, ch);
3412 					srp_free_req_data(target, ch);
3413 					target->ch_count = ch - target->ch;
3414 					goto connected;
3415 				}
3416 			}
3417 
3418 			multich = true;
3419 			cpu_idx++;
3420 		}
3421 		node_idx++;
3422 	}
3423 
3424 connected:
3425 	target->scsi_host->nr_hw_queues = target->ch_count;
3426 
3427 	ret = srp_add_target(host, target);
3428 	if (ret)
3429 		goto err_disconnect;
3430 
3431 	if (target->state != SRP_TARGET_REMOVED) {
3432 		shost_printk(KERN_DEBUG, target->scsi_host, PFX
3433 			     "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3434 			     be64_to_cpu(target->id_ext),
3435 			     be64_to_cpu(target->ioc_guid),
3436 			     be16_to_cpu(target->pkey),
3437 			     be64_to_cpu(target->service_id),
3438 			     target->sgid.raw, target->orig_dgid.raw);
3439 	}
3440 
3441 	ret = count;
3442 
3443 out:
3444 	mutex_unlock(&host->add_target_mutex);
3445 
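	/*
	 * The first put balances the scsi_host_get() above. The additional
	 * put on the error path drops the reference obtained through
	 * scsi_host_alloc() so that the SCSI host gets freed.
	 */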
3446 	scsi_host_put(target->scsi_host);
3447 	if (ret < 0)
3448 		scsi_host_put(target->scsi_host);
3449 
3450 	return ret;
3451 
3452 err_disconnect:
3453 	srp_disconnect_target(target);
3454 
3455 	for (i = 0; i < target->ch_count; i++) {
3456 		ch = &target->ch[i];
3457 		srp_free_ch_ib(target, ch);
3458 		srp_free_req_data(target, ch);
3459 	}
3460 
3461 	kfree(target->ch);
3462 	goto out;
3463 }
3464 
3465 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
3466 
3467 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
3468 			  char *buf)
3469 {
3470 	struct srp_host *host = container_of(dev, struct srp_host, dev);
3471 
3472 	return sprintf(buf, "%s\n", host->srp_dev->dev->name);
3473 }
3474 
3475 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
3476 
3477 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
3478 			 char *buf)
3479 {
3480 	struct srp_host *host = container_of(dev, struct srp_host, dev);
3481 
3482 	return sprintf(buf, "%d\n", host->port);
3483 }
3484 
3485 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
3486 
3487 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
3488 {
3489 	struct srp_host *host;
3490 
3491 	host = kzalloc(sizeof *host, GFP_KERNEL);
3492 	if (!host)
3493 		return NULL;
3494 
3495 	INIT_LIST_HEAD(&host->target_list);
3496 	spin_lock_init(&host->target_lock);
3497 	init_completion(&host->released);
3498 	mutex_init(&host->add_target_mutex);
3499 	host->srp_dev = device;
3500 	host->port = port;
3501 
3502 	host->dev.class = &srp_class;
3503 	host->dev.parent = device->dev->dma_device;
3504 	dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
3505 
3506 	if (device_register(&host->dev))
3507 		goto free_host;
3508 	if (device_create_file(&host->dev, &dev_attr_add_target))
3509 		goto err_class;
3510 	if (device_create_file(&host->dev, &dev_attr_ibdev))
3511 		goto err_class;
3512 	if (device_create_file(&host->dev, &dev_attr_port))
3513 		goto err_class;
3514 
3515 	return host;
3516 
3517 err_class:
3518 	device_unregister(&host->dev);
3519 
3520 free_host:
3521 	kfree(host);
3522 
3523 	return NULL;
3524 }
3525 
3526 static void srp_add_one(struct ib_device *device)
3527 {
3528 	struct srp_device *srp_dev;
3529 	struct srp_host *host;
3530 	int mr_page_shift, p;
3531 	u64 max_pages_per_mr;
3532 	unsigned int flags = 0;
3533 
3534 	srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
3535 	if (!srp_dev)
3536 		return;
3537 
3538 	/*
3539 	 * Use the smallest page size supported by the HCA, down to a
3540 	 * minimum of 4096 bytes. We're unlikely to build large sglists
3541 	 * out of smaller entries.
3542 	 */
3543 	mr_page_shift		= max(12, ffs(device->attrs.page_size_cap) - 1);
3544 	srp_dev->mr_page_size	= 1 << mr_page_shift;
3545 	srp_dev->mr_page_mask	= ~((u64) srp_dev->mr_page_size - 1);
3546 	max_pages_per_mr	= device->attrs.max_mr_size;
3547 	do_div(max_pages_per_mr, srp_dev->mr_page_size);
3548 	pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
3549 		 device->attrs.max_mr_size, srp_dev->mr_page_size,
3550 		 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
3551 	srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
3552 					  max_pages_per_mr);
3553 
3554 	srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
3555 			    device->map_phys_fmr && device->unmap_fmr);
3556 	srp_dev->has_fr = (device->attrs.device_cap_flags &
3557 			   IB_DEVICE_MEM_MGT_EXTENSIONS);
3558 	if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
3559 		dev_warn(&device->dev, "neither FMR nor FR is supported\n");
3560 	} else if (!never_register &&
3561 		   device->attrs.max_mr_size >= 2 * srp_dev->mr_page_size) {
3562 		srp_dev->use_fast_reg = (srp_dev->has_fr &&
3563 					 (!srp_dev->has_fmr || prefer_fr));
3564 		srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
3565 	}
3566 
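	/*
	 * Request a PD with a global unsafe rkey if not every request will
	 * use memory registration.
	 */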
3567 	if (never_register || !register_always ||
3568 	    (!srp_dev->has_fmr && !srp_dev->has_fr))
3569 		flags |= IB_PD_UNSAFE_GLOBAL_RKEY;
3570 
3571 	if (srp_dev->use_fast_reg) {
3572 		srp_dev->max_pages_per_mr =
3573 			min_t(u32, srp_dev->max_pages_per_mr,
3574 			      device->attrs.max_fast_reg_page_list_len);
3575 	}
3576 	srp_dev->mr_max_size	= srp_dev->mr_page_size *
3577 				   srp_dev->max_pages_per_mr;
3578 	pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
3579 		 device->name, mr_page_shift, device->attrs.max_mr_size,
3580 		 device->attrs.max_fast_reg_page_list_len,
3581 		 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
3582 
3583 	INIT_LIST_HEAD(&srp_dev->dev_list);
3584 
3585 	srp_dev->dev = device;
3586 	srp_dev->pd  = ib_alloc_pd(device, flags);
3587 	if (IS_ERR(srp_dev->pd))
		goto free_dev;

	for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
3592 		host = srp_add_port(srp_dev, p);
3593 		if (host)
3594 			list_add_tail(&host->list, &srp_dev->dev_list);
3595 	}
3596 
3597 	ib_set_client_data(device, &srp_client, srp_dev);
3598 	return;
3599 
3600 free_dev:
3601 	kfree(srp_dev);
3602 }
3603 
3604 static void srp_remove_one(struct ib_device *device, void *client_data)
3605 {
3606 	struct srp_device *srp_dev;
3607 	struct srp_host *host, *tmp_host;
3608 	struct srp_target_port *target;
3609 
3610 	srp_dev = client_data;
3611 	if (!srp_dev)
3612 		return;
3613 
3614 	list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
3615 		device_unregister(&host->dev);
3616 		/*
3617 		 * Wait for the sysfs entry to go away, so that no new
3618 		 * target ports can be created.
3619 		 */
3620 		wait_for_completion(&host->released);
3621 
3622 		/*
3623 		 * Remove all target ports.
3624 		 */
3625 		spin_lock(&host->target_lock);
3626 		list_for_each_entry(target, &host->target_list, list)
3627 			srp_queue_remove_work(target);
3628 		spin_unlock(&host->target_lock);
3629 
3630 		/*
3631 		 * Wait for tl_err and target port removal tasks.
3632 		 */
3633 		flush_workqueue(system_long_wq);
3634 		flush_workqueue(srp_remove_wq);
3635 
3636 		kfree(host);
3637 	}
3638 
3639 	ib_dealloc_pd(srp_dev->pd);
3640 
3641 	kfree(srp_dev);
3642 }
3643 
3644 static struct srp_function_template ib_srp_transport_functions = {
3645 	.has_rport_state	 = true,
3646 	.reset_timer_if_blocked	 = true,
3647 	.reconnect_delay	 = &srp_reconnect_delay,
3648 	.fast_io_fail_tmo	 = &srp_fast_io_fail_tmo,
3649 	.dev_loss_tmo		 = &srp_dev_loss_tmo,
3650 	.reconnect		 = srp_rport_reconnect,
3651 	.rport_delete		 = srp_rport_delete,
3652 	.terminate_rport_io	 = srp_terminate_io,
3653 };
3654 
3655 static int __init srp_init_module(void)
3656 {
3657 	int ret;
3658 
3659 	if (srp_sg_tablesize) {
3660 		pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
3661 		if (!cmd_sg_entries)
3662 			cmd_sg_entries = srp_sg_tablesize;
3663 	}
3664 
3665 	if (!cmd_sg_entries)
3666 		cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
3667 
3668 	if (cmd_sg_entries > 255) {
3669 		pr_warn("Clamping cmd_sg_entries to 255\n");
3670 		cmd_sg_entries = 255;
3671 	}
3672 
3673 	if (!indirect_sg_entries)
3674 		indirect_sg_entries = cmd_sg_entries;
3675 	else if (indirect_sg_entries < cmd_sg_entries) {
3676 		pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
3677 			cmd_sg_entries);
3678 		indirect_sg_entries = cmd_sg_entries;
3679 	}
3680 
3681 	srp_remove_wq = create_workqueue("srp_remove");
3682 	if (!srp_remove_wq) {
3683 		ret = -ENOMEM;
3684 		goto out;
3685 	}
3686 
3687 	ret = -ENOMEM;
3688 	ib_srp_transport_template =
3689 		srp_attach_transport(&ib_srp_transport_functions);
3690 	if (!ib_srp_transport_template)
3691 		goto destroy_wq;
3692 
3693 	ret = class_register(&srp_class);
3694 	if (ret) {
3695 		pr_err("couldn't register class infiniband_srp\n");
3696 		goto release_tr;
3697 	}
3698 
3699 	ib_sa_register_client(&srp_sa_client);
3700 
3701 	ret = ib_register_client(&srp_client);
3702 	if (ret) {
3703 		pr_err("couldn't register IB client\n");
3704 		goto unreg_sa;
3705 	}
3706 
3707 out:
3708 	return ret;
3709 
3710 unreg_sa:
3711 	ib_sa_unregister_client(&srp_sa_client);
3712 	class_unregister(&srp_class);
3713 
3714 release_tr:
3715 	srp_release_transport(ib_srp_transport_template);
3716 
3717 destroy_wq:
3718 	destroy_workqueue(srp_remove_wq);
3719 	goto out;
3720 }
3721 
3722 static void __exit srp_cleanup_module(void)
3723 {
3724 	ib_unregister_client(&srp_client);
3725 	ib_sa_unregister_client(&srp_sa_client);
3726 	class_unregister(&srp_class);
3727 	srp_release_transport(ib_srp_transport_template);
3728 	destroy_workqueue(srp_remove_wq);
3729 }
3730 
3731 module_init(srp_init_module);
3732 module_exit(srp_cleanup_module);
3733