xref: /openbmc/linux/drivers/infiniband/ulp/iser/iscsi_iser.c (revision 1ac731c529cd4d6adbce134754b51ff7d822b145)
1  /*
2   * iSCSI Initiator over iSER Data-Path
3   *
4   * Copyright (C) 2004 Dmitry Yusupov
5   * Copyright (C) 2004 Alex Aizman
6   * Copyright (C) 2005 Mike Christie
7   * Copyright (c) 2005, 2006 Voltaire, Inc. All rights reserved.
8   * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
9   * maintained by openib-general@openib.org
10   *
11   * This software is available to you under a choice of one of two
12   * licenses.  You may choose to be licensed under the terms of the GNU
13   * General Public License (GPL) Version 2, available from the file
14   * COPYING in the main directory of this source tree, or the
15   * OpenIB.org BSD license below:
16   *
17   *     Redistribution and use in source and binary forms, with or
18   *     without modification, are permitted provided that the following
19   *     conditions are met:
20   *
21   *	- Redistributions of source code must retain the above
22   *	  copyright notice, this list of conditions and the following
23   *	  disclaimer.
24   *
25   *	- Redistributions in binary form must reproduce the above
26   *	  copyright notice, this list of conditions and the following
27   *	  disclaimer in the documentation and/or other materials
28   *	  provided with the distribution.
29   *
30   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
31   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
33   * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
34   * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
35   * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
36   * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
37   * SOFTWARE.
38   *
39   * Credits:
40   *	Christoph Hellwig
41   *	FUJITA Tomonori
42   *	Arne Redlich
43   *	Zhenyu Wang
44   * Modified by:
45   *      Erez Zilber
46   */
47  
48  #include <linux/types.h>
49  #include <linux/list.h>
50  #include <linux/hardirq.h>
51  #include <linux/kfifo.h>
52  #include <linux/blkdev.h>
53  #include <linux/init.h>
54  #include <linux/ioctl.h>
55  #include <linux/cdev.h>
56  #include <linux/in.h>
57  #include <linux/net.h>
58  #include <linux/scatterlist.h>
59  #include <linux/delay.h>
60  #include <linux/slab.h>
61  #include <linux/module.h>
62  
63  #include <net/sock.h>
64  
65  #include <linux/uaccess.h>
66  
67  #include <scsi/scsi_cmnd.h>
68  #include <scsi/scsi_device.h>
69  #include <scsi/scsi_eh.h>
70  #include <scsi/scsi_tcq.h>
71  #include <scsi/scsi_host.h>
72  #include <scsi/scsi.h>
73  #include <scsi/scsi_transport_iscsi.h>
74  
75  #include "iscsi_iser.h"
76  
77  MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover");
78  MODULE_LICENSE("Dual BSD/GPL");
79  MODULE_AUTHOR("Alex Nezhinsky, Dan Bar Dov, Or Gerlitz");
80  
81  static const struct scsi_host_template iscsi_iser_sht;
82  static struct iscsi_transport iscsi_iser_transport;
83  static struct scsi_transport_template *iscsi_iser_scsi_transport;
84  static struct workqueue_struct *release_wq;
85  static DEFINE_MUTEX(unbind_iser_conn_mutex);
86  struct iser_global ig;
87  
88  int iser_debug_level = 0;
89  module_param_named(debug_level, iser_debug_level, int, S_IRUGO | S_IWUSR);
90  MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:disabled)");
91  
92  static int iscsi_iser_set(const char *val, const struct kernel_param *kp);
93  static const struct kernel_param_ops iscsi_iser_size_ops = {
94  	.set = iscsi_iser_set,
95  	.get = param_get_uint,
96  };
97  
98  static unsigned int iscsi_max_lun = 512;
99  module_param_cb(max_lun, &iscsi_iser_size_ops, &iscsi_max_lun, S_IRUGO);
100  MODULE_PARM_DESC(max_lun, "Max LUNs to allow per session, should > 0 (default:512)");
101  
102  unsigned int iser_max_sectors = ISER_DEF_MAX_SECTORS;
103  module_param_cb(max_sectors, &iscsi_iser_size_ops, &iser_max_sectors,
104  		S_IRUGO | S_IWUSR);
105  MODULE_PARM_DESC(max_sectors, "Max number of sectors in a single scsi command, should > 0 (default:1024)");
106  
107  bool iser_always_reg = true;
108  module_param_named(always_register, iser_always_reg, bool, S_IRUGO);
109  MODULE_PARM_DESC(always_register,
110  		 "Always register memory, even for continuous memory regions (default:true)");
111  
112  bool iser_pi_enable = false;
113  module_param_named(pi_enable, iser_pi_enable, bool, S_IRUGO);
114  MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)");
115  
iscsi_iser_set(const char * val,const struct kernel_param * kp)116  static int iscsi_iser_set(const char *val, const struct kernel_param *kp)
117  {
118  	int ret;
119  	unsigned int n = 0;
120  
121  	ret = kstrtouint(val, 10, &n);
122  	if (ret != 0 || n == 0)
123  		return -EINVAL;
124  
125  	return param_set_uint(val, kp);
126  }
127  
128  /*
129   * iscsi_iser_recv() - Process a successful recv completion
130   * @conn:         iscsi connection
131   * @hdr:          iscsi header
132   * @rx_data:      buffer containing receive data payload
133   * @rx_data_len:  length of rx_data
134   *
135   * Notes: In case of data length errors or iscsi PDU completion failures
136   *        this routine will signal iscsi layer of connection failure.
137   */
iscsi_iser_recv(struct iscsi_conn * conn,struct iscsi_hdr * hdr,char * rx_data,int rx_data_len)138  void iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
139  		     char *rx_data, int rx_data_len)
140  {
141  	int rc = 0;
142  	int datalen;
143  
144  	/* verify PDU length */
145  	datalen = ntoh24(hdr->dlength);
146  	if (datalen > rx_data_len || (datalen + 4) < rx_data_len) {
147  		iser_err("wrong datalen %d (hdr), %d (IB)\n",
148  			datalen, rx_data_len);
149  		rc = ISCSI_ERR_DATALEN;
150  		goto error;
151  	}
152  
153  	if (datalen != rx_data_len)
154  		iser_dbg("aligned datalen (%d) hdr, %d (IB)\n",
155  			datalen, rx_data_len);
156  
157  	rc = iscsi_complete_pdu(conn, hdr, rx_data, rx_data_len);
158  	if (rc && rc != ISCSI_ERR_NO_SCSI_CMD)
159  		goto error;
160  
161  	return;
162  error:
163  	iscsi_conn_failure(conn, rc);
164  }
165  
166  /**
167   * iscsi_iser_pdu_alloc() - allocate an iscsi-iser PDU
168   * @task:     iscsi task
169   * @opcode:   iscsi command opcode
170   *
171   * Netes: This routine can't fail, just assign iscsi task
172   *        hdr and max hdr size.
173   */
iscsi_iser_pdu_alloc(struct iscsi_task * task,uint8_t opcode)174  static int iscsi_iser_pdu_alloc(struct iscsi_task *task, uint8_t opcode)
175  {
176  	struct iscsi_iser_task *iser_task = task->dd_data;
177  
178  	task->hdr = (struct iscsi_hdr *)&iser_task->desc.iscsi_header;
179  	task->hdr_max = sizeof(iser_task->desc.iscsi_header);
180  
181  	return 0;
182  }
183  
184  /**
185   * iser_initialize_task_headers() - Initialize task headers
186   * @task:       iscsi task
187   * @tx_desc:    iser tx descriptor
188   *
189   * Notes:
190   * This routine may race with iser teardown flow for scsi
191   * error handling TMFs. So for TMF we should acquire the
192   * state mutex to avoid dereferencing the IB device which
193   * may have already been terminated.
194   */
iser_initialize_task_headers(struct iscsi_task * task,struct iser_tx_desc * tx_desc)195  int iser_initialize_task_headers(struct iscsi_task *task,
196  				 struct iser_tx_desc *tx_desc)
197  {
198  	struct iser_conn *iser_conn = task->conn->dd_data;
199  	struct iser_device *device = iser_conn->ib_conn.device;
200  	struct iscsi_iser_task *iser_task = task->dd_data;
201  	u64 dma_addr;
202  
203  	if (unlikely(iser_conn->state != ISER_CONN_UP))
204  		return -ENODEV;
205  
206  	dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
207  				ISER_HEADERS_LEN, DMA_TO_DEVICE);
208  	if (ib_dma_mapping_error(device->ib_device, dma_addr))
209  		return -ENOMEM;
210  
211  	tx_desc->inv_wr.next = NULL;
212  	tx_desc->reg_wr.wr.next = NULL;
213  	tx_desc->mapped = true;
214  	tx_desc->dma_addr = dma_addr;
215  	tx_desc->tx_sg[0].addr   = tx_desc->dma_addr;
216  	tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
217  	tx_desc->tx_sg[0].lkey   = device->pd->local_dma_lkey;
218  
219  	iser_task->iser_conn = iser_conn;
220  
221  	return 0;
222  }
223  
224  /**
225   * iscsi_iser_task_init() - Initialize iscsi-iser task
226   * @task: iscsi task
227   *
228   * Initialize the task for the scsi command or mgmt command.
229   *
230   * Return: Returns zero on success or -ENOMEM when failing
231   *         to init task headers (dma mapping error).
232   */
iscsi_iser_task_init(struct iscsi_task * task)233  static int iscsi_iser_task_init(struct iscsi_task *task)
234  {
235  	struct iscsi_iser_task *iser_task = task->dd_data;
236  	int ret;
237  
238  	ret = iser_initialize_task_headers(task, &iser_task->desc);
239  	if (ret) {
240  		iser_err("Failed to init task %p, err = %d\n",
241  			 iser_task, ret);
242  		return ret;
243  	}
244  
245  	/* mgmt task */
246  	if (!task->sc)
247  		return 0;
248  
249  	iser_task->command_sent = 0;
250  	iser_task_rdma_init(iser_task);
251  	iser_task->sc = task->sc;
252  
253  	return 0;
254  }
255  
256  /**
257   * iscsi_iser_mtask_xmit() - xmit management (immediate) task
258   * @conn: iscsi connection
259   * @task: task management task
260   *
261   * Notes:
262   *	The function can return -EAGAIN in which case caller must
263   *	call it again later, or recover. '0' return code means successful
264   *	xmit.
265   *
266   **/
iscsi_iser_mtask_xmit(struct iscsi_conn * conn,struct iscsi_task * task)267  static int iscsi_iser_mtask_xmit(struct iscsi_conn *conn,
268  				 struct iscsi_task *task)
269  {
270  	int error = 0;
271  
272  	iser_dbg("mtask xmit [cid %d itt 0x%x]\n", conn->id, task->itt);
273  
274  	error = iser_send_control(conn, task);
275  
276  	/* since iser xmits control with zero copy, tasks can not be recycled
277  	 * right after sending them.
278  	 * The recycling scheme is based on whether a response is expected
279  	 * - if yes, the task is recycled at iscsi_complete_pdu
280  	 * - if no,  the task is recycled at iser_snd_completion
281  	 */
282  	return error;
283  }
284  
iscsi_iser_task_xmit_unsol_data(struct iscsi_conn * conn,struct iscsi_task * task)285  static int iscsi_iser_task_xmit_unsol_data(struct iscsi_conn *conn,
286  					   struct iscsi_task *task)
287  {
288  	struct iscsi_r2t_info *r2t = &task->unsol_r2t;
289  	struct iscsi_data hdr;
290  	int error = 0;
291  
292  	/* Send data-out PDUs while there's still unsolicited data to send */
293  	while (iscsi_task_has_unsol_data(task)) {
294  		iscsi_prep_data_out_pdu(task, r2t, &hdr);
295  		iser_dbg("Sending data-out: itt 0x%x, data count %d\n",
296  			   hdr.itt, r2t->data_count);
297  
298  		/* the buffer description has been passed with the command */
299  		/* Send the command */
300  		error = iser_send_data_out(conn, task, &hdr);
301  		if (error) {
302  			r2t->datasn--;
303  			goto iscsi_iser_task_xmit_unsol_data_exit;
304  		}
305  		r2t->sent += r2t->data_count;
306  		iser_dbg("Need to send %d more as data-out PDUs\n",
307  			   r2t->data_length - r2t->sent);
308  	}
309  
310  iscsi_iser_task_xmit_unsol_data_exit:
311  	return error;
312  }
313  
314  /**
315   * iscsi_iser_task_xmit() - xmit iscsi-iser task
316   * @task: iscsi task
317   *
318   * Return: zero on success or escalates $error on failure.
319   */
iscsi_iser_task_xmit(struct iscsi_task * task)320  static int iscsi_iser_task_xmit(struct iscsi_task *task)
321  {
322  	struct iscsi_conn *conn = task->conn;
323  	struct iscsi_iser_task *iser_task = task->dd_data;
324  	int error = 0;
325  
326  	if (!task->sc)
327  		return iscsi_iser_mtask_xmit(conn, task);
328  
329  	if (task->sc->sc_data_direction == DMA_TO_DEVICE) {
330  		BUG_ON(scsi_bufflen(task->sc) == 0);
331  
332  		iser_dbg("cmd [itt %x total %d imm %d unsol_data %d\n",
333  			   task->itt, scsi_bufflen(task->sc),
334  			   task->imm_count, task->unsol_r2t.data_length);
335  	}
336  
337  	iser_dbg("ctask xmit [cid %d itt 0x%x]\n",
338  		   conn->id, task->itt);
339  
340  	/* Send the cmd PDU */
341  	if (!iser_task->command_sent) {
342  		error = iser_send_command(conn, task);
343  		if (error)
344  			goto iscsi_iser_task_xmit_exit;
345  		iser_task->command_sent = 1;
346  	}
347  
348  	/* Send unsolicited data-out PDU(s) if necessary */
349  	if (iscsi_task_has_unsol_data(task))
350  		error = iscsi_iser_task_xmit_unsol_data(conn, task);
351  
352   iscsi_iser_task_xmit_exit:
353  	return error;
354  }
355  
356  /**
357   * iscsi_iser_cleanup_task() - cleanup an iscsi-iser task
358   * @task: iscsi task
359   *
360   * Notes: In case the RDMA device is already NULL (might have
361   *        been removed in DEVICE_REMOVAL CM event it will bail-out
362   *        without doing dma unmapping.
363   */
iscsi_iser_cleanup_task(struct iscsi_task * task)364  static void iscsi_iser_cleanup_task(struct iscsi_task *task)
365  {
366  	struct iscsi_iser_task *iser_task = task->dd_data;
367  	struct iser_tx_desc *tx_desc = &iser_task->desc;
368  	struct iser_conn *iser_conn = task->conn->dd_data;
369  	struct iser_device *device = iser_conn->ib_conn.device;
370  
371  	/* DEVICE_REMOVAL event might have already released the device */
372  	if (!device)
373  		return;
374  
375  	if (likely(tx_desc->mapped)) {
376  		ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
377  				    ISER_HEADERS_LEN, DMA_TO_DEVICE);
378  		tx_desc->mapped = false;
379  	}
380  
381  	/* mgmt tasks do not need special cleanup */
382  	if (!task->sc)
383  		return;
384  
385  	if (iser_task->status == ISER_TASK_STATUS_STARTED) {
386  		iser_task->status = ISER_TASK_STATUS_COMPLETED;
387  		iser_task_rdma_finalize(iser_task);
388  	}
389  }
390  
391  /**
392   * iscsi_iser_check_protection() - check protection information status of task.
393   * @task:     iscsi task
394   * @sector:   error sector if exsists (output)
395   *
396   * Return: zero if no data-integrity errors have occured
397   *         0x1: data-integrity error occured in the guard-block
398   *         0x2: data-integrity error occured in the reference tag
399   *         0x3: data-integrity error occured in the application tag
400   *
401   *         In addition the error sector is marked.
402   */
iscsi_iser_check_protection(struct iscsi_task * task,sector_t * sector)403  static u8 iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector)
404  {
405  	struct iscsi_iser_task *iser_task = task->dd_data;
406  	enum iser_data_dir dir = iser_task->dir[ISER_DIR_IN] ?
407  					ISER_DIR_IN : ISER_DIR_OUT;
408  
409  	return iser_check_task_pi_status(iser_task, dir, sector);
410  }
411  
412  /**
413   * iscsi_iser_conn_create() - create a new iscsi-iser connection
414   * @cls_session: iscsi class connection
415   * @conn_idx:    connection index within the session (for MCS)
416   *
417   * Return: iscsi_cls_conn when iscsi_conn_setup succeeds or NULL
418   *         otherwise.
419   */
420  static struct iscsi_cls_conn *
iscsi_iser_conn_create(struct iscsi_cls_session * cls_session,uint32_t conn_idx)421  iscsi_iser_conn_create(struct iscsi_cls_session *cls_session,
422  		       uint32_t conn_idx)
423  {
424  	struct iscsi_conn *conn;
425  	struct iscsi_cls_conn *cls_conn;
426  
427  	cls_conn = iscsi_conn_setup(cls_session, 0, conn_idx);
428  	if (!cls_conn)
429  		return NULL;
430  	conn = cls_conn->dd_data;
431  
432  	/*
433  	 * due to issues with the login code re iser sematics
434  	 * this not set in iscsi_conn_setup - FIXME
435  	 */
436  	conn->max_recv_dlength = ISER_RECV_DATA_SEG_LEN;
437  
438  	return cls_conn;
439  }
440  
441  /**
442   * iscsi_iser_conn_bind() - bind iscsi and iser connection structures
443   * @cls_session:     iscsi class session
444   * @cls_conn:        iscsi class connection
445   * @transport_eph:   transport end-point handle
446   * @is_leading:      indicate if this is the session leading connection (MCS)
447   *
448   * Return: zero on success, $error if iscsi_conn_bind fails and
449   *         -EINVAL in case end-point doesn't exists anymore or iser connection
450   *         state is not UP (teardown already started).
451   */
iscsi_iser_conn_bind(struct iscsi_cls_session * cls_session,struct iscsi_cls_conn * cls_conn,uint64_t transport_eph,int is_leading)452  static int iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
453  				struct iscsi_cls_conn *cls_conn,
454  				uint64_t transport_eph, int is_leading)
455  {
456  	struct iscsi_conn *conn = cls_conn->dd_data;
457  	struct iser_conn *iser_conn;
458  	struct iscsi_endpoint *ep;
459  	int error;
460  
461  	error = iscsi_conn_bind(cls_session, cls_conn, is_leading);
462  	if (error)
463  		return error;
464  
465  	/* the transport ep handle comes from user space so it must be
466  	 * verified against the global ib connections list */
467  	ep = iscsi_lookup_endpoint(transport_eph);
468  	if (!ep) {
469  		iser_err("can't bind eph %llx\n",
470  			 (unsigned long long)transport_eph);
471  		return -EINVAL;
472  	}
473  	iser_conn = ep->dd_data;
474  
475  	mutex_lock(&iser_conn->state_mutex);
476  	if (iser_conn->state != ISER_CONN_UP) {
477  		error = -EINVAL;
478  		iser_err("iser_conn %p state is %d, teardown started\n",
479  			 iser_conn, iser_conn->state);
480  		goto out;
481  	}
482  
483  	error = iser_alloc_rx_descriptors(iser_conn, conn->session);
484  	if (error)
485  		goto out;
486  
487  	/* binds the iSER connection retrieved from the previously
488  	 * connected ep_handle to the iSCSI layer connection. exchanges
489  	 * connection pointers */
490  	iser_info("binding iscsi conn %p to iser_conn %p\n", conn, iser_conn);
491  
492  	conn->dd_data = iser_conn;
493  	iser_conn->iscsi_conn = conn;
494  
495  out:
496  	iscsi_put_endpoint(ep);
497  	mutex_unlock(&iser_conn->state_mutex);
498  	return error;
499  }
500  
501  /**
502   * iscsi_iser_conn_start() - start iscsi-iser connection
503   * @cls_conn: iscsi class connection
504   *
505   * Notes: Here iser intialize (or re-initialize) stop_completion as
506   *        from this point iscsi must call conn_stop in session/connection
507   *        teardown so iser transport must wait for it.
508   */
iscsi_iser_conn_start(struct iscsi_cls_conn * cls_conn)509  static int iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn)
510  {
511  	struct iscsi_conn *iscsi_conn;
512  	struct iser_conn *iser_conn;
513  
514  	iscsi_conn = cls_conn->dd_data;
515  	iser_conn = iscsi_conn->dd_data;
516  	reinit_completion(&iser_conn->stop_completion);
517  
518  	return iscsi_conn_start(cls_conn);
519  }
520  
521  /**
522   * iscsi_iser_conn_stop() - stop iscsi-iser connection
523   * @cls_conn:  iscsi class connection
524   * @flag:      indicate if recover or terminate (passed as is)
525   *
526   * Notes: Calling iscsi_conn_stop might theoretically race with
527   *        DEVICE_REMOVAL event and dereference a previously freed RDMA device
528   *        handle, so we call it under iser the state lock to protect against
529   *        this kind of race.
530   */
iscsi_iser_conn_stop(struct iscsi_cls_conn * cls_conn,int flag)531  static void iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
532  {
533  	struct iscsi_conn *conn = cls_conn->dd_data;
534  	struct iser_conn *iser_conn = conn->dd_data;
535  
536  	iser_info("stopping iscsi_conn: %p, iser_conn: %p\n", conn, iser_conn);
537  
538  	/*
539  	 * Userspace may have goofed up and not bound the connection or
540  	 * might have only partially setup the connection.
541  	 */
542  	if (iser_conn) {
543  		mutex_lock(&iser_conn->state_mutex);
544  		mutex_lock(&unbind_iser_conn_mutex);
545  		iser_conn_terminate(iser_conn);
546  		iscsi_conn_stop(cls_conn, flag);
547  
548  		/* unbind */
549  		iser_conn->iscsi_conn = NULL;
550  		conn->dd_data = NULL;
551  		mutex_unlock(&unbind_iser_conn_mutex);
552  
553  		complete(&iser_conn->stop_completion);
554  		mutex_unlock(&iser_conn->state_mutex);
555  	} else {
556  		iscsi_conn_stop(cls_conn, flag);
557  	}
558  }
559  
560  /**
561   * iscsi_iser_session_destroy() - destroy iscsi-iser session
562   * @cls_session: iscsi class session
563   *
564   * Removes and free iscsi host.
565   */
iscsi_iser_session_destroy(struct iscsi_cls_session * cls_session)566  static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
567  {
568  	struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
569  
570  	iscsi_session_teardown(cls_session);
571  	iscsi_host_remove(shost, false);
572  	iscsi_host_free(shost);
573  }
574  
iser_dif_prot_caps(int prot_caps)575  static inline unsigned int iser_dif_prot_caps(int prot_caps)
576  {
577  	int ret = 0;
578  
579  	if (prot_caps & IB_PROT_T10DIF_TYPE_1)
580  		ret |= SHOST_DIF_TYPE1_PROTECTION |
581  		       SHOST_DIX_TYPE0_PROTECTION |
582  		       SHOST_DIX_TYPE1_PROTECTION;
583  	if (prot_caps & IB_PROT_T10DIF_TYPE_2)
584  		ret |= SHOST_DIF_TYPE2_PROTECTION |
585  		       SHOST_DIX_TYPE2_PROTECTION;
586  	if (prot_caps & IB_PROT_T10DIF_TYPE_3)
587  		ret |= SHOST_DIF_TYPE3_PROTECTION |
588  		       SHOST_DIX_TYPE3_PROTECTION;
589  
590  	return ret;
591  }
592  
593  /**
594   * iscsi_iser_session_create() - create an iscsi-iser session
595   * @ep:             iscsi end-point handle
596   * @cmds_max:       maximum commands in this session
597   * @qdepth:         session command queue depth
598   * @initial_cmdsn:  initiator command sequnce number
599   *
600   * Allocates and adds a scsi host, expose DIF supprot if
601   * exists, and sets up an iscsi session.
602   */
603  static struct iscsi_cls_session *
iscsi_iser_session_create(struct iscsi_endpoint * ep,uint16_t cmds_max,uint16_t qdepth,uint32_t initial_cmdsn)604  iscsi_iser_session_create(struct iscsi_endpoint *ep,
605  			  uint16_t cmds_max, uint16_t qdepth,
606  			  uint32_t initial_cmdsn)
607  {
608  	struct iscsi_cls_session *cls_session;
609  	struct Scsi_Host *shost;
610  	struct iser_conn *iser_conn = NULL;
611  	struct ib_conn *ib_conn;
612  	struct ib_device *ib_dev;
613  	u32 max_fr_sectors;
614  
615  	shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0);
616  	if (!shost)
617  		return NULL;
618  	shost->transportt = iscsi_iser_scsi_transport;
619  	shost->cmd_per_lun = qdepth;
620  	shost->max_lun = iscsi_max_lun;
621  	shost->max_id = 0;
622  	shost->max_channel = 0;
623  	shost->max_cmd_len = 16;
624  
625  	/*
626  	 * older userspace tools (before 2.0-870) did not pass us
627  	 * the leading conn's ep so this will be NULL;
628  	 */
629  	if (ep) {
630  		iser_conn = ep->dd_data;
631  		shost->sg_tablesize = iser_conn->scsi_sg_tablesize;
632  		shost->can_queue = min_t(u16, cmds_max, iser_conn->max_cmds);
633  
634  		mutex_lock(&iser_conn->state_mutex);
635  		if (iser_conn->state != ISER_CONN_UP) {
636  			iser_err("iser conn %p already started teardown\n",
637  				 iser_conn);
638  			mutex_unlock(&iser_conn->state_mutex);
639  			goto free_host;
640  		}
641  
642  		ib_conn = &iser_conn->ib_conn;
643  		ib_dev = ib_conn->device->ib_device;
644  		if (ib_conn->pi_support) {
645  			u32 sig_caps = ib_dev->attrs.sig_prot_cap;
646  
647  			shost->sg_prot_tablesize = shost->sg_tablesize;
648  			scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps));
649  			scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP |
650  						   SHOST_DIX_GUARD_CRC);
651  		}
652  
653  		if (!(ib_dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG))
654  			shost->virt_boundary_mask = SZ_4K - 1;
655  
656  		if (iscsi_host_add(shost, ib_dev->dev.parent)) {
657  			mutex_unlock(&iser_conn->state_mutex);
658  			goto free_host;
659  		}
660  		mutex_unlock(&iser_conn->state_mutex);
661  	} else {
662  		shost->can_queue = min_t(u16, cmds_max, ISER_DEF_XMIT_CMDS_MAX);
663  		if (iscsi_host_add(shost, NULL))
664  			goto free_host;
665  	}
666  
667  	max_fr_sectors = (shost->sg_tablesize * PAGE_SIZE) >> 9;
668  	shost->max_sectors = min(iser_max_sectors, max_fr_sectors);
669  
670  	iser_dbg("iser_conn %p, sg_tablesize %u, max_sectors %u\n",
671  		 iser_conn, shost->sg_tablesize,
672  		 shost->max_sectors);
673  
674  	if (shost->max_sectors < iser_max_sectors)
675  		iser_warn("max_sectors was reduced from %u to %u\n",
676  			  iser_max_sectors, shost->max_sectors);
677  
678  	cls_session = iscsi_session_setup(&iscsi_iser_transport, shost,
679  					  shost->can_queue, 0,
680  					  sizeof(struct iscsi_iser_task),
681  					  initial_cmdsn, 0);
682  	if (!cls_session)
683  		goto remove_host;
684  
685  	return cls_session;
686  
687  remove_host:
688  	iscsi_host_remove(shost, false);
689  free_host:
690  	iscsi_host_free(shost);
691  	return NULL;
692  }
693  
iscsi_iser_set_param(struct iscsi_cls_conn * cls_conn,enum iscsi_param param,char * buf,int buflen)694  static int iscsi_iser_set_param(struct iscsi_cls_conn *cls_conn,
695  				enum iscsi_param param, char *buf, int buflen)
696  {
697  	int value;
698  
699  	switch (param) {
700  	case ISCSI_PARAM_MAX_RECV_DLENGTH:
701  		/* TBD */
702  		break;
703  	case ISCSI_PARAM_HDRDGST_EN:
704  		sscanf(buf, "%d", &value);
705  		if (value) {
706  			iser_err("DataDigest wasn't negotiated to None\n");
707  			return -EPROTO;
708  		}
709  		break;
710  	case ISCSI_PARAM_DATADGST_EN:
711  		sscanf(buf, "%d", &value);
712  		if (value) {
713  			iser_err("DataDigest wasn't negotiated to None\n");
714  			return -EPROTO;
715  		}
716  		break;
717  	case ISCSI_PARAM_IFMARKER_EN:
718  		sscanf(buf, "%d", &value);
719  		if (value) {
720  			iser_err("IFMarker wasn't negotiated to No\n");
721  			return -EPROTO;
722  		}
723  		break;
724  	case ISCSI_PARAM_OFMARKER_EN:
725  		sscanf(buf, "%d", &value);
726  		if (value) {
727  			iser_err("OFMarker wasn't negotiated to No\n");
728  			return -EPROTO;
729  		}
730  		break;
731  	default:
732  		return iscsi_set_param(cls_conn, param, buf, buflen);
733  	}
734  
735  	return 0;
736  }
737  
738  /**
739   * iscsi_iser_conn_get_stats() - get iscsi connection statistics
740   * @cls_conn:    iscsi class connection
741   * @stats:       iscsi stats to output
742   *
743   * Output connection statistics.
744   */
iscsi_iser_conn_get_stats(struct iscsi_cls_conn * cls_conn,struct iscsi_stats * stats)745  static void iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn,
746  				      struct iscsi_stats *stats)
747  {
748  	struct iscsi_conn *conn = cls_conn->dd_data;
749  
750  	stats->txdata_octets = conn->txdata_octets;
751  	stats->rxdata_octets = conn->rxdata_octets;
752  	stats->scsicmd_pdus = conn->scsicmd_pdus_cnt;
753  	stats->dataout_pdus = conn->dataout_pdus_cnt;
754  	stats->scsirsp_pdus = conn->scsirsp_pdus_cnt;
755  	stats->datain_pdus = conn->datain_pdus_cnt; /* always 0 */
756  	stats->r2t_pdus = conn->r2t_pdus_cnt; /* always 0 */
757  	stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt;
758  	stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
759  	stats->custom_length = 0;
760  }
761  
iscsi_iser_get_ep_param(struct iscsi_endpoint * ep,enum iscsi_param param,char * buf)762  static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep,
763  				   enum iscsi_param param, char *buf)
764  {
765  	struct iser_conn *iser_conn = ep->dd_data;
766  
767  	switch (param) {
768  	case ISCSI_PARAM_CONN_PORT:
769  	case ISCSI_PARAM_CONN_ADDRESS:
770  		if (!iser_conn || !iser_conn->ib_conn.cma_id)
771  			return -ENOTCONN;
772  
773  		return iscsi_conn_get_addr_param((struct sockaddr_storage *)
774  				&iser_conn->ib_conn.cma_id->route.addr.dst_addr,
775  				param, buf);
776  	default:
777  		break;
778  	}
779  	return -ENOSYS;
780  }
781  
782  /**
783   * iscsi_iser_ep_connect() - Initiate iSER connection establishment
784   * @shost:          scsi_host
785   * @dst_addr:       destination address
786   * @non_blocking:   indicate if routine can block
787   *
788   * Allocate an iscsi endpoint, an iser_conn structure and bind them.
789   * After that start RDMA connection establishment via rdma_cm. We
790   * don't allocate iser_conn embedded in iscsi_endpoint since in teardown
791   * the endpoint will be destroyed at ep_disconnect while iser_conn will
792   * cleanup its resources asynchronuously.
793   *
794   * Return: iscsi_endpoint created by iscsi layer or ERR_PTR(error)
795   *         if fails.
796   */
iscsi_iser_ep_connect(struct Scsi_Host * shost,struct sockaddr * dst_addr,int non_blocking)797  static struct iscsi_endpoint *iscsi_iser_ep_connect(struct Scsi_Host *shost,
798  						    struct sockaddr *dst_addr,
799  						    int non_blocking)
800  {
801  	int err;
802  	struct iser_conn *iser_conn;
803  	struct iscsi_endpoint *ep;
804  
805  	ep = iscsi_create_endpoint(0);
806  	if (!ep)
807  		return ERR_PTR(-ENOMEM);
808  
809  	iser_conn = kzalloc(sizeof(*iser_conn), GFP_KERNEL);
810  	if (!iser_conn) {
811  		err = -ENOMEM;
812  		goto failure;
813  	}
814  
815  	ep->dd_data = iser_conn;
816  	iser_conn->ep = ep;
817  	iser_conn_init(iser_conn);
818  
819  	err = iser_connect(iser_conn, NULL, dst_addr, non_blocking);
820  	if (err)
821  		goto failure;
822  
823  	return ep;
824  failure:
825  	iscsi_destroy_endpoint(ep);
826  	return ERR_PTR(err);
827  }
828  
829  /**
830   * iscsi_iser_ep_poll() - poll for iser connection establishment to complete
831   * @ep:            iscsi endpoint (created at ep_connect)
832   * @timeout_ms:    polling timeout allowed in ms.
833   *
834   * This routine boils down to waiting for up_completion signaling
835   * that cma_id got CONNECTED event.
836   *
837   * Return: 1 if succeeded in connection establishment, 0 if timeout expired
838   *         (libiscsi will retry will kick in) or -1 if interrupted by signal
839   *         or more likely iser connection state transitioned to TEMINATING or
840   *         DOWN during the wait period.
841   */
iscsi_iser_ep_poll(struct iscsi_endpoint * ep,int timeout_ms)842  static int iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
843  {
844  	struct iser_conn *iser_conn = ep->dd_data;
845  	int rc;
846  
847  	rc = wait_for_completion_interruptible_timeout(&iser_conn->up_completion,
848  						       msecs_to_jiffies(timeout_ms));
849  	/* if conn establishment failed, return error code to iscsi */
850  	if (rc == 0) {
851  		mutex_lock(&iser_conn->state_mutex);
852  		if (iser_conn->state == ISER_CONN_TERMINATING ||
853  		    iser_conn->state == ISER_CONN_DOWN)
854  			rc = -1;
855  		mutex_unlock(&iser_conn->state_mutex);
856  	}
857  
858  	iser_info("iser conn %p rc = %d\n", iser_conn, rc);
859  
860  	if (rc > 0)
861  		return 1; /* success, this is the equivalent of EPOLLOUT */
862  	else if (!rc)
863  		return 0; /* timeout */
864  	else
865  		return rc; /* signal */
866  }
867  
868  /**
869   * iscsi_iser_ep_disconnect() - Initiate connection teardown process
870   * @ep:    iscsi endpoint handle
871   *
872   * This routine is not blocked by iser and RDMA termination process
873   * completion as we queue a deffered work for iser/RDMA destruction
874   * and cleanup or actually call it immediately in case we didn't pass
875   * iscsi conn bind/start stage, thus it is safe.
876   */
iscsi_iser_ep_disconnect(struct iscsi_endpoint * ep)877  static void iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep)
878  {
879  	struct iser_conn *iser_conn = ep->dd_data;
880  
881  	iser_info("ep %p iser conn %p\n", ep, iser_conn);
882  
883  	mutex_lock(&iser_conn->state_mutex);
884  	iser_conn_terminate(iser_conn);
885  
886  	/*
887  	 * if iser_conn and iscsi_conn are bound, we must wait for
888  	 * iscsi_conn_stop and flush errors completion before freeing
889  	 * the iser resources. Otherwise we are safe to free resources
890  	 * immediately.
891  	 */
892  	if (iser_conn->iscsi_conn) {
893  		INIT_WORK(&iser_conn->release_work, iser_release_work);
894  		queue_work(release_wq, &iser_conn->release_work);
895  		mutex_unlock(&iser_conn->state_mutex);
896  	} else {
897  		iser_conn->state = ISER_CONN_DOWN;
898  		mutex_unlock(&iser_conn->state_mutex);
899  		iser_conn_release(iser_conn);
900  	}
901  
902  	iscsi_destroy_endpoint(ep);
903  }
904  
iser_attr_is_visible(int param_type,int param)905  static umode_t iser_attr_is_visible(int param_type, int param)
906  {
907  	switch (param_type) {
908  	case ISCSI_HOST_PARAM:
909  		switch (param) {
910  		case ISCSI_HOST_PARAM_NETDEV_NAME:
911  		case ISCSI_HOST_PARAM_HWADDRESS:
912  		case ISCSI_HOST_PARAM_INITIATOR_NAME:
913  			return S_IRUGO;
914  		default:
915  			return 0;
916  		}
917  	case ISCSI_PARAM:
918  		switch (param) {
919  		case ISCSI_PARAM_MAX_RECV_DLENGTH:
920  		case ISCSI_PARAM_MAX_XMIT_DLENGTH:
921  		case ISCSI_PARAM_HDRDGST_EN:
922  		case ISCSI_PARAM_DATADGST_EN:
923  		case ISCSI_PARAM_CONN_ADDRESS:
924  		case ISCSI_PARAM_CONN_PORT:
925  		case ISCSI_PARAM_EXP_STATSN:
926  		case ISCSI_PARAM_PERSISTENT_ADDRESS:
927  		case ISCSI_PARAM_PERSISTENT_PORT:
928  		case ISCSI_PARAM_PING_TMO:
929  		case ISCSI_PARAM_RECV_TMO:
930  		case ISCSI_PARAM_INITIAL_R2T_EN:
931  		case ISCSI_PARAM_MAX_R2T:
932  		case ISCSI_PARAM_IMM_DATA_EN:
933  		case ISCSI_PARAM_FIRST_BURST:
934  		case ISCSI_PARAM_MAX_BURST:
935  		case ISCSI_PARAM_PDU_INORDER_EN:
936  		case ISCSI_PARAM_DATASEQ_INORDER_EN:
937  		case ISCSI_PARAM_TARGET_NAME:
938  		case ISCSI_PARAM_TPGT:
939  		case ISCSI_PARAM_USERNAME:
940  		case ISCSI_PARAM_PASSWORD:
941  		case ISCSI_PARAM_USERNAME_IN:
942  		case ISCSI_PARAM_PASSWORD_IN:
943  		case ISCSI_PARAM_FAST_ABORT:
944  		case ISCSI_PARAM_ABORT_TMO:
945  		case ISCSI_PARAM_LU_RESET_TMO:
946  		case ISCSI_PARAM_TGT_RESET_TMO:
947  		case ISCSI_PARAM_IFACE_NAME:
948  		case ISCSI_PARAM_INITIATOR_NAME:
949  		case ISCSI_PARAM_DISCOVERY_SESS:
950  			return S_IRUGO;
951  		default:
952  			return 0;
953  		}
954  	}
955  
956  	return 0;
957  }
958  
959  static const struct scsi_host_template iscsi_iser_sht = {
960  	.module                 = THIS_MODULE,
961  	.name                   = "iSCSI Initiator over iSER",
962  	.queuecommand           = iscsi_queuecommand,
963  	.change_queue_depth	= scsi_change_queue_depth,
964  	.sg_tablesize           = ISCSI_ISER_DEF_SG_TABLESIZE,
965  	.cmd_per_lun            = ISER_DEF_CMD_PER_LUN,
966  	.eh_timed_out		= iscsi_eh_cmd_timed_out,
967  	.eh_abort_handler       = iscsi_eh_abort,
968  	.eh_device_reset_handler= iscsi_eh_device_reset,
969  	.eh_target_reset_handler = iscsi_eh_recover_target,
970  	.target_alloc		= iscsi_target_alloc,
971  	.proc_name              = "iscsi_iser",
972  	.this_id                = -1,
973  	.track_queue_depth	= 1,
974  	.cmd_size		= sizeof(struct iscsi_cmd),
975  };
976  
977  static struct iscsi_transport iscsi_iser_transport = {
978  	.owner                  = THIS_MODULE,
979  	.name                   = "iser",
980  	.caps                   = CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_TEXT_NEGO,
981  	/* session management */
982  	.create_session         = iscsi_iser_session_create,
983  	.destroy_session        = iscsi_iser_session_destroy,
984  	/* connection management */
985  	.create_conn            = iscsi_iser_conn_create,
986  	.bind_conn              = iscsi_iser_conn_bind,
987  	.unbind_conn		= iscsi_conn_unbind,
988  	.destroy_conn           = iscsi_conn_teardown,
989  	.attr_is_visible	= iser_attr_is_visible,
990  	.set_param              = iscsi_iser_set_param,
991  	.get_conn_param		= iscsi_conn_get_param,
992  	.get_ep_param		= iscsi_iser_get_ep_param,
993  	.get_session_param	= iscsi_session_get_param,
994  	.start_conn             = iscsi_iser_conn_start,
995  	.stop_conn              = iscsi_iser_conn_stop,
996  	/* iscsi host params */
997  	.get_host_param		= iscsi_host_get_param,
998  	.set_host_param		= iscsi_host_set_param,
999  	/* IO */
1000  	.send_pdu		= iscsi_conn_send_pdu,
1001  	.get_stats		= iscsi_iser_conn_get_stats,
1002  	.init_task		= iscsi_iser_task_init,
1003  	.xmit_task		= iscsi_iser_task_xmit,
1004  	.cleanup_task		= iscsi_iser_cleanup_task,
1005  	.alloc_pdu		= iscsi_iser_pdu_alloc,
1006  	.check_protection	= iscsi_iser_check_protection,
1007  	/* recovery */
1008  	.session_recovery_timedout = iscsi_session_recovery_timedout,
1009  
1010  	.ep_connect             = iscsi_iser_ep_connect,
1011  	.ep_poll                = iscsi_iser_ep_poll,
1012  	.ep_disconnect          = iscsi_iser_ep_disconnect
1013  };
1014  
iser_init(void)1015  static int __init iser_init(void)
1016  {
1017  	int err;
1018  
1019  	iser_dbg("Starting iSER datamover...\n");
1020  
1021  	memset(&ig, 0, sizeof(struct iser_global));
1022  
1023  	ig.desc_cache = kmem_cache_create("iser_descriptors",
1024  					  sizeof(struct iser_tx_desc),
1025  					  0, SLAB_HWCACHE_ALIGN,
1026  					  NULL);
1027  	if (ig.desc_cache == NULL)
1028  		return -ENOMEM;
1029  
1030  	/* device init is called only after the first addr resolution */
1031  	mutex_init(&ig.device_list_mutex);
1032  	INIT_LIST_HEAD(&ig.device_list);
1033  	mutex_init(&ig.connlist_mutex);
1034  	INIT_LIST_HEAD(&ig.connlist);
1035  
1036  	release_wq = alloc_workqueue("release workqueue", 0, 0);
1037  	if (!release_wq) {
1038  		iser_err("failed to allocate release workqueue\n");
1039  		err = -ENOMEM;
1040  		goto err_alloc_wq;
1041  	}
1042  
1043  	iscsi_iser_scsi_transport = iscsi_register_transport(
1044  							&iscsi_iser_transport);
1045  	if (!iscsi_iser_scsi_transport) {
1046  		iser_err("iscsi_register_transport failed\n");
1047  		err = -EINVAL;
1048  		goto err_reg;
1049  	}
1050  
1051  	return 0;
1052  
1053  err_reg:
1054  	destroy_workqueue(release_wq);
1055  err_alloc_wq:
1056  	kmem_cache_destroy(ig.desc_cache);
1057  
1058  	return err;
1059  }
1060  
iser_exit(void)1061  static void __exit iser_exit(void)
1062  {
1063  	struct iser_conn *iser_conn, *n;
1064  	int connlist_empty;
1065  
1066  	iser_dbg("Removing iSER datamover...\n");
1067  	destroy_workqueue(release_wq);
1068  
1069  	mutex_lock(&ig.connlist_mutex);
1070  	connlist_empty = list_empty(&ig.connlist);
1071  	mutex_unlock(&ig.connlist_mutex);
1072  
1073  	if (!connlist_empty) {
1074  		iser_err("Error cleanup stage completed but we still have iser "
1075  			 "connections, destroying them anyway\n");
1076  		list_for_each_entry_safe(iser_conn, n, &ig.connlist,
1077  					 conn_list) {
1078  			iser_conn_release(iser_conn);
1079  		}
1080  	}
1081  
1082  	iscsi_unregister_transport(&iscsi_iser_transport);
1083  	kmem_cache_destroy(ig.desc_cache);
1084  }
1085  
/* Hook iser_init() and iser_exit() up as the module's init and exit routines. */
module_init(iser_init);
module_exit(iser_exit);
1088