xref: /openbmc/linux/drivers/vhost/scsi.c (revision a2cce7a9)
1 /*******************************************************************************
2  * Vhost kernel TCM fabric driver for virtio SCSI initiators
3  *
4  * (C) Copyright 2010-2013 Datera, Inc.
5  * (C) Copyright 2010-2012 IBM Corp.
6  *
7  * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
8  *
9  * Authors: Nicholas A. Bellinger <nab@daterainc.com>
10  *          Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License as published by
14  * the Free Software Foundation; either version 2 of the License, or
15  * (at your option) any later version.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  * GNU General Public License for more details.
21  *
22  ****************************************************************************/
23 
24 #include <linux/module.h>
25 #include <linux/moduleparam.h>
26 #include <generated/utsrelease.h>
27 #include <linux/utsname.h>
28 #include <linux/init.h>
29 #include <linux/slab.h>
30 #include <linux/kthread.h>
31 #include <linux/types.h>
32 #include <linux/string.h>
33 #include <linux/configfs.h>
34 #include <linux/ctype.h>
35 #include <linux/compat.h>
36 #include <linux/eventfd.h>
37 #include <linux/fs.h>
38 #include <linux/vmalloc.h>
39 #include <linux/miscdevice.h>
40 #include <asm/unaligned.h>
41 #include <scsi/scsi_common.h>
42 #include <scsi/scsi_proto.h>
43 #include <target/target_core_base.h>
44 #include <target/target_core_fabric.h>
45 #include <target/target_core_fabric_configfs.h>
46 #include <target/configfs_macros.h>
47 #include <linux/vhost.h>
48 #include <linux/virtio_scsi.h>
49 #include <linux/llist.h>
50 #include <linux/bitmap.h>
51 #include <linux/percpu_ida.h>
52 
53 #include "vhost.h"
54 
/* Driver version string */
#define VHOST_SCSI_VERSION  "v0.1"
/* Size of the ASCII tport_name buffer in struct vhost_scsi_tport */
#define VHOST_SCSI_NAMELEN 256
/*
 * Size of the per-command CDB copy (tvc_cdb); incoming CDBs larger than
 * this are rejected in vhost_scsi_handle_vq().
 */
#define VHOST_SCSI_MAX_CDB_SIZE 32
#define VHOST_SCSI_DEFAULT_TAGS 256
/* Maximum data scatterlist entries per command, see vhost_scsi_mapal() */
#define VHOST_SCSI_PREALLOC_SGLS 2048
/* Maximum pages pinned per vhost_scsi_map_to_sgl() call */
#define VHOST_SCSI_PREALLOC_UPAGES 2048
/* Maximum protection scatterlist entries per command, see vhost_scsi_mapal() */
#define VHOST_SCSI_PREALLOC_PROT_SGLS 512
62 
/*
 * Tracks a group of in-flight requests: each command holds one reference
 * on @kref, and @comp is completed by vhost_scsi_done_inflight() when the
 * last reference is dropped.
 */
struct vhost_scsi_inflight {
	/* Wait for the flush operation to finish */
	struct completion comp;
	/* Refcount for the inflight reqs */
	struct kref kref;
};
69 
/*
 * Per-command descriptor. Instances live in the session's sess_cmd_map and
 * are selected by tag in vhost_scsi_get_tag(), which zeroes everything except
 * the tvc_sgl, tvc_prot_sgl and tvc_upages pointers.
 */
struct vhost_scsi_cmd {
	/* Descriptor from vhost_get_vq_desc() for virt_queue segment */
	int tvc_vq_desc;
	/* virtio-scsi initiator task attribute */
	int tvc_task_attr;
	/* virtio-scsi response incoming iovecs */
	int tvc_in_iovs;
	/* virtio-scsi initiator data direction */
	enum dma_data_direction tvc_data_direction;
	/* Expected data transfer length from virtio-scsi header */
	u32 tvc_exp_data_len;
	/* The Tag from include/linux/virtio_scsi.h:struct virtio_scsi_cmd_req */
	u64 tvc_tag;
	/* The number of scatterlists associated with this cmd */
	u32 tvc_sgl_count;
	/* The number of protection scatterlists associated with this cmd */
	u32 tvc_prot_sgl_count;
	/* Saved unpacked SCSI LUN for vhost_scsi_submission_work() */
	u32 tvc_lun;
	/* Pointer to the SGL formatted memory from virtio-scsi */
	struct scatterlist *tvc_sgl;
	/* Pointer to the SGL formatted protection memory from virtio-scsi */
	struct scatterlist *tvc_prot_sgl;
	/* Page pointer array filled by get_user_pages_fast() when mapping */
	struct page **tvc_upages;
	/* Pointer to response header iovec */
	struct iovec *tvc_resp_iov;
	/* Pointer to vhost_scsi for our device */
	struct vhost_scsi *tvc_vhost;
	/* Pointer to vhost_virtqueue for the cmd */
	struct vhost_virtqueue *tvc_vq;
	/* Pointer to vhost nexus memory */
	struct vhost_scsi_nexus *tvc_nexus;
	/* The TCM I/O descriptor that is accessed via container_of() */
	struct se_cmd tvc_se_cmd;
	/* work item used for cmwq dispatch to vhost_scsi_submission_work() */
	struct work_struct work;
	/* Copy of the incoming SCSI command descriptor block (CDB) */
	unsigned char tvc_cdb[VHOST_SCSI_MAX_CDB_SIZE];
	/* Sense buffer that will be mapped into outgoing status */
	unsigned char tvc_sense_buf[TRANSPORT_SENSE_BUFFER];
	/* Completed commands list, serviced from vhost worker thread */
	struct llist_node tvc_completion_list;
	/* Used to track inflight cmd */
	struct vhost_scsi_inflight *inflight;
};
113 
/* I_T Nexus state: wraps the TCM session used to submit commands. */
struct vhost_scsi_nexus {
	/* Pointer to TCM session for I_T Nexus */
	struct se_session *tvn_se_sess;
};
118 
/* Target portal group state for one vhost-scsi endpoint. */
struct vhost_scsi_tpg {
	/* Vhost port target portal group tag for TCM */
	u16 tport_tpgt;
	/* Used to track number of TPG Port/Lun Links wrt to explicit I_T Nexus shutdown */
	int tv_tpg_port_count;
	/* Used for vhost_scsi device reference to tpg_nexus, protected by tv_tpg_mutex */
	int tv_tpg_vhost_count;
	/* Used for enabling T10-PI with legacy devices */
	int tv_fabric_prot_type;
	/* list for vhost_scsi_list */
	struct list_head tv_tpg_list;
	/* Used to protect access for tpg_nexus */
	struct mutex tv_tpg_mutex;
	/* Pointer to the TCM VHost I_T Nexus for this TPG endpoint */
	struct vhost_scsi_nexus *tpg_nexus;
	/* Pointer back to vhost_scsi_tport */
	struct vhost_scsi_tport *tport;
	/* Returned by vhost_scsi_make_tpg() */
	struct se_portal_group se_tpg;
	/* Pointer back to vhost_scsi, protected by tv_tpg_mutex */
	struct vhost_scsi *vhost_scsi;
};
141 
/* Target port state; its tport_name is what vhost_scsi_get_fabric_wwn() returns. */
struct vhost_scsi_tport {
	/* SCSI protocol the tport is providing */
	u8 tport_proto_id;
	/* Binary World Wide unique Port Name for Vhost Target port */
	u64 tport_wwpn;
	/* ASCII formatted WWPN for Vhost Target port */
	char tport_name[VHOST_SCSI_NAMELEN];
	/* Returned by vhost_scsi_make_tport() */
	struct se_wwn tport_wwn;
};
152 
/*
 * A queued event. Allocated by vhost_scsi_allocate_evt() and freed by
 * vhost_scsi_free_evt() after vhost_scsi_evt_work() has processed it.
 */
struct vhost_scsi_evt {
	/* event to be sent to guest */
	struct virtio_scsi_event event;
	/* event list, serviced from vhost worker thread */
	struct llist_node list;
};
159 
/* Indices into vhost_scsi->vqs[] */
enum {
	VHOST_SCSI_VQ_CTL = 0,
	/* virtqueue used by the event injection path */
	VHOST_SCSI_VQ_EVT = 1,
	VHOST_SCSI_VQ_IO = 2,
};
165 
/*
 * Feature bit mask for vhost-scsi: the generic VHOST_FEATURES plus the
 * virtio-scsi hotplug and T10-PI bits.
 *
 * Note: can't set VIRTIO_F_VERSION_1 yet, since that implies ANY_LAYOUT.
 */
enum {
	VHOST_SCSI_FEATURES = VHOST_FEATURES | (1ULL << VIRTIO_SCSI_F_HOTPLUG) |
					       (1ULL << VIRTIO_SCSI_F_T10_PI)
};
171 
#define VHOST_SCSI_MAX_TARGET	256
/* Number of entries in vhost_scsi->vqs[] */
#define VHOST_SCSI_MAX_VQ	128
/* Threshold on vs_events_nr checked by vhost_scsi_allocate_evt() */
#define VHOST_SCSI_MAX_EVENT	128
175 
/* A vhost virtqueue plus the two inflight trackers used for flushing. */
struct vhost_scsi_virtqueue {
	/* Embedded vhost virtqueue; the wrapper is recovered via container_of() */
	struct vhost_virtqueue vq;
	/*
	 * Reference counting for inflight reqs, used for flush operation. At
	 * each time, one reference tracks new commands submitted, while we
	 * wait for another one to reach 0.
	 */
	struct vhost_scsi_inflight inflights[2];
	/*
	 * Indicate current inflight in use, protected by vq->mutex.
	 * Writers must also take dev mutex and flush under it.
	 */
	int inflight_idx;
};
190 
/* Per-device state for one vhost-scsi instance. */
struct vhost_scsi {
	/* Protected by vhost_scsi->dev.mutex */
	struct vhost_scsi_tpg **vs_tpg;
	char vs_vhost_wwpn[TRANSPORT_IQN_LEN];

	struct vhost_dev dev;
	/* Indexed by the VHOST_SCSI_VQ_* constants */
	struct vhost_scsi_virtqueue vqs[VHOST_SCSI_MAX_VQ];

	struct vhost_work vs_completion_work; /* cmd completion work item */
	struct llist_head vs_completion_list; /* cmd completion queue */

	struct vhost_work vs_event_work; /* evt injection work item */
	struct llist_head vs_event_list; /* evt injection queue */

	bool vs_events_missed; /* any missed events, protected by vq->mutex */
	int vs_events_nr; /* num of pending events, protected by vq->mutex */
};
208 
/*
 * Workqueue for vhost_scsi_cmd work items -- presumably where
 * vhost_scsi_submission_work() is dispatched; the queueing site is not
 * visible in this part of the file.
 */
static struct workqueue_struct *vhost_scsi_workqueue;

/* Global mutex to protect vhost_scsi TPG list for vhost IOCTL access */
static DEFINE_MUTEX(vhost_scsi_mutex);
static LIST_HEAD(vhost_scsi_list);
214 
215 static int iov_num_pages(void __user *iov_base, size_t iov_len)
216 {
217 	return (PAGE_ALIGN((unsigned long)iov_base + iov_len) -
218 	       ((unsigned long)iov_base & PAGE_MASK)) >> PAGE_SHIFT;
219 }
220 
221 static void vhost_scsi_done_inflight(struct kref *kref)
222 {
223 	struct vhost_scsi_inflight *inflight;
224 
225 	inflight = container_of(kref, struct vhost_scsi_inflight, kref);
226 	complete(&inflight->comp);
227 }
228 
229 static void vhost_scsi_init_inflight(struct vhost_scsi *vs,
230 				    struct vhost_scsi_inflight *old_inflight[])
231 {
232 	struct vhost_scsi_inflight *new_inflight;
233 	struct vhost_virtqueue *vq;
234 	int idx, i;
235 
236 	for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
237 		vq = &vs->vqs[i].vq;
238 
239 		mutex_lock(&vq->mutex);
240 
241 		/* store old infight */
242 		idx = vs->vqs[i].inflight_idx;
243 		if (old_inflight)
244 			old_inflight[i] = &vs->vqs[i].inflights[idx];
245 
246 		/* setup new infight */
247 		vs->vqs[i].inflight_idx = idx ^ 1;
248 		new_inflight = &vs->vqs[i].inflights[idx ^ 1];
249 		kref_init(&new_inflight->kref);
250 		init_completion(&new_inflight->comp);
251 
252 		mutex_unlock(&vq->mutex);
253 	}
254 }
255 
256 static struct vhost_scsi_inflight *
257 vhost_scsi_get_inflight(struct vhost_virtqueue *vq)
258 {
259 	struct vhost_scsi_inflight *inflight;
260 	struct vhost_scsi_virtqueue *svq;
261 
262 	svq = container_of(vq, struct vhost_scsi_virtqueue, vq);
263 	inflight = &svq->inflights[svq->inflight_idx];
264 	kref_get(&inflight->kref);
265 
266 	return inflight;
267 }
268 
269 static void vhost_scsi_put_inflight(struct vhost_scsi_inflight *inflight)
270 {
271 	kref_put(&inflight->kref, vhost_scsi_done_inflight);
272 }
273 
/* Constant predicate: returns 1 for any portal group; @se_tpg is unused. */
static int vhost_scsi_check_true(struct se_portal_group *se_tpg)
{
	return 1;
}
278 
/* Constant predicate: returns 0 for any portal group; @se_tpg is unused. */
static int vhost_scsi_check_false(struct se_portal_group *se_tpg)
{
	return 0;
}
283 
/* Return the fabric name string for this driver, "vhost". */
static char *vhost_scsi_get_fabric_name(void)
{
	return "vhost";
}
288 
289 static char *vhost_scsi_get_fabric_wwn(struct se_portal_group *se_tpg)
290 {
291 	struct vhost_scsi_tpg *tpg = container_of(se_tpg,
292 				struct vhost_scsi_tpg, se_tpg);
293 	struct vhost_scsi_tport *tport = tpg->tport;
294 
295 	return &tport->tport_name[0];
296 }
297 
298 static u16 vhost_scsi_get_tpgt(struct se_portal_group *se_tpg)
299 {
300 	struct vhost_scsi_tpg *tpg = container_of(se_tpg,
301 				struct vhost_scsi_tpg, se_tpg);
302 	return tpg->tport_tpgt;
303 }
304 
305 static int vhost_scsi_check_prot_fabric_only(struct se_portal_group *se_tpg)
306 {
307 	struct vhost_scsi_tpg *tpg = container_of(se_tpg,
308 				struct vhost_scsi_tpg, se_tpg);
309 
310 	return tpg->tv_fabric_prot_type;
311 }
312 
313 static u32 vhost_scsi_tpg_get_inst_index(struct se_portal_group *se_tpg)
314 {
315 	return 1;
316 }
317 
318 static void vhost_scsi_release_cmd(struct se_cmd *se_cmd)
319 {
320 	struct vhost_scsi_cmd *tv_cmd = container_of(se_cmd,
321 				struct vhost_scsi_cmd, tvc_se_cmd);
322 	struct se_session *se_sess = tv_cmd->tvc_nexus->tvn_se_sess;
323 	int i;
324 
325 	if (tv_cmd->tvc_sgl_count) {
326 		for (i = 0; i < tv_cmd->tvc_sgl_count; i++)
327 			put_page(sg_page(&tv_cmd->tvc_sgl[i]));
328 	}
329 	if (tv_cmd->tvc_prot_sgl_count) {
330 		for (i = 0; i < tv_cmd->tvc_prot_sgl_count; i++)
331 			put_page(sg_page(&tv_cmd->tvc_prot_sgl[i]));
332 	}
333 
334 	vhost_scsi_put_inflight(tv_cmd->inflight);
335 	percpu_ida_free(&se_sess->sess_tag_pool, se_cmd->map_tag);
336 }
337 
/* Session shutdown hook: does nothing and returns 0. */
static int vhost_scsi_shutdown_session(struct se_session *se_sess)
{
	return 0;
}
342 
/* Session close hook: intentionally empty. */
static void vhost_scsi_close_session(struct se_session *se_sess)
{
}
347 
348 static u32 vhost_scsi_sess_get_index(struct se_session *se_sess)
349 {
350 	return 0;
351 }
352 
/*
 * Write-pending hook: no waiting is done here, the command is passed
 * straight to target_execute_cmd(). Always returns 0.
 */
static int vhost_scsi_write_pending(struct se_cmd *se_cmd)
{
	target_execute_cmd(se_cmd);

	return 0;
}
359 
/* Write-pending status hook: always returns 0. */
static int vhost_scsi_write_pending_status(struct se_cmd *se_cmd)
{
	return 0;
}
364 
/* Node ACL default-attribute hook: intentionally empty. */
static void vhost_scsi_set_default_node_attrs(struct se_node_acl *nacl)
{
}
369 
/* Command state hook: always returns 0. */
static int vhost_scsi_get_cmd_state(struct se_cmd *se_cmd)
{
	return 0;
}
374 
375 static void vhost_scsi_complete_cmd(struct vhost_scsi_cmd *cmd)
376 {
377 	struct vhost_scsi *vs = cmd->tvc_vhost;
378 
379 	llist_add(&cmd->tvc_completion_list, &vs->vs_completion_list);
380 
381 	vhost_work_queue(&vs->dev, &vs->vs_completion_work);
382 }
383 
384 static int vhost_scsi_queue_data_in(struct se_cmd *se_cmd)
385 {
386 	struct vhost_scsi_cmd *cmd = container_of(se_cmd,
387 				struct vhost_scsi_cmd, tvc_se_cmd);
388 	vhost_scsi_complete_cmd(cmd);
389 	return 0;
390 }
391 
392 static int vhost_scsi_queue_status(struct se_cmd *se_cmd)
393 {
394 	struct vhost_scsi_cmd *cmd = container_of(se_cmd,
395 				struct vhost_scsi_cmd, tvc_se_cmd);
396 	vhost_scsi_complete_cmd(cmd);
397 	return 0;
398 }
399 
/* Task-management response hook: intentionally empty. */
static void vhost_scsi_queue_tm_rsp(struct se_cmd *se_cmd)
{
}
404 
/* Aborted-task hook: intentionally empty. */
static void vhost_scsi_aborted_task(struct se_cmd *se_cmd)
{
}
409 
410 static void vhost_scsi_free_evt(struct vhost_scsi *vs, struct vhost_scsi_evt *evt)
411 {
412 	vs->vs_events_nr--;
413 	kfree(evt);
414 }
415 
416 static struct vhost_scsi_evt *
417 vhost_scsi_allocate_evt(struct vhost_scsi *vs,
418 		       u32 event, u32 reason)
419 {
420 	struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
421 	struct vhost_scsi_evt *evt;
422 
423 	if (vs->vs_events_nr > VHOST_SCSI_MAX_EVENT) {
424 		vs->vs_events_missed = true;
425 		return NULL;
426 	}
427 
428 	evt = kzalloc(sizeof(*evt), GFP_KERNEL);
429 	if (!evt) {
430 		vq_err(vq, "Failed to allocate vhost_scsi_evt\n");
431 		vs->vs_events_missed = true;
432 		return NULL;
433 	}
434 
435 	evt->event.event = cpu_to_vhost32(vq, event);
436 	evt->event.reason = cpu_to_vhost32(vq, reason);
437 	vs->vs_events_nr++;
438 
439 	return evt;
440 }
441 
442 static void vhost_scsi_free_cmd(struct vhost_scsi_cmd *cmd)
443 {
444 	struct se_cmd *se_cmd = &cmd->tvc_se_cmd;
445 
446 	/* TODO locking against target/backend threads? */
447 	transport_generic_free_cmd(se_cmd, 0);
448 
449 }
450 
/* Stop/free hook: forwards to target_put_sess_cmd() and returns its result. */
static int vhost_scsi_check_stop_free(struct se_cmd *se_cmd)
{
	return target_put_sess_cmd(se_cmd);
}
455 
456 static void
457 vhost_scsi_do_evt_work(struct vhost_scsi *vs, struct vhost_scsi_evt *evt)
458 {
459 	struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
460 	struct virtio_scsi_event *event = &evt->event;
461 	struct virtio_scsi_event __user *eventp;
462 	unsigned out, in;
463 	int head, ret;
464 
465 	if (!vq->private_data) {
466 		vs->vs_events_missed = true;
467 		return;
468 	}
469 
470 again:
471 	vhost_disable_notify(&vs->dev, vq);
472 	head = vhost_get_vq_desc(vq, vq->iov,
473 			ARRAY_SIZE(vq->iov), &out, &in,
474 			NULL, NULL);
475 	if (head < 0) {
476 		vs->vs_events_missed = true;
477 		return;
478 	}
479 	if (head == vq->num) {
480 		if (vhost_enable_notify(&vs->dev, vq))
481 			goto again;
482 		vs->vs_events_missed = true;
483 		return;
484 	}
485 
486 	if ((vq->iov[out].iov_len != sizeof(struct virtio_scsi_event))) {
487 		vq_err(vq, "Expecting virtio_scsi_event, got %zu bytes\n",
488 				vq->iov[out].iov_len);
489 		vs->vs_events_missed = true;
490 		return;
491 	}
492 
493 	if (vs->vs_events_missed) {
494 		event->event |= cpu_to_vhost32(vq, VIRTIO_SCSI_T_EVENTS_MISSED);
495 		vs->vs_events_missed = false;
496 	}
497 
498 	eventp = vq->iov[out].iov_base;
499 	ret = __copy_to_user(eventp, event, sizeof(*event));
500 	if (!ret)
501 		vhost_add_used_and_signal(&vs->dev, vq, head, 0);
502 	else
503 		vq_err(vq, "Faulted on vhost_scsi_send_event\n");
504 }
505 
506 static void vhost_scsi_evt_work(struct vhost_work *work)
507 {
508 	struct vhost_scsi *vs = container_of(work, struct vhost_scsi,
509 					vs_event_work);
510 	struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
511 	struct vhost_scsi_evt *evt;
512 	struct llist_node *llnode;
513 
514 	mutex_lock(&vq->mutex);
515 	llnode = llist_del_all(&vs->vs_event_list);
516 	while (llnode) {
517 		evt = llist_entry(llnode, struct vhost_scsi_evt, list);
518 		llnode = llist_next(llnode);
519 		vhost_scsi_do_evt_work(vs, evt);
520 		vhost_scsi_free_evt(vs, evt);
521 	}
522 	mutex_unlock(&vq->mutex);
523 }
524 
525 /* Fill in status and signal that we are done processing this command
526  *
527  * This is scheduled in the vhost work queue so we are called with the owner
528  * process mm and can access the vring.
529  */
530 static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
531 {
532 	struct vhost_scsi *vs = container_of(work, struct vhost_scsi,
533 					vs_completion_work);
534 	DECLARE_BITMAP(signal, VHOST_SCSI_MAX_VQ);
535 	struct virtio_scsi_cmd_resp v_rsp;
536 	struct vhost_scsi_cmd *cmd;
537 	struct llist_node *llnode;
538 	struct se_cmd *se_cmd;
539 	struct iov_iter iov_iter;
540 	int ret, vq;
541 
542 	bitmap_zero(signal, VHOST_SCSI_MAX_VQ);
543 	llnode = llist_del_all(&vs->vs_completion_list);
544 	while (llnode) {
545 		cmd = llist_entry(llnode, struct vhost_scsi_cmd,
546 				     tvc_completion_list);
547 		llnode = llist_next(llnode);
548 		se_cmd = &cmd->tvc_se_cmd;
549 
550 		pr_debug("%s tv_cmd %p resid %u status %#02x\n", __func__,
551 			cmd, se_cmd->residual_count, se_cmd->scsi_status);
552 
553 		memset(&v_rsp, 0, sizeof(v_rsp));
554 		v_rsp.resid = cpu_to_vhost32(cmd->tvc_vq, se_cmd->residual_count);
555 		/* TODO is status_qualifier field needed? */
556 		v_rsp.status = se_cmd->scsi_status;
557 		v_rsp.sense_len = cpu_to_vhost32(cmd->tvc_vq,
558 						 se_cmd->scsi_sense_length);
559 		memcpy(v_rsp.sense, cmd->tvc_sense_buf,
560 		       se_cmd->scsi_sense_length);
561 
562 		iov_iter_init(&iov_iter, READ, cmd->tvc_resp_iov,
563 			      cmd->tvc_in_iovs, sizeof(v_rsp));
564 		ret = copy_to_iter(&v_rsp, sizeof(v_rsp), &iov_iter);
565 		if (likely(ret == sizeof(v_rsp))) {
566 			struct vhost_scsi_virtqueue *q;
567 			vhost_add_used(cmd->tvc_vq, cmd->tvc_vq_desc, 0);
568 			q = container_of(cmd->tvc_vq, struct vhost_scsi_virtqueue, vq);
569 			vq = q - vs->vqs;
570 			__set_bit(vq, signal);
571 		} else
572 			pr_err("Faulted on virtio_scsi_cmd_resp\n");
573 
574 		vhost_scsi_free_cmd(cmd);
575 	}
576 
577 	vq = -1;
578 	while ((vq = find_next_bit(signal, VHOST_SCSI_MAX_VQ, vq + 1))
579 		< VHOST_SCSI_MAX_VQ)
580 		vhost_signal(&vs->dev, &vs->vqs[vq].vq);
581 }
582 
583 static struct vhost_scsi_cmd *
584 vhost_scsi_get_tag(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg,
585 		   unsigned char *cdb, u64 scsi_tag, u16 lun, u8 task_attr,
586 		   u32 exp_data_len, int data_direction)
587 {
588 	struct vhost_scsi_cmd *cmd;
589 	struct vhost_scsi_nexus *tv_nexus;
590 	struct se_session *se_sess;
591 	struct scatterlist *sg, *prot_sg;
592 	struct page **pages;
593 	int tag;
594 
595 	tv_nexus = tpg->tpg_nexus;
596 	if (!tv_nexus) {
597 		pr_err("Unable to locate active struct vhost_scsi_nexus\n");
598 		return ERR_PTR(-EIO);
599 	}
600 	se_sess = tv_nexus->tvn_se_sess;
601 
602 	tag = percpu_ida_alloc(&se_sess->sess_tag_pool, TASK_RUNNING);
603 	if (tag < 0) {
604 		pr_err("Unable to obtain tag for vhost_scsi_cmd\n");
605 		return ERR_PTR(-ENOMEM);
606 	}
607 
608 	cmd = &((struct vhost_scsi_cmd *)se_sess->sess_cmd_map)[tag];
609 	sg = cmd->tvc_sgl;
610 	prot_sg = cmd->tvc_prot_sgl;
611 	pages = cmd->tvc_upages;
612 	memset(cmd, 0, sizeof(struct vhost_scsi_cmd));
613 
614 	cmd->tvc_sgl = sg;
615 	cmd->tvc_prot_sgl = prot_sg;
616 	cmd->tvc_upages = pages;
617 	cmd->tvc_se_cmd.map_tag = tag;
618 	cmd->tvc_tag = scsi_tag;
619 	cmd->tvc_lun = lun;
620 	cmd->tvc_task_attr = task_attr;
621 	cmd->tvc_exp_data_len = exp_data_len;
622 	cmd->tvc_data_direction = data_direction;
623 	cmd->tvc_nexus = tv_nexus;
624 	cmd->inflight = vhost_scsi_get_inflight(vq);
625 
626 	memcpy(cmd->tvc_cdb, cdb, VHOST_SCSI_MAX_CDB_SIZE);
627 
628 	return cmd;
629 }
630 
631 /*
632  * Map a user memory range into a scatterlist
633  *
634  * Returns the number of scatterlist entries used or -errno on error.
635  */
636 static int
637 vhost_scsi_map_to_sgl(struct vhost_scsi_cmd *cmd,
638 		      void __user *ptr,
639 		      size_t len,
640 		      struct scatterlist *sgl,
641 		      bool write)
642 {
643 	unsigned int npages = 0, offset, nbytes;
644 	unsigned int pages_nr = iov_num_pages(ptr, len);
645 	struct scatterlist *sg = sgl;
646 	struct page **pages = cmd->tvc_upages;
647 	int ret, i;
648 
649 	if (pages_nr > VHOST_SCSI_PREALLOC_UPAGES) {
650 		pr_err("vhost_scsi_map_to_sgl() pages_nr: %u greater than"
651 		       " preallocated VHOST_SCSI_PREALLOC_UPAGES: %u\n",
652 			pages_nr, VHOST_SCSI_PREALLOC_UPAGES);
653 		return -ENOBUFS;
654 	}
655 
656 	ret = get_user_pages_fast((unsigned long)ptr, pages_nr, write, pages);
657 	/* No pages were pinned */
658 	if (ret < 0)
659 		goto out;
660 	/* Less pages pinned than wanted */
661 	if (ret != pages_nr) {
662 		for (i = 0; i < ret; i++)
663 			put_page(pages[i]);
664 		ret = -EFAULT;
665 		goto out;
666 	}
667 
668 	while (len > 0) {
669 		offset = (uintptr_t)ptr & ~PAGE_MASK;
670 		nbytes = min_t(unsigned int, PAGE_SIZE - offset, len);
671 		sg_set_page(sg, pages[npages], nbytes, offset);
672 		ptr += nbytes;
673 		len -= nbytes;
674 		sg++;
675 		npages++;
676 	}
677 
678 out:
679 	return ret;
680 }
681 
682 static int
683 vhost_scsi_calc_sgls(struct iov_iter *iter, size_t bytes, int max_sgls)
684 {
685 	int sgl_count = 0;
686 
687 	if (!iter || !iter->iov) {
688 		pr_err("%s: iter->iov is NULL, but expected bytes: %zu"
689 		       " present\n", __func__, bytes);
690 		return -EINVAL;
691 	}
692 
693 	sgl_count = iov_iter_npages(iter, 0xffff);
694 	if (sgl_count > max_sgls) {
695 		pr_err("%s: requested sgl_count: %d exceeds pre-allocated"
696 		       " max_sgls: %d\n", __func__, sgl_count, max_sgls);
697 		return -EINVAL;
698 	}
699 	return sgl_count;
700 }
701 
702 static int
703 vhost_scsi_iov_to_sgl(struct vhost_scsi_cmd *cmd, bool write,
704 		      struct iov_iter *iter,
705 		      struct scatterlist *sg, int sg_count)
706 {
707 	size_t off = iter->iov_offset;
708 	int i, ret;
709 
710 	for (i = 0; i < iter->nr_segs; i++) {
711 		void __user *base = iter->iov[i].iov_base + off;
712 		size_t len = iter->iov[i].iov_len - off;
713 
714 		ret = vhost_scsi_map_to_sgl(cmd, base, len, sg, write);
715 		if (ret < 0) {
716 			for (i = 0; i < sg_count; i++) {
717 				struct page *page = sg_page(&sg[i]);
718 				if (page)
719 					put_page(page);
720 			}
721 			return ret;
722 		}
723 		sg += ret;
724 		off = 0;
725 	}
726 	return 0;
727 }
728 
729 static int
730 vhost_scsi_mapal(struct vhost_scsi_cmd *cmd,
731 		 size_t prot_bytes, struct iov_iter *prot_iter,
732 		 size_t data_bytes, struct iov_iter *data_iter)
733 {
734 	int sgl_count, ret;
735 	bool write = (cmd->tvc_data_direction == DMA_FROM_DEVICE);
736 
737 	if (prot_bytes) {
738 		sgl_count = vhost_scsi_calc_sgls(prot_iter, prot_bytes,
739 						 VHOST_SCSI_PREALLOC_PROT_SGLS);
740 		if (sgl_count < 0)
741 			return sgl_count;
742 
743 		sg_init_table(cmd->tvc_prot_sgl, sgl_count);
744 		cmd->tvc_prot_sgl_count = sgl_count;
745 		pr_debug("%s prot_sg %p prot_sgl_count %u\n", __func__,
746 			 cmd->tvc_prot_sgl, cmd->tvc_prot_sgl_count);
747 
748 		ret = vhost_scsi_iov_to_sgl(cmd, write, prot_iter,
749 					    cmd->tvc_prot_sgl,
750 					    cmd->tvc_prot_sgl_count);
751 		if (ret < 0) {
752 			cmd->tvc_prot_sgl_count = 0;
753 			return ret;
754 		}
755 	}
756 	sgl_count = vhost_scsi_calc_sgls(data_iter, data_bytes,
757 					 VHOST_SCSI_PREALLOC_SGLS);
758 	if (sgl_count < 0)
759 		return sgl_count;
760 
761 	sg_init_table(cmd->tvc_sgl, sgl_count);
762 	cmd->tvc_sgl_count = sgl_count;
763 	pr_debug("%s data_sg %p data_sgl_count %u\n", __func__,
764 		  cmd->tvc_sgl, cmd->tvc_sgl_count);
765 
766 	ret = vhost_scsi_iov_to_sgl(cmd, write, data_iter,
767 				    cmd->tvc_sgl, cmd->tvc_sgl_count);
768 	if (ret < 0) {
769 		cmd->tvc_sgl_count = 0;
770 		return ret;
771 	}
772 	return 0;
773 }
774 
775 static int vhost_scsi_to_tcm_attr(int attr)
776 {
777 	switch (attr) {
778 	case VIRTIO_SCSI_S_SIMPLE:
779 		return TCM_SIMPLE_TAG;
780 	case VIRTIO_SCSI_S_ORDERED:
781 		return TCM_ORDERED_TAG;
782 	case VIRTIO_SCSI_S_HEAD:
783 		return TCM_HEAD_TAG;
784 	case VIRTIO_SCSI_S_ACA:
785 		return TCM_ACA_TAG;
786 	default:
787 		break;
788 	}
789 	return TCM_SIMPLE_TAG;
790 }
791 
792 static void vhost_scsi_submission_work(struct work_struct *work)
793 {
794 	struct vhost_scsi_cmd *cmd =
795 		container_of(work, struct vhost_scsi_cmd, work);
796 	struct vhost_scsi_nexus *tv_nexus;
797 	struct se_cmd *se_cmd = &cmd->tvc_se_cmd;
798 	struct scatterlist *sg_ptr, *sg_prot_ptr = NULL;
799 	int rc;
800 
801 	/* FIXME: BIDI operation */
802 	if (cmd->tvc_sgl_count) {
803 		sg_ptr = cmd->tvc_sgl;
804 
805 		if (cmd->tvc_prot_sgl_count)
806 			sg_prot_ptr = cmd->tvc_prot_sgl;
807 		else
808 			se_cmd->prot_pto = true;
809 	} else {
810 		sg_ptr = NULL;
811 	}
812 	tv_nexus = cmd->tvc_nexus;
813 
814 	se_cmd->tag = 0;
815 	rc = target_submit_cmd_map_sgls(se_cmd, tv_nexus->tvn_se_sess,
816 			cmd->tvc_cdb, &cmd->tvc_sense_buf[0],
817 			cmd->tvc_lun, cmd->tvc_exp_data_len,
818 			vhost_scsi_to_tcm_attr(cmd->tvc_task_attr),
819 			cmd->tvc_data_direction, TARGET_SCF_ACK_KREF,
820 			sg_ptr, cmd->tvc_sgl_count, NULL, 0, sg_prot_ptr,
821 			cmd->tvc_prot_sgl_count);
822 	if (rc < 0) {
823 		transport_send_check_condition_and_sense(se_cmd,
824 				TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE, 0);
825 		transport_generic_free_cmd(se_cmd, 0);
826 	}
827 }
828 
829 static void
830 vhost_scsi_send_bad_target(struct vhost_scsi *vs,
831 			   struct vhost_virtqueue *vq,
832 			   int head, unsigned out)
833 {
834 	struct virtio_scsi_cmd_resp __user *resp;
835 	struct virtio_scsi_cmd_resp rsp;
836 	int ret;
837 
838 	memset(&rsp, 0, sizeof(rsp));
839 	rsp.response = VIRTIO_SCSI_S_BAD_TARGET;
840 	resp = vq->iov[out].iov_base;
841 	ret = __copy_to_user(resp, &rsp, sizeof(rsp));
842 	if (!ret)
843 		vhost_add_used_and_signal(&vs->dev, vq, head, 0);
844 	else
845 		pr_err("Faulted on virtio_scsi_cmd_resp\n");
846 }
847 
848 static void
849 vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
850 {
851 	struct vhost_scsi_tpg **vs_tpg, *tpg;
852 	struct virtio_scsi_cmd_req v_req;
853 	struct virtio_scsi_cmd_req_pi v_req_pi;
854 	struct vhost_scsi_cmd *cmd;
855 	struct iov_iter out_iter, in_iter, prot_iter, data_iter;
856 	u64 tag;
857 	u32 exp_data_len, data_direction;
858 	unsigned out, in;
859 	int head, ret, prot_bytes;
860 	size_t req_size, rsp_size = sizeof(struct virtio_scsi_cmd_resp);
861 	size_t out_size, in_size;
862 	u16 lun;
863 	u8 *target, *lunp, task_attr;
864 	bool t10_pi = vhost_has_feature(vq, VIRTIO_SCSI_F_T10_PI);
865 	void *req, *cdb;
866 
867 	mutex_lock(&vq->mutex);
868 	/*
869 	 * We can handle the vq only after the endpoint is setup by calling the
870 	 * VHOST_SCSI_SET_ENDPOINT ioctl.
871 	 */
872 	vs_tpg = vq->private_data;
873 	if (!vs_tpg)
874 		goto out;
875 
876 	vhost_disable_notify(&vs->dev, vq);
877 
878 	for (;;) {
879 		head = vhost_get_vq_desc(vq, vq->iov,
880 					 ARRAY_SIZE(vq->iov), &out, &in,
881 					 NULL, NULL);
882 		pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n",
883 			 head, out, in);
884 		/* On error, stop handling until the next kick. */
885 		if (unlikely(head < 0))
886 			break;
887 		/* Nothing new?  Wait for eventfd to tell us they refilled. */
888 		if (head == vq->num) {
889 			if (unlikely(vhost_enable_notify(&vs->dev, vq))) {
890 				vhost_disable_notify(&vs->dev, vq);
891 				continue;
892 			}
893 			break;
894 		}
895 		/*
896 		 * Check for a sane response buffer so we can report early
897 		 * errors back to the guest.
898 		 */
899 		if (unlikely(vq->iov[out].iov_len < rsp_size)) {
900 			vq_err(vq, "Expecting at least virtio_scsi_cmd_resp"
901 				" size, got %zu bytes\n", vq->iov[out].iov_len);
902 			break;
903 		}
904 		/*
905 		 * Setup pointers and values based upon different virtio-scsi
906 		 * request header if T10_PI is enabled in KVM guest.
907 		 */
908 		if (t10_pi) {
909 			req = &v_req_pi;
910 			req_size = sizeof(v_req_pi);
911 			lunp = &v_req_pi.lun[0];
912 			target = &v_req_pi.lun[1];
913 		} else {
914 			req = &v_req;
915 			req_size = sizeof(v_req);
916 			lunp = &v_req.lun[0];
917 			target = &v_req.lun[1];
918 		}
919 		/*
920 		 * FIXME: Not correct for BIDI operation
921 		 */
922 		out_size = iov_length(vq->iov, out);
923 		in_size = iov_length(&vq->iov[out], in);
924 
925 		/*
926 		 * Copy over the virtio-scsi request header, which for a
927 		 * ANY_LAYOUT enabled guest may span multiple iovecs, or a
928 		 * single iovec may contain both the header + outgoing
929 		 * WRITE payloads.
930 		 *
931 		 * copy_from_iter() will advance out_iter, so that it will
932 		 * point at the start of the outgoing WRITE payload, if
933 		 * DMA_TO_DEVICE is set.
934 		 */
935 		iov_iter_init(&out_iter, WRITE, vq->iov, out, out_size);
936 
937 		ret = copy_from_iter(req, req_size, &out_iter);
938 		if (unlikely(ret != req_size)) {
939 			vq_err(vq, "Faulted on copy_from_iter\n");
940 			vhost_scsi_send_bad_target(vs, vq, head, out);
941 			continue;
942 		}
943 		/* virtio-scsi spec requires byte 0 of the lun to be 1 */
944 		if (unlikely(*lunp != 1)) {
945 			vq_err(vq, "Illegal virtio-scsi lun: %u\n", *lunp);
946 			vhost_scsi_send_bad_target(vs, vq, head, out);
947 			continue;
948 		}
949 
950 		tpg = ACCESS_ONCE(vs_tpg[*target]);
951 		if (unlikely(!tpg)) {
952 			/* Target does not exist, fail the request */
953 			vhost_scsi_send_bad_target(vs, vq, head, out);
954 			continue;
955 		}
956 		/*
957 		 * Determine data_direction by calculating the total outgoing
958 		 * iovec sizes + incoming iovec sizes vs. virtio-scsi request +
959 		 * response headers respectively.
960 		 *
961 		 * For DMA_TO_DEVICE this is out_iter, which is already pointing
962 		 * to the right place.
963 		 *
964 		 * For DMA_FROM_DEVICE, the iovec will be just past the end
965 		 * of the virtio-scsi response header in either the same
966 		 * or immediately following iovec.
967 		 *
968 		 * Any associated T10_PI bytes for the outgoing / incoming
969 		 * payloads are included in calculation of exp_data_len here.
970 		 */
971 		prot_bytes = 0;
972 
973 		if (out_size > req_size) {
974 			data_direction = DMA_TO_DEVICE;
975 			exp_data_len = out_size - req_size;
976 			data_iter = out_iter;
977 		} else if (in_size > rsp_size) {
978 			data_direction = DMA_FROM_DEVICE;
979 			exp_data_len = in_size - rsp_size;
980 
981 			iov_iter_init(&in_iter, READ, &vq->iov[out], in,
982 				      rsp_size + exp_data_len);
983 			iov_iter_advance(&in_iter, rsp_size);
984 			data_iter = in_iter;
985 		} else {
986 			data_direction = DMA_NONE;
987 			exp_data_len = 0;
988 		}
989 		/*
990 		 * If T10_PI header + payload is present, setup prot_iter values
991 		 * and recalculate data_iter for vhost_scsi_mapal() mapping to
992 		 * host scatterlists via get_user_pages_fast().
993 		 */
994 		if (t10_pi) {
995 			if (v_req_pi.pi_bytesout) {
996 				if (data_direction != DMA_TO_DEVICE) {
997 					vq_err(vq, "Received non zero pi_bytesout,"
998 						" but wrong data_direction\n");
999 					vhost_scsi_send_bad_target(vs, vq, head, out);
1000 					continue;
1001 				}
1002 				prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesout);
1003 			} else if (v_req_pi.pi_bytesin) {
1004 				if (data_direction != DMA_FROM_DEVICE) {
1005 					vq_err(vq, "Received non zero pi_bytesin,"
1006 						" but wrong data_direction\n");
1007 					vhost_scsi_send_bad_target(vs, vq, head, out);
1008 					continue;
1009 				}
1010 				prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesin);
1011 			}
1012 			/*
1013 			 * Set prot_iter to data_iter, and advance past any
1014 			 * preceeding prot_bytes that may be present.
1015 			 *
1016 			 * Also fix up the exp_data_len to reflect only the
1017 			 * actual data payload length.
1018 			 */
1019 			if (prot_bytes) {
1020 				exp_data_len -= prot_bytes;
1021 				prot_iter = data_iter;
1022 				iov_iter_advance(&data_iter, prot_bytes);
1023 			}
1024 			tag = vhost64_to_cpu(vq, v_req_pi.tag);
1025 			task_attr = v_req_pi.task_attr;
1026 			cdb = &v_req_pi.cdb[0];
1027 			lun = ((v_req_pi.lun[2] << 8) | v_req_pi.lun[3]) & 0x3FFF;
1028 		} else {
1029 			tag = vhost64_to_cpu(vq, v_req.tag);
1030 			task_attr = v_req.task_attr;
1031 			cdb = &v_req.cdb[0];
1032 			lun = ((v_req.lun[2] << 8) | v_req.lun[3]) & 0x3FFF;
1033 		}
1034 		/*
1035 		 * Check that the received CDB size does not exceeded our
1036 		 * hardcoded max for vhost-scsi, then get a pre-allocated
1037 		 * cmd descriptor for the new virtio-scsi tag.
1038 		 *
1039 		 * TODO what if cdb was too small for varlen cdb header?
1040 		 */
1041 		if (unlikely(scsi_command_size(cdb) > VHOST_SCSI_MAX_CDB_SIZE)) {
1042 			vq_err(vq, "Received SCSI CDB with command_size: %d that"
1043 				" exceeds SCSI_MAX_VARLEN_CDB_SIZE: %d\n",
1044 				scsi_command_size(cdb), VHOST_SCSI_MAX_CDB_SIZE);
1045 			vhost_scsi_send_bad_target(vs, vq, head, out);
1046 			continue;
1047 		}
1048 		cmd = vhost_scsi_get_tag(vq, tpg, cdb, tag, lun, task_attr,
1049 					 exp_data_len + prot_bytes,
1050 					 data_direction);
1051 		if (IS_ERR(cmd)) {
1052 			vq_err(vq, "vhost_scsi_get_tag failed %ld\n",
1053 			       PTR_ERR(cmd));
1054 			vhost_scsi_send_bad_target(vs, vq, head, out);
1055 			continue;
1056 		}
1057 		cmd->tvc_vhost = vs;
1058 		cmd->tvc_vq = vq;
1059 		cmd->tvc_resp_iov = &vq->iov[out];
1060 		cmd->tvc_in_iovs = in;
1061 
1062 		pr_debug("vhost_scsi got command opcode: %#02x, lun: %d\n",
1063 			 cmd->tvc_cdb[0], cmd->tvc_lun);
1064 		pr_debug("cmd: %p exp_data_len: %d, prot_bytes: %d data_direction:"
1065 			 " %d\n", cmd, exp_data_len, prot_bytes, data_direction);
1066 
1067 		if (data_direction != DMA_NONE) {
1068 			ret = vhost_scsi_mapal(cmd,
1069 					       prot_bytes, &prot_iter,
1070 					       exp_data_len, &data_iter);
1071 			if (unlikely(ret)) {
1072 				vq_err(vq, "Failed to map iov to sgl\n");
1073 				vhost_scsi_release_cmd(&cmd->tvc_se_cmd);
1074 				vhost_scsi_send_bad_target(vs, vq, head, out);
1075 				continue;
1076 			}
1077 		}
1078 		/*
1079 		 * Save the descriptor from vhost_get_vq_desc() to be used to
1080 		 * complete the virtio-scsi request in TCM callback context via
1081 		 * vhost_scsi_queue_data_in() and vhost_scsi_queue_status()
1082 		 */
1083 		cmd->tvc_vq_desc = head;
1084 		/*
1085 		 * Dispatch cmd descriptor for cmwq execution in process
1086 		 * context provided by vhost_scsi_workqueue.  This also ensures
1087 		 * cmd is executed on the same kworker CPU as this vhost
1088 		 * thread to gain positive L2 cache locality effects.
1089 		 */
1090 		INIT_WORK(&cmd->work, vhost_scsi_submission_work);
1091 		queue_work(vhost_scsi_workqueue, &cmd->work);
1092 	}
1093 out:
1094 	mutex_unlock(&vq->mutex);
1095 }
1096 
/*
 * Kick handler installed for the control virtqueue.  It performs no request
 * processing; the kick is only reported through pr_debug().
 */
static void vhost_scsi_ctl_handle_kick(struct vhost_work *work)
{
	pr_debug("%s: The handling func for control queue.\n", __func__);
}
1101 
1102 static void
1103 vhost_scsi_send_evt(struct vhost_scsi *vs,
1104 		   struct vhost_scsi_tpg *tpg,
1105 		   struct se_lun *lun,
1106 		   u32 event,
1107 		   u32 reason)
1108 {
1109 	struct vhost_scsi_evt *evt;
1110 
1111 	evt = vhost_scsi_allocate_evt(vs, event, reason);
1112 	if (!evt)
1113 		return;
1114 
1115 	if (tpg && lun) {
1116 		/* TODO: share lun setup code with virtio-scsi.ko */
1117 		/*
1118 		 * Note: evt->event is zeroed when we allocate it and
1119 		 * lun[4-7] need to be zero according to virtio-scsi spec.
1120 		 */
1121 		evt->event.lun[0] = 0x01;
1122 		evt->event.lun[1] = tpg->tport_tpgt;
1123 		if (lun->unpacked_lun >= 256)
1124 			evt->event.lun[2] = lun->unpacked_lun >> 8 | 0x40 ;
1125 		evt->event.lun[3] = lun->unpacked_lun & 0xFF;
1126 	}
1127 
1128 	llist_add(&evt->list, &vs->vs_event_list);
1129 	vhost_work_queue(&vs->dev, &vs->vs_event_work);
1130 }
1131 
1132 static void vhost_scsi_evt_handle_kick(struct vhost_work *work)
1133 {
1134 	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
1135 						poll.work);
1136 	struct vhost_scsi *vs = container_of(vq->dev, struct vhost_scsi, dev);
1137 
1138 	mutex_lock(&vq->mutex);
1139 	if (!vq->private_data)
1140 		goto out;
1141 
1142 	if (vs->vs_events_missed)
1143 		vhost_scsi_send_evt(vs, NULL, NULL, VIRTIO_SCSI_T_NO_EVENT, 0);
1144 out:
1145 	mutex_unlock(&vq->mutex);
1146 }
1147 
1148 static void vhost_scsi_handle_kick(struct vhost_work *work)
1149 {
1150 	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
1151 						poll.work);
1152 	struct vhost_scsi *vs = container_of(vq->dev, struct vhost_scsi, dev);
1153 
1154 	vhost_scsi_handle_vq(vs, vq);
1155 }
1156 
1157 static void vhost_scsi_flush_vq(struct vhost_scsi *vs, int index)
1158 {
1159 	vhost_poll_flush(&vs->vqs[index].vq.poll);
1160 }
1161 
1162 /* Callers must hold dev mutex */
1163 static void vhost_scsi_flush(struct vhost_scsi *vs)
1164 {
1165 	struct vhost_scsi_inflight *old_inflight[VHOST_SCSI_MAX_VQ];
1166 	int i;
1167 
1168 	/* Init new inflight and remember the old inflight */
1169 	vhost_scsi_init_inflight(vs, old_inflight);
1170 
1171 	/*
1172 	 * The inflight->kref was initialized to 1. We decrement it here to
1173 	 * indicate the start of the flush operation so that it will reach 0
1174 	 * when all the reqs are finished.
1175 	 */
1176 	for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
1177 		kref_put(&old_inflight[i]->kref, vhost_scsi_done_inflight);
1178 
1179 	/* Flush both the vhost poll and vhost work */
1180 	for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
1181 		vhost_scsi_flush_vq(vs, i);
1182 	vhost_work_flush(&vs->dev, &vs->vs_completion_work);
1183 	vhost_work_flush(&vs->dev, &vs->vs_event_work);
1184 
1185 	/* Wait for all reqs issued before the flush to be finished */
1186 	for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
1187 		wait_for_completion(&old_inflight[i]->comp);
1188 }
1189 
1190 /*
1191  * Called from vhost_scsi_ioctl() context to walk the list of available
1192  * vhost_scsi_tpg with an active struct vhost_scsi_nexus
1193  *
1194  *  The lock nesting rule is:
1195  *    vhost_scsi_mutex -> vs->dev.mutex -> tpg->tv_tpg_mutex -> vq->mutex
1196  */
1197 static int
1198 vhost_scsi_set_endpoint(struct vhost_scsi *vs,
1199 			struct vhost_scsi_target *t)
1200 {
1201 	struct se_portal_group *se_tpg;
1202 	struct vhost_scsi_tport *tv_tport;
1203 	struct vhost_scsi_tpg *tpg;
1204 	struct vhost_scsi_tpg **vs_tpg;
1205 	struct vhost_virtqueue *vq;
1206 	int index, ret, i, len;
1207 	bool match = false;
1208 
1209 	mutex_lock(&vhost_scsi_mutex);
1210 	mutex_lock(&vs->dev.mutex);
1211 
1212 	/* Verify that ring has been setup correctly. */
1213 	for (index = 0; index < vs->dev.nvqs; ++index) {
1214 		/* Verify that ring has been setup correctly. */
1215 		if (!vhost_vq_access_ok(&vs->vqs[index].vq)) {
1216 			ret = -EFAULT;
1217 			goto out;
1218 		}
1219 	}
1220 
1221 	len = sizeof(vs_tpg[0]) * VHOST_SCSI_MAX_TARGET;
1222 	vs_tpg = kzalloc(len, GFP_KERNEL);
1223 	if (!vs_tpg) {
1224 		ret = -ENOMEM;
1225 		goto out;
1226 	}
1227 	if (vs->vs_tpg)
1228 		memcpy(vs_tpg, vs->vs_tpg, len);
1229 
1230 	list_for_each_entry(tpg, &vhost_scsi_list, tv_tpg_list) {
1231 		mutex_lock(&tpg->tv_tpg_mutex);
1232 		if (!tpg->tpg_nexus) {
1233 			mutex_unlock(&tpg->tv_tpg_mutex);
1234 			continue;
1235 		}
1236 		if (tpg->tv_tpg_vhost_count != 0) {
1237 			mutex_unlock(&tpg->tv_tpg_mutex);
1238 			continue;
1239 		}
1240 		tv_tport = tpg->tport;
1241 
1242 		if (!strcmp(tv_tport->tport_name, t->vhost_wwpn)) {
1243 			if (vs->vs_tpg && vs->vs_tpg[tpg->tport_tpgt]) {
1244 				kfree(vs_tpg);
1245 				mutex_unlock(&tpg->tv_tpg_mutex);
1246 				ret = -EEXIST;
1247 				goto out;
1248 			}
1249 			/*
1250 			 * In order to ensure individual vhost-scsi configfs
1251 			 * groups cannot be removed while in use by vhost ioctl,
1252 			 * go ahead and take an explicit se_tpg->tpg_group.cg_item
1253 			 * dependency now.
1254 			 */
1255 			se_tpg = &tpg->se_tpg;
1256 			ret = target_depend_item(&se_tpg->tpg_group.cg_item);
1257 			if (ret) {
1258 				pr_warn("configfs_depend_item() failed: %d\n", ret);
1259 				kfree(vs_tpg);
1260 				mutex_unlock(&tpg->tv_tpg_mutex);
1261 				goto out;
1262 			}
1263 			tpg->tv_tpg_vhost_count++;
1264 			tpg->vhost_scsi = vs;
1265 			vs_tpg[tpg->tport_tpgt] = tpg;
1266 			smp_mb__after_atomic();
1267 			match = true;
1268 		}
1269 		mutex_unlock(&tpg->tv_tpg_mutex);
1270 	}
1271 
1272 	if (match) {
1273 		memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn,
1274 		       sizeof(vs->vs_vhost_wwpn));
1275 		for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
1276 			vq = &vs->vqs[i].vq;
1277 			mutex_lock(&vq->mutex);
1278 			vq->private_data = vs_tpg;
1279 			vhost_init_used(vq);
1280 			mutex_unlock(&vq->mutex);
1281 		}
1282 		ret = 0;
1283 	} else {
1284 		ret = -EEXIST;
1285 	}
1286 
1287 	/*
1288 	 * Act as synchronize_rcu to make sure access to
1289 	 * old vs->vs_tpg is finished.
1290 	 */
1291 	vhost_scsi_flush(vs);
1292 	kfree(vs->vs_tpg);
1293 	vs->vs_tpg = vs_tpg;
1294 
1295 out:
1296 	mutex_unlock(&vs->dev.mutex);
1297 	mutex_unlock(&vhost_scsi_mutex);
1298 	return ret;
1299 }
1300 
1301 static int
1302 vhost_scsi_clear_endpoint(struct vhost_scsi *vs,
1303 			  struct vhost_scsi_target *t)
1304 {
1305 	struct se_portal_group *se_tpg;
1306 	struct vhost_scsi_tport *tv_tport;
1307 	struct vhost_scsi_tpg *tpg;
1308 	struct vhost_virtqueue *vq;
1309 	bool match = false;
1310 	int index, ret, i;
1311 	u8 target;
1312 
1313 	mutex_lock(&vhost_scsi_mutex);
1314 	mutex_lock(&vs->dev.mutex);
1315 	/* Verify that ring has been setup correctly. */
1316 	for (index = 0; index < vs->dev.nvqs; ++index) {
1317 		if (!vhost_vq_access_ok(&vs->vqs[index].vq)) {
1318 			ret = -EFAULT;
1319 			goto err_dev;
1320 		}
1321 	}
1322 
1323 	if (!vs->vs_tpg) {
1324 		ret = 0;
1325 		goto err_dev;
1326 	}
1327 
1328 	for (i = 0; i < VHOST_SCSI_MAX_TARGET; i++) {
1329 		target = i;
1330 		tpg = vs->vs_tpg[target];
1331 		if (!tpg)
1332 			continue;
1333 
1334 		mutex_lock(&tpg->tv_tpg_mutex);
1335 		tv_tport = tpg->tport;
1336 		if (!tv_tport) {
1337 			ret = -ENODEV;
1338 			goto err_tpg;
1339 		}
1340 
1341 		if (strcmp(tv_tport->tport_name, t->vhost_wwpn)) {
1342 			pr_warn("tv_tport->tport_name: %s, tpg->tport_tpgt: %hu"
1343 				" does not match t->vhost_wwpn: %s, t->vhost_tpgt: %hu\n",
1344 				tv_tport->tport_name, tpg->tport_tpgt,
1345 				t->vhost_wwpn, t->vhost_tpgt);
1346 			ret = -EINVAL;
1347 			goto err_tpg;
1348 		}
1349 		tpg->tv_tpg_vhost_count--;
1350 		tpg->vhost_scsi = NULL;
1351 		vs->vs_tpg[target] = NULL;
1352 		match = true;
1353 		mutex_unlock(&tpg->tv_tpg_mutex);
1354 		/*
1355 		 * Release se_tpg->tpg_group.cg_item configfs dependency now
1356 		 * to allow vhost-scsi WWPN se_tpg->tpg_group shutdown to occur.
1357 		 */
1358 		se_tpg = &tpg->se_tpg;
1359 		target_undepend_item(&se_tpg->tpg_group.cg_item);
1360 	}
1361 	if (match) {
1362 		for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
1363 			vq = &vs->vqs[i].vq;
1364 			mutex_lock(&vq->mutex);
1365 			vq->private_data = NULL;
1366 			mutex_unlock(&vq->mutex);
1367 		}
1368 	}
1369 	/*
1370 	 * Act as synchronize_rcu to make sure access to
1371 	 * old vs->vs_tpg is finished.
1372 	 */
1373 	vhost_scsi_flush(vs);
1374 	kfree(vs->vs_tpg);
1375 	vs->vs_tpg = NULL;
1376 	WARN_ON(vs->vs_events_nr);
1377 	mutex_unlock(&vs->dev.mutex);
1378 	mutex_unlock(&vhost_scsi_mutex);
1379 	return 0;
1380 
1381 err_tpg:
1382 	mutex_unlock(&tpg->tv_tpg_mutex);
1383 err_dev:
1384 	mutex_unlock(&vs->dev.mutex);
1385 	mutex_unlock(&vhost_scsi_mutex);
1386 	return ret;
1387 }
1388 
1389 static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
1390 {
1391 	struct vhost_virtqueue *vq;
1392 	int i;
1393 
1394 	if (features & ~VHOST_SCSI_FEATURES)
1395 		return -EOPNOTSUPP;
1396 
1397 	mutex_lock(&vs->dev.mutex);
1398 	if ((features & (1 << VHOST_F_LOG_ALL)) &&
1399 	    !vhost_log_access_ok(&vs->dev)) {
1400 		mutex_unlock(&vs->dev.mutex);
1401 		return -EFAULT;
1402 	}
1403 
1404 	for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
1405 		vq = &vs->vqs[i].vq;
1406 		mutex_lock(&vq->mutex);
1407 		vq->acked_features = features;
1408 		mutex_unlock(&vq->mutex);
1409 	}
1410 	mutex_unlock(&vs->dev.mutex);
1411 	return 0;
1412 }
1413 
1414 static int vhost_scsi_open(struct inode *inode, struct file *f)
1415 {
1416 	struct vhost_scsi *vs;
1417 	struct vhost_virtqueue **vqs;
1418 	int r = -ENOMEM, i;
1419 
1420 	vs = kzalloc(sizeof(*vs), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
1421 	if (!vs) {
1422 		vs = vzalloc(sizeof(*vs));
1423 		if (!vs)
1424 			goto err_vs;
1425 	}
1426 
1427 	vqs = kmalloc(VHOST_SCSI_MAX_VQ * sizeof(*vqs), GFP_KERNEL);
1428 	if (!vqs)
1429 		goto err_vqs;
1430 
1431 	vhost_work_init(&vs->vs_completion_work, vhost_scsi_complete_cmd_work);
1432 	vhost_work_init(&vs->vs_event_work, vhost_scsi_evt_work);
1433 
1434 	vs->vs_events_nr = 0;
1435 	vs->vs_events_missed = false;
1436 
1437 	vqs[VHOST_SCSI_VQ_CTL] = &vs->vqs[VHOST_SCSI_VQ_CTL].vq;
1438 	vqs[VHOST_SCSI_VQ_EVT] = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
1439 	vs->vqs[VHOST_SCSI_VQ_CTL].vq.handle_kick = vhost_scsi_ctl_handle_kick;
1440 	vs->vqs[VHOST_SCSI_VQ_EVT].vq.handle_kick = vhost_scsi_evt_handle_kick;
1441 	for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) {
1442 		vqs[i] = &vs->vqs[i].vq;
1443 		vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick;
1444 	}
1445 	vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ);
1446 
1447 	vhost_scsi_init_inflight(vs, NULL);
1448 
1449 	f->private_data = vs;
1450 	return 0;
1451 
1452 err_vqs:
1453 	kvfree(vs);
1454 err_vs:
1455 	return r;
1456 }
1457 
1458 static int vhost_scsi_release(struct inode *inode, struct file *f)
1459 {
1460 	struct vhost_scsi *vs = f->private_data;
1461 	struct vhost_scsi_target t;
1462 
1463 	mutex_lock(&vs->dev.mutex);
1464 	memcpy(t.vhost_wwpn, vs->vs_vhost_wwpn, sizeof(t.vhost_wwpn));
1465 	mutex_unlock(&vs->dev.mutex);
1466 	vhost_scsi_clear_endpoint(vs, &t);
1467 	vhost_dev_stop(&vs->dev);
1468 	vhost_dev_cleanup(&vs->dev, false);
1469 	/* Jobs can re-queue themselves in evt kick handler. Do extra flush. */
1470 	vhost_scsi_flush(vs);
1471 	kfree(vs->dev.vqs);
1472 	kvfree(vs);
1473 	return 0;
1474 }
1475 
1476 static long
1477 vhost_scsi_ioctl(struct file *f,
1478 		 unsigned int ioctl,
1479 		 unsigned long arg)
1480 {
1481 	struct vhost_scsi *vs = f->private_data;
1482 	struct vhost_scsi_target backend;
1483 	void __user *argp = (void __user *)arg;
1484 	u64 __user *featurep = argp;
1485 	u32 __user *eventsp = argp;
1486 	u32 events_missed;
1487 	u64 features;
1488 	int r, abi_version = VHOST_SCSI_ABI_VERSION;
1489 	struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
1490 
1491 	switch (ioctl) {
1492 	case VHOST_SCSI_SET_ENDPOINT:
1493 		if (copy_from_user(&backend, argp, sizeof backend))
1494 			return -EFAULT;
1495 		if (backend.reserved != 0)
1496 			return -EOPNOTSUPP;
1497 
1498 		return vhost_scsi_set_endpoint(vs, &backend);
1499 	case VHOST_SCSI_CLEAR_ENDPOINT:
1500 		if (copy_from_user(&backend, argp, sizeof backend))
1501 			return -EFAULT;
1502 		if (backend.reserved != 0)
1503 			return -EOPNOTSUPP;
1504 
1505 		return vhost_scsi_clear_endpoint(vs, &backend);
1506 	case VHOST_SCSI_GET_ABI_VERSION:
1507 		if (copy_to_user(argp, &abi_version, sizeof abi_version))
1508 			return -EFAULT;
1509 		return 0;
1510 	case VHOST_SCSI_SET_EVENTS_MISSED:
1511 		if (get_user(events_missed, eventsp))
1512 			return -EFAULT;
1513 		mutex_lock(&vq->mutex);
1514 		vs->vs_events_missed = events_missed;
1515 		mutex_unlock(&vq->mutex);
1516 		return 0;
1517 	case VHOST_SCSI_GET_EVENTS_MISSED:
1518 		mutex_lock(&vq->mutex);
1519 		events_missed = vs->vs_events_missed;
1520 		mutex_unlock(&vq->mutex);
1521 		if (put_user(events_missed, eventsp))
1522 			return -EFAULT;
1523 		return 0;
1524 	case VHOST_GET_FEATURES:
1525 		features = VHOST_SCSI_FEATURES;
1526 		if (copy_to_user(featurep, &features, sizeof features))
1527 			return -EFAULT;
1528 		return 0;
1529 	case VHOST_SET_FEATURES:
1530 		if (copy_from_user(&features, featurep, sizeof features))
1531 			return -EFAULT;
1532 		return vhost_scsi_set_features(vs, features);
1533 	default:
1534 		mutex_lock(&vs->dev.mutex);
1535 		r = vhost_dev_ioctl(&vs->dev, ioctl, argp);
1536 		/* TODO: flush backend after dev ioctl. */
1537 		if (r == -ENOIOCTLCMD)
1538 			r = vhost_vring_ioctl(&vs->dev, ioctl, argp);
1539 		mutex_unlock(&vs->dev.mutex);
1540 		return r;
1541 	}
1542 }
1543 
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point: convert @arg with compat_ptr() and forward the
 * call to vhost_scsi_ioctl().
 */
static long vhost_scsi_compat_ioctl(struct file *f, unsigned int ioctl,
				unsigned long arg)
{
	unsigned long native_arg = (unsigned long)compat_ptr(arg);

	return vhost_scsi_ioctl(f, ioctl, native_arg);
}
#endif
1551 
1552 static const struct file_operations vhost_scsi_fops = {
1553 	.owner          = THIS_MODULE,
1554 	.release        = vhost_scsi_release,
1555 	.unlocked_ioctl = vhost_scsi_ioctl,
1556 #ifdef CONFIG_COMPAT
1557 	.compat_ioctl	= vhost_scsi_compat_ioctl,
1558 #endif
1559 	.open           = vhost_scsi_open,
1560 	.llseek		= noop_llseek,
1561 };
1562 
1563 static struct miscdevice vhost_scsi_misc = {
1564 	MISC_DYNAMIC_MINOR,
1565 	"vhost-scsi",
1566 	&vhost_scsi_fops,
1567 };
1568 
1569 static int __init vhost_scsi_register(void)
1570 {
1571 	return misc_register(&vhost_scsi_misc);
1572 }
1573 
1574 static void vhost_scsi_deregister(void)
1575 {
1576 	misc_deregister(&vhost_scsi_misc);
1577 }
1578 
1579 static char *vhost_scsi_dump_proto_id(struct vhost_scsi_tport *tport)
1580 {
1581 	switch (tport->tport_proto_id) {
1582 	case SCSI_PROTOCOL_SAS:
1583 		return "SAS";
1584 	case SCSI_PROTOCOL_FCP:
1585 		return "FCP";
1586 	case SCSI_PROTOCOL_ISCSI:
1587 		return "iSCSI";
1588 	default:
1589 		break;
1590 	}
1591 
1592 	return "Unknown";
1593 }
1594 
1595 static void
1596 vhost_scsi_do_plug(struct vhost_scsi_tpg *tpg,
1597 		  struct se_lun *lun, bool plug)
1598 {
1599 
1600 	struct vhost_scsi *vs = tpg->vhost_scsi;
1601 	struct vhost_virtqueue *vq;
1602 	u32 reason;
1603 
1604 	if (!vs)
1605 		return;
1606 
1607 	mutex_lock(&vs->dev.mutex);
1608 
1609 	if (plug)
1610 		reason = VIRTIO_SCSI_EVT_RESET_RESCAN;
1611 	else
1612 		reason = VIRTIO_SCSI_EVT_RESET_REMOVED;
1613 
1614 	vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
1615 	mutex_lock(&vq->mutex);
1616 	if (vhost_has_feature(vq, VIRTIO_SCSI_F_HOTPLUG))
1617 		vhost_scsi_send_evt(vs, tpg, lun,
1618 				   VIRTIO_SCSI_T_TRANSPORT_RESET, reason);
1619 	mutex_unlock(&vq->mutex);
1620 	mutex_unlock(&vs->dev.mutex);
1621 }
1622 
1623 static void vhost_scsi_hotplug(struct vhost_scsi_tpg *tpg, struct se_lun *lun)
1624 {
1625 	vhost_scsi_do_plug(tpg, lun, true);
1626 }
1627 
1628 static void vhost_scsi_hotunplug(struct vhost_scsi_tpg *tpg, struct se_lun *lun)
1629 {
1630 	vhost_scsi_do_plug(tpg, lun, false);
1631 }
1632 
1633 static int vhost_scsi_port_link(struct se_portal_group *se_tpg,
1634 			       struct se_lun *lun)
1635 {
1636 	struct vhost_scsi_tpg *tpg = container_of(se_tpg,
1637 				struct vhost_scsi_tpg, se_tpg);
1638 
1639 	mutex_lock(&vhost_scsi_mutex);
1640 
1641 	mutex_lock(&tpg->tv_tpg_mutex);
1642 	tpg->tv_tpg_port_count++;
1643 	mutex_unlock(&tpg->tv_tpg_mutex);
1644 
1645 	vhost_scsi_hotplug(tpg, lun);
1646 
1647 	mutex_unlock(&vhost_scsi_mutex);
1648 
1649 	return 0;
1650 }
1651 
1652 static void vhost_scsi_port_unlink(struct se_portal_group *se_tpg,
1653 				  struct se_lun *lun)
1654 {
1655 	struct vhost_scsi_tpg *tpg = container_of(se_tpg,
1656 				struct vhost_scsi_tpg, se_tpg);
1657 
1658 	mutex_lock(&vhost_scsi_mutex);
1659 
1660 	mutex_lock(&tpg->tv_tpg_mutex);
1661 	tpg->tv_tpg_port_count--;
1662 	mutex_unlock(&tpg->tv_tpg_mutex);
1663 
1664 	vhost_scsi_hotunplug(tpg, lun);
1665 
1666 	mutex_unlock(&vhost_scsi_mutex);
1667 }
1668 
1669 static void vhost_scsi_free_cmd_map_res(struct vhost_scsi_nexus *nexus,
1670 				       struct se_session *se_sess)
1671 {
1672 	struct vhost_scsi_cmd *tv_cmd;
1673 	unsigned int i;
1674 
1675 	if (!se_sess->sess_cmd_map)
1676 		return;
1677 
1678 	for (i = 0; i < VHOST_SCSI_DEFAULT_TAGS; i++) {
1679 		tv_cmd = &((struct vhost_scsi_cmd *)se_sess->sess_cmd_map)[i];
1680 
1681 		kfree(tv_cmd->tvc_sgl);
1682 		kfree(tv_cmd->tvc_prot_sgl);
1683 		kfree(tv_cmd->tvc_upages);
1684 	}
1685 }
1686 
1687 static ssize_t vhost_scsi_tpg_attrib_store_fabric_prot_type(
1688 	struct se_portal_group *se_tpg,
1689 	const char *page,
1690 	size_t count)
1691 {
1692 	struct vhost_scsi_tpg *tpg = container_of(se_tpg,
1693 				struct vhost_scsi_tpg, se_tpg);
1694 	unsigned long val;
1695 	int ret = kstrtoul(page, 0, &val);
1696 
1697 	if (ret) {
1698 		pr_err("kstrtoul() returned %d for fabric_prot_type\n", ret);
1699 		return ret;
1700 	}
1701 	if (val != 0 && val != 1 && val != 3) {
1702 		pr_err("Invalid vhost_scsi fabric_prot_type: %lu\n", val);
1703 		return -EINVAL;
1704 	}
1705 	tpg->tv_fabric_prot_type = val;
1706 
1707 	return count;
1708 }
1709 
1710 static ssize_t vhost_scsi_tpg_attrib_show_fabric_prot_type(
1711 	struct se_portal_group *se_tpg,
1712 	char *page)
1713 {
1714 	struct vhost_scsi_tpg *tpg = container_of(se_tpg,
1715 				struct vhost_scsi_tpg, se_tpg);
1716 
1717 	return sprintf(page, "%d\n", tpg->tv_fabric_prot_type);
1718 }
1719 TF_TPG_ATTRIB_ATTR(vhost_scsi, fabric_prot_type, S_IRUGO | S_IWUSR);
1720 
1721 static struct configfs_attribute *vhost_scsi_tpg_attrib_attrs[] = {
1722 	&vhost_scsi_tpg_attrib_fabric_prot_type.attr,
1723 	NULL,
1724 };
1725 
1726 static int vhost_scsi_make_nexus(struct vhost_scsi_tpg *tpg,
1727 				const char *name)
1728 {
1729 	struct se_portal_group *se_tpg;
1730 	struct se_session *se_sess;
1731 	struct vhost_scsi_nexus *tv_nexus;
1732 	struct vhost_scsi_cmd *tv_cmd;
1733 	unsigned int i;
1734 
1735 	mutex_lock(&tpg->tv_tpg_mutex);
1736 	if (tpg->tpg_nexus) {
1737 		mutex_unlock(&tpg->tv_tpg_mutex);
1738 		pr_debug("tpg->tpg_nexus already exists\n");
1739 		return -EEXIST;
1740 	}
1741 	se_tpg = &tpg->se_tpg;
1742 
1743 	tv_nexus = kzalloc(sizeof(struct vhost_scsi_nexus), GFP_KERNEL);
1744 	if (!tv_nexus) {
1745 		mutex_unlock(&tpg->tv_tpg_mutex);
1746 		pr_err("Unable to allocate struct vhost_scsi_nexus\n");
1747 		return -ENOMEM;
1748 	}
1749 	/*
1750 	 *  Initialize the struct se_session pointer and setup tagpool
1751 	 *  for struct vhost_scsi_cmd descriptors
1752 	 */
1753 	tv_nexus->tvn_se_sess = transport_init_session_tags(
1754 					VHOST_SCSI_DEFAULT_TAGS,
1755 					sizeof(struct vhost_scsi_cmd),
1756 					TARGET_PROT_DIN_PASS | TARGET_PROT_DOUT_PASS);
1757 	if (IS_ERR(tv_nexus->tvn_se_sess)) {
1758 		mutex_unlock(&tpg->tv_tpg_mutex);
1759 		kfree(tv_nexus);
1760 		return -ENOMEM;
1761 	}
1762 	se_sess = tv_nexus->tvn_se_sess;
1763 	for (i = 0; i < VHOST_SCSI_DEFAULT_TAGS; i++) {
1764 		tv_cmd = &((struct vhost_scsi_cmd *)se_sess->sess_cmd_map)[i];
1765 
1766 		tv_cmd->tvc_sgl = kzalloc(sizeof(struct scatterlist) *
1767 					VHOST_SCSI_PREALLOC_SGLS, GFP_KERNEL);
1768 		if (!tv_cmd->tvc_sgl) {
1769 			mutex_unlock(&tpg->tv_tpg_mutex);
1770 			pr_err("Unable to allocate tv_cmd->tvc_sgl\n");
1771 			goto out;
1772 		}
1773 
1774 		tv_cmd->tvc_upages = kzalloc(sizeof(struct page *) *
1775 					VHOST_SCSI_PREALLOC_UPAGES, GFP_KERNEL);
1776 		if (!tv_cmd->tvc_upages) {
1777 			mutex_unlock(&tpg->tv_tpg_mutex);
1778 			pr_err("Unable to allocate tv_cmd->tvc_upages\n");
1779 			goto out;
1780 		}
1781 
1782 		tv_cmd->tvc_prot_sgl = kzalloc(sizeof(struct scatterlist) *
1783 					VHOST_SCSI_PREALLOC_PROT_SGLS, GFP_KERNEL);
1784 		if (!tv_cmd->tvc_prot_sgl) {
1785 			mutex_unlock(&tpg->tv_tpg_mutex);
1786 			pr_err("Unable to allocate tv_cmd->tvc_prot_sgl\n");
1787 			goto out;
1788 		}
1789 	}
1790 	/*
1791 	 * Since we are running in 'demo mode' this call with generate a
1792 	 * struct se_node_acl for the vhost_scsi struct se_portal_group with
1793 	 * the SCSI Initiator port name of the passed configfs group 'name'.
1794 	 */
1795 	tv_nexus->tvn_se_sess->se_node_acl = core_tpg_check_initiator_node_acl(
1796 				se_tpg, (unsigned char *)name);
1797 	if (!tv_nexus->tvn_se_sess->se_node_acl) {
1798 		mutex_unlock(&tpg->tv_tpg_mutex);
1799 		pr_debug("core_tpg_check_initiator_node_acl() failed"
1800 				" for %s\n", name);
1801 		goto out;
1802 	}
1803 	/*
1804 	 * Now register the TCM vhost virtual I_T Nexus as active.
1805 	 */
1806 	transport_register_session(se_tpg, tv_nexus->tvn_se_sess->se_node_acl,
1807 			tv_nexus->tvn_se_sess, tv_nexus);
1808 	tpg->tpg_nexus = tv_nexus;
1809 
1810 	mutex_unlock(&tpg->tv_tpg_mutex);
1811 	return 0;
1812 
1813 out:
1814 	vhost_scsi_free_cmd_map_res(tv_nexus, se_sess);
1815 	transport_free_session(se_sess);
1816 	kfree(tv_nexus);
1817 	return -ENOMEM;
1818 }
1819 
1820 static int vhost_scsi_drop_nexus(struct vhost_scsi_tpg *tpg)
1821 {
1822 	struct se_session *se_sess;
1823 	struct vhost_scsi_nexus *tv_nexus;
1824 
1825 	mutex_lock(&tpg->tv_tpg_mutex);
1826 	tv_nexus = tpg->tpg_nexus;
1827 	if (!tv_nexus) {
1828 		mutex_unlock(&tpg->tv_tpg_mutex);
1829 		return -ENODEV;
1830 	}
1831 
1832 	se_sess = tv_nexus->tvn_se_sess;
1833 	if (!se_sess) {
1834 		mutex_unlock(&tpg->tv_tpg_mutex);
1835 		return -ENODEV;
1836 	}
1837 
1838 	if (tpg->tv_tpg_port_count != 0) {
1839 		mutex_unlock(&tpg->tv_tpg_mutex);
1840 		pr_err("Unable to remove TCM_vhost I_T Nexus with"
1841 			" active TPG port count: %d\n",
1842 			tpg->tv_tpg_port_count);
1843 		return -EBUSY;
1844 	}
1845 
1846 	if (tpg->tv_tpg_vhost_count != 0) {
1847 		mutex_unlock(&tpg->tv_tpg_mutex);
1848 		pr_err("Unable to remove TCM_vhost I_T Nexus with"
1849 			" active TPG vhost count: %d\n",
1850 			tpg->tv_tpg_vhost_count);
1851 		return -EBUSY;
1852 	}
1853 
1854 	pr_debug("TCM_vhost_ConfigFS: Removing I_T Nexus to emulated"
1855 		" %s Initiator Port: %s\n", vhost_scsi_dump_proto_id(tpg->tport),
1856 		tv_nexus->tvn_se_sess->se_node_acl->initiatorname);
1857 
1858 	vhost_scsi_free_cmd_map_res(tv_nexus, se_sess);
1859 	/*
1860 	 * Release the SCSI I_T Nexus to the emulated vhost Target Port
1861 	 */
1862 	transport_deregister_session(tv_nexus->tvn_se_sess);
1863 	tpg->tpg_nexus = NULL;
1864 	mutex_unlock(&tpg->tv_tpg_mutex);
1865 
1866 	kfree(tv_nexus);
1867 	return 0;
1868 }
1869 
1870 static ssize_t vhost_scsi_tpg_show_nexus(struct se_portal_group *se_tpg,
1871 					char *page)
1872 {
1873 	struct vhost_scsi_tpg *tpg = container_of(se_tpg,
1874 				struct vhost_scsi_tpg, se_tpg);
1875 	struct vhost_scsi_nexus *tv_nexus;
1876 	ssize_t ret;
1877 
1878 	mutex_lock(&tpg->tv_tpg_mutex);
1879 	tv_nexus = tpg->tpg_nexus;
1880 	if (!tv_nexus) {
1881 		mutex_unlock(&tpg->tv_tpg_mutex);
1882 		return -ENODEV;
1883 	}
1884 	ret = snprintf(page, PAGE_SIZE, "%s\n",
1885 			tv_nexus->tvn_se_sess->se_node_acl->initiatorname);
1886 	mutex_unlock(&tpg->tv_tpg_mutex);
1887 
1888 	return ret;
1889 }
1890 
1891 static ssize_t vhost_scsi_tpg_store_nexus(struct se_portal_group *se_tpg,
1892 					 const char *page,
1893 					 size_t count)
1894 {
1895 	struct vhost_scsi_tpg *tpg = container_of(se_tpg,
1896 				struct vhost_scsi_tpg, se_tpg);
1897 	struct vhost_scsi_tport *tport_wwn = tpg->tport;
1898 	unsigned char i_port[VHOST_SCSI_NAMELEN], *ptr, *port_ptr;
1899 	int ret;
1900 	/*
1901 	 * Shutdown the active I_T nexus if 'NULL' is passed..
1902 	 */
1903 	if (!strncmp(page, "NULL", 4)) {
1904 		ret = vhost_scsi_drop_nexus(tpg);
1905 		return (!ret) ? count : ret;
1906 	}
1907 	/*
1908 	 * Otherwise make sure the passed virtual Initiator port WWN matches
1909 	 * the fabric protocol_id set in vhost_scsi_make_tport(), and call
1910 	 * vhost_scsi_make_nexus().
1911 	 */
1912 	if (strlen(page) >= VHOST_SCSI_NAMELEN) {
1913 		pr_err("Emulated NAA Sas Address: %s, exceeds"
1914 				" max: %d\n", page, VHOST_SCSI_NAMELEN);
1915 		return -EINVAL;
1916 	}
1917 	snprintf(&i_port[0], VHOST_SCSI_NAMELEN, "%s", page);
1918 
1919 	ptr = strstr(i_port, "naa.");
1920 	if (ptr) {
1921 		if (tport_wwn->tport_proto_id != SCSI_PROTOCOL_SAS) {
1922 			pr_err("Passed SAS Initiator Port %s does not"
1923 				" match target port protoid: %s\n", i_port,
1924 				vhost_scsi_dump_proto_id(tport_wwn));
1925 			return -EINVAL;
1926 		}
1927 		port_ptr = &i_port[0];
1928 		goto check_newline;
1929 	}
1930 	ptr = strstr(i_port, "fc.");
1931 	if (ptr) {
1932 		if (tport_wwn->tport_proto_id != SCSI_PROTOCOL_FCP) {
1933 			pr_err("Passed FCP Initiator Port %s does not"
1934 				" match target port protoid: %s\n", i_port,
1935 				vhost_scsi_dump_proto_id(tport_wwn));
1936 			return -EINVAL;
1937 		}
1938 		port_ptr = &i_port[3]; /* Skip over "fc." */
1939 		goto check_newline;
1940 	}
1941 	ptr = strstr(i_port, "iqn.");
1942 	if (ptr) {
1943 		if (tport_wwn->tport_proto_id != SCSI_PROTOCOL_ISCSI) {
1944 			pr_err("Passed iSCSI Initiator Port %s does not"
1945 				" match target port protoid: %s\n", i_port,
1946 				vhost_scsi_dump_proto_id(tport_wwn));
1947 			return -EINVAL;
1948 		}
1949 		port_ptr = &i_port[0];
1950 		goto check_newline;
1951 	}
1952 	pr_err("Unable to locate prefix for emulated Initiator Port:"
1953 			" %s\n", i_port);
1954 	return -EINVAL;
1955 	/*
1956 	 * Clear any trailing newline for the NAA WWN
1957 	 */
1958 check_newline:
1959 	if (i_port[strlen(i_port)-1] == '\n')
1960 		i_port[strlen(i_port)-1] = '\0';
1961 
1962 	ret = vhost_scsi_make_nexus(tpg, port_ptr);
1963 	if (ret < 0)
1964 		return ret;
1965 
1966 	return count;
1967 }
1968 
1969 TF_TPG_BASE_ATTR(vhost_scsi, nexus, S_IRUGO | S_IWUSR);
1970 
1971 static struct configfs_attribute *vhost_scsi_tpg_attrs[] = {
1972 	&vhost_scsi_tpg_nexus.attr,
1973 	NULL,
1974 };
1975 
1976 static struct se_portal_group *
1977 vhost_scsi_make_tpg(struct se_wwn *wwn,
1978 		   struct config_group *group,
1979 		   const char *name)
1980 {
1981 	struct vhost_scsi_tport *tport = container_of(wwn,
1982 			struct vhost_scsi_tport, tport_wwn);
1983 
1984 	struct vhost_scsi_tpg *tpg;
1985 	u16 tpgt;
1986 	int ret;
1987 
1988 	if (strstr(name, "tpgt_") != name)
1989 		return ERR_PTR(-EINVAL);
1990 	if (kstrtou16(name + 5, 10, &tpgt) || tpgt >= VHOST_SCSI_MAX_TARGET)
1991 		return ERR_PTR(-EINVAL);
1992 
1993 	tpg = kzalloc(sizeof(struct vhost_scsi_tpg), GFP_KERNEL);
1994 	if (!tpg) {
1995 		pr_err("Unable to allocate struct vhost_scsi_tpg");
1996 		return ERR_PTR(-ENOMEM);
1997 	}
1998 	mutex_init(&tpg->tv_tpg_mutex);
1999 	INIT_LIST_HEAD(&tpg->tv_tpg_list);
2000 	tpg->tport = tport;
2001 	tpg->tport_tpgt = tpgt;
2002 
2003 	ret = core_tpg_register(wwn, &tpg->se_tpg, tport->tport_proto_id);
2004 	if (ret < 0) {
2005 		kfree(tpg);
2006 		return NULL;
2007 	}
2008 	mutex_lock(&vhost_scsi_mutex);
2009 	list_add_tail(&tpg->tv_tpg_list, &vhost_scsi_list);
2010 	mutex_unlock(&vhost_scsi_mutex);
2011 
2012 	return &tpg->se_tpg;
2013 }
2014 
2015 static void vhost_scsi_drop_tpg(struct se_portal_group *se_tpg)
2016 {
2017 	struct vhost_scsi_tpg *tpg = container_of(se_tpg,
2018 				struct vhost_scsi_tpg, se_tpg);
2019 
2020 	mutex_lock(&vhost_scsi_mutex);
2021 	list_del(&tpg->tv_tpg_list);
2022 	mutex_unlock(&vhost_scsi_mutex);
2023 	/*
2024 	 * Release the virtual I_T Nexus for this vhost TPG
2025 	 */
2026 	vhost_scsi_drop_nexus(tpg);
2027 	/*
2028 	 * Deregister the se_tpg from TCM..
2029 	 */
2030 	core_tpg_deregister(se_tpg);
2031 	kfree(tpg);
2032 }
2033 
2034 static struct se_wwn *
2035 vhost_scsi_make_tport(struct target_fabric_configfs *tf,
2036 		     struct config_group *group,
2037 		     const char *name)
2038 {
2039 	struct vhost_scsi_tport *tport;
2040 	char *ptr;
2041 	u64 wwpn = 0;
2042 	int off = 0;
2043 
2044 	/* if (vhost_scsi_parse_wwn(name, &wwpn, 1) < 0)
2045 		return ERR_PTR(-EINVAL); */
2046 
2047 	tport = kzalloc(sizeof(struct vhost_scsi_tport), GFP_KERNEL);
2048 	if (!tport) {
2049 		pr_err("Unable to allocate struct vhost_scsi_tport");
2050 		return ERR_PTR(-ENOMEM);
2051 	}
2052 	tport->tport_wwpn = wwpn;
2053 	/*
2054 	 * Determine the emulated Protocol Identifier and Target Port Name
2055 	 * based on the incoming configfs directory name.
2056 	 */
2057 	ptr = strstr(name, "naa.");
2058 	if (ptr) {
2059 		tport->tport_proto_id = SCSI_PROTOCOL_SAS;
2060 		goto check_len;
2061 	}
2062 	ptr = strstr(name, "fc.");
2063 	if (ptr) {
2064 		tport->tport_proto_id = SCSI_PROTOCOL_FCP;
2065 		off = 3; /* Skip over "fc." */
2066 		goto check_len;
2067 	}
2068 	ptr = strstr(name, "iqn.");
2069 	if (ptr) {
2070 		tport->tport_proto_id = SCSI_PROTOCOL_ISCSI;
2071 		goto check_len;
2072 	}
2073 
2074 	pr_err("Unable to locate prefix for emulated Target Port:"
2075 			" %s\n", name);
2076 	kfree(tport);
2077 	return ERR_PTR(-EINVAL);
2078 
2079 check_len:
2080 	if (strlen(name) >= VHOST_SCSI_NAMELEN) {
2081 		pr_err("Emulated %s Address: %s, exceeds"
2082 			" max: %d\n", name, vhost_scsi_dump_proto_id(tport),
2083 			VHOST_SCSI_NAMELEN);
2084 		kfree(tport);
2085 		return ERR_PTR(-EINVAL);
2086 	}
2087 	snprintf(&tport->tport_name[0], VHOST_SCSI_NAMELEN, "%s", &name[off]);
2088 
2089 	pr_debug("TCM_VHost_ConfigFS: Allocated emulated Target"
2090 		" %s Address: %s\n", vhost_scsi_dump_proto_id(tport), name);
2091 
2092 	return &tport->tport_wwn;
2093 }
2094 
2095 static void vhost_scsi_drop_tport(struct se_wwn *wwn)
2096 {
2097 	struct vhost_scsi_tport *tport = container_of(wwn,
2098 				struct vhost_scsi_tport, tport_wwn);
2099 
2100 	pr_debug("TCM_VHost_ConfigFS: Deallocating emulated Target"
2101 		" %s Address: %s\n", vhost_scsi_dump_proto_id(tport),
2102 		tport->tport_name);
2103 
2104 	kfree(tport);
2105 }
2106 
2107 static ssize_t
2108 vhost_scsi_wwn_show_attr_version(struct target_fabric_configfs *tf,
2109 				char *page)
2110 {
2111 	return sprintf(page, "TCM_VHOST fabric module %s on %s/%s"
2112 		"on "UTS_RELEASE"\n", VHOST_SCSI_VERSION, utsname()->sysname,
2113 		utsname()->machine);
2114 }
2115 
2116 TF_WWN_ATTR_RO(vhost_scsi, version);
2117 
2118 static struct configfs_attribute *vhost_scsi_wwn_attrs[] = {
2119 	&vhost_scsi_wwn_version.attr,
2120 	NULL,
2121 };
2122 
2123 static struct target_core_fabric_ops vhost_scsi_ops = {
2124 	.module				= THIS_MODULE,
2125 	.name				= "vhost",
2126 	.get_fabric_name		= vhost_scsi_get_fabric_name,
2127 	.tpg_get_wwn			= vhost_scsi_get_fabric_wwn,
2128 	.tpg_get_tag			= vhost_scsi_get_tpgt,
2129 	.tpg_check_demo_mode		= vhost_scsi_check_true,
2130 	.tpg_check_demo_mode_cache	= vhost_scsi_check_true,
2131 	.tpg_check_demo_mode_write_protect = vhost_scsi_check_false,
2132 	.tpg_check_prod_mode_write_protect = vhost_scsi_check_false,
2133 	.tpg_check_prot_fabric_only	= vhost_scsi_check_prot_fabric_only,
2134 	.tpg_get_inst_index		= vhost_scsi_tpg_get_inst_index,
2135 	.release_cmd			= vhost_scsi_release_cmd,
2136 	.check_stop_free		= vhost_scsi_check_stop_free,
2137 	.shutdown_session		= vhost_scsi_shutdown_session,
2138 	.close_session			= vhost_scsi_close_session,
2139 	.sess_get_index			= vhost_scsi_sess_get_index,
2140 	.sess_get_initiator_sid		= NULL,
2141 	.write_pending			= vhost_scsi_write_pending,
2142 	.write_pending_status		= vhost_scsi_write_pending_status,
2143 	.set_default_node_attributes	= vhost_scsi_set_default_node_attrs,
2144 	.get_cmd_state			= vhost_scsi_get_cmd_state,
2145 	.queue_data_in			= vhost_scsi_queue_data_in,
2146 	.queue_status			= vhost_scsi_queue_status,
2147 	.queue_tm_rsp			= vhost_scsi_queue_tm_rsp,
2148 	.aborted_task			= vhost_scsi_aborted_task,
2149 	/*
2150 	 * Setup callers for generic logic in target_core_fabric_configfs.c
2151 	 */
2152 	.fabric_make_wwn		= vhost_scsi_make_tport,
2153 	.fabric_drop_wwn		= vhost_scsi_drop_tport,
2154 	.fabric_make_tpg		= vhost_scsi_make_tpg,
2155 	.fabric_drop_tpg		= vhost_scsi_drop_tpg,
2156 	.fabric_post_link		= vhost_scsi_port_link,
2157 	.fabric_pre_unlink		= vhost_scsi_port_unlink,
2158 
2159 	.tfc_wwn_attrs			= vhost_scsi_wwn_attrs,
2160 	.tfc_tpg_base_attrs		= vhost_scsi_tpg_attrs,
2161 	.tfc_tpg_attrib_attrs		= vhost_scsi_tpg_attrib_attrs,
2162 };
2163 
2164 static int __init vhost_scsi_init(void)
2165 {
2166 	int ret = -ENOMEM;
2167 
2168 	pr_debug("TCM_VHOST fabric module %s on %s/%s"
2169 		" on "UTS_RELEASE"\n", VHOST_SCSI_VERSION, utsname()->sysname,
2170 		utsname()->machine);
2171 
2172 	/*
2173 	 * Use our own dedicated workqueue for submitting I/O into
2174 	 * target core to avoid contention within system_wq.
2175 	 */
2176 	vhost_scsi_workqueue = alloc_workqueue("vhost_scsi", 0, 0);
2177 	if (!vhost_scsi_workqueue)
2178 		goto out;
2179 
2180 	ret = vhost_scsi_register();
2181 	if (ret < 0)
2182 		goto out_destroy_workqueue;
2183 
2184 	ret = target_register_template(&vhost_scsi_ops);
2185 	if (ret < 0)
2186 		goto out_vhost_scsi_deregister;
2187 
2188 	return 0;
2189 
2190 out_vhost_scsi_deregister:
2191 	vhost_scsi_deregister();
2192 out_destroy_workqueue:
2193 	destroy_workqueue(vhost_scsi_workqueue);
2194 out:
2195 	return ret;
2196 };
2197 
2198 static void vhost_scsi_exit(void)
2199 {
2200 	target_unregister_template(&vhost_scsi_ops);
2201 	vhost_scsi_deregister();
2202 	destroy_workqueue(vhost_scsi_workqueue);
2203 };
2204 
2205 MODULE_DESCRIPTION("VHOST_SCSI series fabric driver");
2206 MODULE_ALIAS("tcm_vhost");
2207 MODULE_LICENSE("GPL");
2208 module_init(vhost_scsi_init);
2209 module_exit(vhost_scsi_exit);
2210