xref: /openbmc/linux/drivers/nvme/target/loop.c (revision 232b0b08)
/*
 * NVMe over Fabrics loopback device.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/scatterlist.h>
#include <linux/delay.h>
#include <linux/blk-mq.h>
#include <linux/nvme.h>
#include <linux/module.h>
#include <linux/parser.h>
#include <linux/t10-pi.h>
#include "nvmet.h"
#include "../host/nvme.h"
#include "../host/fabrics.h"

#define NVME_LOOP_AQ_DEPTH		256

#define NVME_LOOP_MAX_SEGMENTS		256

/*
 * We handle AEN commands ourselves and don't even let the
 * block layer know about them.
 */
#define NVME_LOOP_NR_AEN_COMMANDS	1
#define NVME_LOOP_AQ_BLKMQ_DEPTH	\
	(NVME_LOOP_AQ_DEPTH - NVME_LOOP_NR_AEN_COMMANDS)

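/*
 * Per-command context, embedded in the host request PDU: the host NVMe
 * command and completion, the nvmet request that is executed on the
 * target side, and an inline scatterlist shared by both sides.
 */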
struct nvme_loop_iod {
	struct nvme_request	nvme_req;
	struct nvme_command	cmd;
	struct nvme_completion	rsp;
	struct nvmet_req	req;
	struct nvme_loop_queue	*queue;
	struct work_struct	work;
	struct sg_table		sg_table;
	struct scatterlist	first_sgl[];
};

struct nvme_loop_ctrl {
	spinlock_t		lock;
	struct nvme_loop_queue	*queues;
	u32			queue_count;

	struct blk_mq_tag_set	admin_tag_set;

	struct list_head	list;
	u64			cap;
	struct blk_mq_tag_set	tag_set;
	struct nvme_loop_iod	async_event_iod;
	struct nvme_ctrl	ctrl;

	struct nvmet_ctrl	*target_ctrl;
	struct work_struct	delete_work;
	struct work_struct	reset_work;
};

static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl)
{
	return container_of(ctrl, struct nvme_loop_ctrl, ctrl);
}

struct nvme_loop_queue {
	struct nvmet_cq		nvme_cq;
	struct nvmet_sq		nvme_sq;
	struct nvme_loop_ctrl	*ctrl;
};

static struct nvmet_port *nvmet_loop_port;

static LIST_HEAD(nvme_loop_ctrl_list);
static DEFINE_MUTEX(nvme_loop_ctrl_mutex);

static void nvme_loop_queue_response(struct nvmet_req *nvme_req);
static void nvme_loop_delete_ctrl(struct nvmet_ctrl *ctrl);

static struct nvmet_fabrics_ops nvme_loop_ops;

static inline int nvme_loop_queue_idx(struct nvme_loop_queue *queue)
{
	return queue - queue->ctrl->queues;
}

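/*
 * Host-side completion: release the command resources, retry the request
 * if the NVMe status allows it, otherwise translate the status into an
 * error code for the block layer.
 */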
static void nvme_loop_complete_rq(struct request *req)
{
	struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
	int error = 0;

	nvme_cleanup_cmd(req);
	sg_free_table_chained(&iod->sg_table, true);

	if (unlikely(req->errors)) {
		if (nvme_req_needs_retry(req, req->errors)) {
			nvme_requeue_req(req);
			return;
		}

		if (blk_rq_is_passthrough(req))
			error = req->errors;
		else
			error = nvme_error_status(req->errors);
	}

	blk_mq_end_request(req, error);
}

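/*
 * Called by the nvmet core once the target side has produced a completion.
 * Normal commands complete the matching host request; AEN completions are
 * handed straight to the host core (see the comment below).
 */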
static void nvme_loop_queue_response(struct nvmet_req *req)
{
	struct nvme_loop_iod *iod =
		container_of(req, struct nvme_loop_iod, req);
	struct nvme_completion *cqe = &iod->rsp;

	/*
	 * AEN requests are special as they don't time out and can
	 * survive any kind of queue freeze and often don't respond to
	 * aborts.  We don't even bother to allocate a struct request
	 * for them but rather special case them here.
	 */
	if (unlikely(nvme_loop_queue_idx(iod->queue) == 0 &&
			cqe->command_id >= NVME_LOOP_AQ_BLKMQ_DEPTH)) {
		nvme_complete_async_event(&iod->queue->ctrl->ctrl, cqe->status,
				&cqe->result);
	} else {
		struct request *rq = blk_mq_rq_from_pdu(iod);

		iod->nvme_req.result = cqe->result;
		blk_mq_complete_request(rq, le16_to_cpu(cqe->status) >> 1);
	}
}

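/*
 * Target-side execution runs from a work item so that it happens in
 * process context, outside the block layer submission path.
 */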
static void nvme_loop_execute_work(struct work_struct *work)
{
	struct nvme_loop_iod *iod =
		container_of(work, struct nvme_loop_iod, work);

	iod->req.execute(&iod->req);
}

static enum blk_eh_timer_return
nvme_loop_timeout(struct request *rq, bool reserved)
{
	struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(rq);

	/* queue error recovery */
	schedule_work(&iod->queue->ctrl->reset_work);

	/* fail with DNR on admin cmd timeout */
	rq->errors = NVME_SC_ABORT_REQ | NVME_SC_DNR;

	return BLK_EH_HANDLED;
}

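/*
 * Host submission path: build the NVMe command, initialize the embedded
 * nvmet request against this queue's target SQ/CQ, map the request data
 * into a scatterlist the target uses directly, and defer execution to the
 * work item.  If nvmet_req_init() rejects the command, the error
 * completion is delivered through the normal response path.
 */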
static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
		const struct blk_mq_queue_data *bd)
{
	struct nvme_ns *ns = hctx->queue->queuedata;
	struct nvme_loop_queue *queue = hctx->driver_data;
	struct request *req = bd->rq;
	struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
	int ret;

	ret = nvme_setup_cmd(ns, req, &iod->cmd);
	if (ret != BLK_MQ_RQ_QUEUE_OK)
		return ret;

	iod->cmd.common.flags |= NVME_CMD_SGL_METABUF;
	iod->req.port = nvmet_loop_port;
	if (!nvmet_req_init(&iod->req, &queue->nvme_cq,
			&queue->nvme_sq, &nvme_loop_ops)) {
		nvme_cleanup_cmd(req);
		blk_mq_start_request(req);
		nvme_loop_queue_response(&iod->req);
		return BLK_MQ_RQ_QUEUE_OK;
	}

	if (blk_rq_bytes(req)) {
		iod->sg_table.sgl = iod->first_sgl;
		ret = sg_alloc_table_chained(&iod->sg_table,
				blk_rq_nr_phys_segments(req),
				iod->sg_table.sgl);
		if (ret)
			return BLK_MQ_RQ_QUEUE_BUSY;

		iod->req.sg = iod->sg_table.sgl;
		iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl);
	}

	blk_mq_start_request(req);

	schedule_work(&iod->work);
	return BLK_MQ_RQ_QUEUE_OK;
}

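/*
 * AER commands bypass the block layer entirely: the single reserved iod is
 * built by hand and submitted straight to the target.  The command id is
 * set past the blk-mq tag space so nvme_loop_queue_response() can tell the
 * completion apart.
 */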
static void nvme_loop_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
{
	struct nvme_loop_ctrl *ctrl = to_loop_ctrl(arg);
	struct nvme_loop_queue *queue = &ctrl->queues[0];
	struct nvme_loop_iod *iod = &ctrl->async_event_iod;

	memset(&iod->cmd, 0, sizeof(iod->cmd));
	iod->cmd.common.opcode = nvme_admin_async_event;
	iod->cmd.common.command_id = NVME_LOOP_AQ_BLKMQ_DEPTH;
	iod->cmd.common.flags |= NVME_CMD_SGL_METABUF;

	if (!nvmet_req_init(&iod->req, &queue->nvme_cq, &queue->nvme_sq,
			&nvme_loop_ops)) {
		dev_err(ctrl->ctrl.device, "failed async event work\n");
		return;
	}

	schedule_work(&iod->work);
}

static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl,
		struct nvme_loop_iod *iod, unsigned int queue_idx)
{
	iod->req.cmd = &iod->cmd;
	iod->req.rsp = &iod->rsp;
	iod->queue = &ctrl->queues[queue_idx];
	INIT_WORK(&iod->work, nvme_loop_execute_work);
	return 0;
}

static int nvme_loop_init_request(void *data, struct request *req,
				unsigned int hctx_idx, unsigned int rq_idx,
				unsigned int numa_node)
{
	return nvme_loop_init_iod(data, blk_mq_rq_to_pdu(req), hctx_idx + 1);
}

static int nvme_loop_init_admin_request(void *data, struct request *req,
				unsigned int hctx_idx, unsigned int rq_idx,
				unsigned int numa_node)
{
	return nvme_loop_init_iod(data, blk_mq_rq_to_pdu(req), 0);
}

static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
		unsigned int hctx_idx)
{
	struct nvme_loop_ctrl *ctrl = data;
	struct nvme_loop_queue *queue = &ctrl->queues[hctx_idx + 1];

	BUG_ON(hctx_idx >= ctrl->queue_count);

	hctx->driver_data = queue;
	return 0;
}

static int nvme_loop_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
		unsigned int hctx_idx)
{
	struct nvme_loop_ctrl *ctrl = data;
	struct nvme_loop_queue *queue = &ctrl->queues[0];

	BUG_ON(hctx_idx != 0);

	hctx->driver_data = queue;
	return 0;
}

static struct blk_mq_ops nvme_loop_mq_ops = {
	.queue_rq	= nvme_loop_queue_rq,
	.complete	= nvme_loop_complete_rq,
	.init_request	= nvme_loop_init_request,
	.init_hctx	= nvme_loop_init_hctx,
	.timeout	= nvme_loop_timeout,
};

static struct blk_mq_ops nvme_loop_admin_mq_ops = {
	.queue_rq	= nvme_loop_queue_rq,
	.complete	= nvme_loop_complete_rq,
	.init_request	= nvme_loop_init_admin_request,
	.init_hctx	= nvme_loop_init_admin_hctx,
	.timeout	= nvme_loop_timeout,
};

static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl)
{
	nvmet_sq_destroy(&ctrl->queues[0].nvme_sq);
	blk_cleanup_queue(ctrl->ctrl.admin_q);
	blk_mq_free_tag_set(&ctrl->admin_tag_set);
}

static void nvme_loop_free_ctrl(struct nvme_ctrl *nctrl)
{
	struct nvme_loop_ctrl *ctrl = to_loop_ctrl(nctrl);

	if (list_empty(&ctrl->list))
		goto free_ctrl;

	mutex_lock(&nvme_loop_ctrl_mutex);
	list_del(&ctrl->list);
	mutex_unlock(&nvme_loop_ctrl_mutex);

	if (nctrl->tagset) {
		blk_cleanup_queue(ctrl->ctrl.connect_q);
		blk_mq_free_tag_set(&ctrl->tag_set);
	}
	kfree(ctrl->queues);
	nvmf_free_options(nctrl->opts);
free_ctrl:
	kfree(ctrl);
}

static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl)
{
	int i;

	for (i = 1; i < ctrl->queue_count; i++)
		nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
}

static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl)
{
	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
	unsigned int nr_io_queues;
	int ret, i;

	nr_io_queues = min(opts->nr_io_queues, num_online_cpus());
	ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
	if (ret || !nr_io_queues)
		return ret;

	dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", nr_io_queues);

	for (i = 1; i <= nr_io_queues; i++) {
		ctrl->queues[i].ctrl = ctrl;
		ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq);
		if (ret)
			goto out_destroy_queues;

		ctrl->queue_count++;
	}

	return 0;

out_destroy_queues:
	nvme_loop_destroy_io_queues(ctrl);
	return ret;
}

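/*
 * Bring up the admin side: initialize the target admin SQ, allocate the
 * admin tag set and request queue, issue the fabrics Connect, read CAP,
 * enable the controller and run the common identify/initialization code.
 */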
static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
{
	int error;

	memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
	ctrl->admin_tag_set.ops = &nvme_loop_admin_mq_ops;
	ctrl->admin_tag_set.queue_depth = NVME_LOOP_AQ_BLKMQ_DEPTH;
	ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */
	ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
	ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
		SG_CHUNK_SIZE * sizeof(struct scatterlist);
	ctrl->admin_tag_set.driver_data = ctrl;
	ctrl->admin_tag_set.nr_hw_queues = 1;
	ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;

	ctrl->queues[0].ctrl = ctrl;
	error = nvmet_sq_init(&ctrl->queues[0].nvme_sq);
	if (error)
		return error;
	ctrl->queue_count = 1;

	error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
	if (error)
		goto out_free_sq;

	ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
	if (IS_ERR(ctrl->ctrl.admin_q)) {
		error = PTR_ERR(ctrl->ctrl.admin_q);
		goto out_free_tagset;
	}

	error = nvmf_connect_admin_queue(&ctrl->ctrl);
	if (error)
		goto out_cleanup_queue;

	error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap);
	if (error) {
		dev_err(ctrl->ctrl.device,
			"prop_get NVME_REG_CAP failed\n");
		goto out_cleanup_queue;
	}

	ctrl->ctrl.sqsize =
		min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize);

	error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
	if (error)
		goto out_cleanup_queue;

	ctrl->ctrl.max_hw_sectors =
		(NVME_LOOP_MAX_SEGMENTS - 1) << (PAGE_SHIFT - 9);

	error = nvme_init_identify(&ctrl->ctrl);
	if (error)
		goto out_cleanup_queue;

	nvme_start_keep_alive(&ctrl->ctrl);

	return 0;

out_cleanup_queue:
	blk_cleanup_queue(ctrl->ctrl.admin_q);
out_free_tagset:
	blk_mq_free_tag_set(&ctrl->admin_tag_set);
out_free_sq:
	nvmet_sq_destroy(&ctrl->queues[0].nvme_sq);
	return error;
}

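/*
 * Tear everything down: stop the keep-alive, quiesce the queues and cancel
 * any outstanding I/O and admin requests, shut the controller down if it
 * is live, then destroy the target queues.
 */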
static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
{
	nvme_stop_keep_alive(&ctrl->ctrl);

	if (ctrl->queue_count > 1) {
		nvme_stop_queues(&ctrl->ctrl);
		blk_mq_tagset_busy_iter(&ctrl->tag_set,
					nvme_cancel_request, &ctrl->ctrl);
		nvme_loop_destroy_io_queues(ctrl);
	}

	if (ctrl->ctrl.state == NVME_CTRL_LIVE)
		nvme_shutdown_ctrl(&ctrl->ctrl);

	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
				nvme_cancel_request, &ctrl->ctrl);
	nvme_loop_destroy_admin_queue(ctrl);
}

static void nvme_loop_del_ctrl_work(struct work_struct *work)
{
	struct nvme_loop_ctrl *ctrl = container_of(work,
				struct nvme_loop_ctrl, delete_work);

	nvme_uninit_ctrl(&ctrl->ctrl);
	nvme_loop_shutdown_ctrl(ctrl);
	nvme_put_ctrl(&ctrl->ctrl);
}

static int __nvme_loop_del_ctrl(struct nvme_loop_ctrl *ctrl)
{
	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
		return -EBUSY;

	if (!schedule_work(&ctrl->delete_work))
		return -EBUSY;

	return 0;
}

static int nvme_loop_del_ctrl(struct nvme_ctrl *nctrl)
{
	struct nvme_loop_ctrl *ctrl = to_loop_ctrl(nctrl);
	int ret;

	ret = __nvme_loop_del_ctrl(ctrl);
	if (ret)
		return ret;

	flush_work(&ctrl->delete_work);

	return 0;
}

static void nvme_loop_delete_ctrl(struct nvmet_ctrl *nctrl)
{
	struct nvme_loop_ctrl *ctrl;

	mutex_lock(&nvme_loop_ctrl_mutex);
	list_for_each_entry(ctrl, &nvme_loop_ctrl_list, list) {
		if (ctrl->ctrl.cntlid == nctrl->cntlid)
			__nvme_loop_del_ctrl(ctrl);
	}
	mutex_unlock(&nvme_loop_ctrl_mutex);
}

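/*
 * Reset is a full teardown followed by re-creating the admin and I/O
 * queues; if anything fails the controller is removed rather than left
 * half initialized.
 */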
static void nvme_loop_reset_ctrl_work(struct work_struct *work)
{
	struct nvme_loop_ctrl *ctrl = container_of(work,
					struct nvme_loop_ctrl, reset_work);
	bool changed;
	int i, ret;

	nvme_loop_shutdown_ctrl(ctrl);

	ret = nvme_loop_configure_admin_queue(ctrl);
	if (ret)
		goto out_disable;

	ret = nvme_loop_init_io_queues(ctrl);
	if (ret)
		goto out_destroy_admin;

	for (i = 1; i < ctrl->queue_count; i++) {
		ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
		if (ret)
			goto out_destroy_io;
	}

	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
	WARN_ON_ONCE(!changed);

	nvme_queue_scan(&ctrl->ctrl);
	nvme_queue_async_events(&ctrl->ctrl);

	nvme_start_queues(&ctrl->ctrl);

	return;

out_destroy_io:
	nvme_loop_destroy_io_queues(ctrl);
out_destroy_admin:
	nvme_loop_destroy_admin_queue(ctrl);
out_disable:
	dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
	nvme_uninit_ctrl(&ctrl->ctrl);
	nvme_put_ctrl(&ctrl->ctrl);
}

static int nvme_loop_reset_ctrl(struct nvme_ctrl *nctrl)
{
	struct nvme_loop_ctrl *ctrl = to_loop_ctrl(nctrl);

	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
		return -EBUSY;

	if (!schedule_work(&ctrl->reset_work))
		return -EBUSY;

	flush_work(&ctrl->reset_work);

	return 0;
}

static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
	.name			= "loop",
	.module			= THIS_MODULE,
	.is_fabrics		= true,
	.reg_read32		= nvmf_reg_read32,
	.reg_read64		= nvmf_reg_read64,
	.reg_write32		= nvmf_reg_write32,
	.reset_ctrl		= nvme_loop_reset_ctrl,
	.free_ctrl		= nvme_loop_free_ctrl,
	.submit_async_event	= nvme_loop_submit_async_event,
	.delete_ctrl		= nvme_loop_del_ctrl,
	.get_subsysnqn		= nvmf_get_subsysnqn,
};

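/*
 * Create the I/O side: target SQs, the shared tag set, the fabrics
 * connect_q, and one Connect command per I/O queue.
 */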
static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
{
	int ret, i;

	ret = nvme_loop_init_io_queues(ctrl);
	if (ret)
		return ret;

	memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
	ctrl->tag_set.ops = &nvme_loop_mq_ops;
	ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
	ctrl->tag_set.reserved_tags = 1; /* fabric connect */
	ctrl->tag_set.numa_node = NUMA_NO_NODE;
	ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
	ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
		SG_CHUNK_SIZE * sizeof(struct scatterlist);
	ctrl->tag_set.driver_data = ctrl;
	ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1;
	ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
	ctrl->ctrl.tagset = &ctrl->tag_set;

	ret = blk_mq_alloc_tag_set(&ctrl->tag_set);
	if (ret)
		goto out_destroy_queues;

	ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set);
	if (IS_ERR(ctrl->ctrl.connect_q)) {
		ret = PTR_ERR(ctrl->ctrl.connect_q);
		goto out_free_tagset;
	}

	for (i = 1; i < ctrl->queue_count; i++) {
		ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
		if (ret)
			goto out_cleanup_connect_q;
	}

	return 0;

out_cleanup_connect_q:
	blk_cleanup_queue(ctrl->ctrl.connect_q);
out_free_tagset:
	blk_mq_free_tag_set(&ctrl->tag_set);
out_destroy_queues:
	nvme_loop_destroy_io_queues(ctrl);
	return ret;
}

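/*
 * Entry point from the fabrics layer for "transport=loop" connects:
 * allocate the controller, bring up the admin queue and (optionally) the
 * I/O queues, then register the controller on the global list.
 */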
static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
		struct nvmf_ctrl_options *opts)
{
	struct nvme_loop_ctrl *ctrl;
	bool changed;
	int ret;

	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
	if (!ctrl)
		return ERR_PTR(-ENOMEM);
	ctrl->ctrl.opts = opts;
	INIT_LIST_HEAD(&ctrl->list);

	INIT_WORK(&ctrl->delete_work, nvme_loop_del_ctrl_work);
	INIT_WORK(&ctrl->reset_work, nvme_loop_reset_ctrl_work);

	ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_loop_ctrl_ops,
				0 /* no quirks, we're perfect! */);
	if (ret)
		goto out_put_ctrl;

	spin_lock_init(&ctrl->lock);

	ret = -ENOMEM;

	ctrl->ctrl.sqsize = opts->queue_size - 1;
	ctrl->ctrl.kato = opts->kato;

	ctrl->queues = kcalloc(opts->nr_io_queues + 1, sizeof(*ctrl->queues),
			GFP_KERNEL);
	if (!ctrl->queues)
		goto out_uninit_ctrl;

	ret = nvme_loop_configure_admin_queue(ctrl);
	if (ret)
		goto out_free_queues;

	if (opts->queue_size > ctrl->ctrl.maxcmd) {
		/* warn if maxcmd is lower than queue_size */
		dev_warn(ctrl->ctrl.device,
			"queue_size %zu > ctrl maxcmd %u, clamping down\n",
			opts->queue_size, ctrl->ctrl.maxcmd);
		opts->queue_size = ctrl->ctrl.maxcmd;
	}

	if (opts->nr_io_queues) {
		ret = nvme_loop_create_io_queues(ctrl);
		if (ret)
			goto out_remove_admin_queue;
	}

	nvme_loop_init_iod(ctrl, &ctrl->async_event_iod, 0);

	dev_info(ctrl->ctrl.device,
		 "new ctrl: \"%s\"\n", ctrl->ctrl.opts->subsysnqn);

	kref_get(&ctrl->ctrl.kref);

	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
	WARN_ON_ONCE(!changed);

	mutex_lock(&nvme_loop_ctrl_mutex);
	list_add_tail(&ctrl->list, &nvme_loop_ctrl_list);
	mutex_unlock(&nvme_loop_ctrl_mutex);

	if (opts->nr_io_queues) {
		nvme_queue_scan(&ctrl->ctrl);
		nvme_queue_async_events(&ctrl->ctrl);
	}

	return &ctrl->ctrl;

out_remove_admin_queue:
	nvme_loop_destroy_admin_queue(ctrl);
out_free_queues:
	kfree(ctrl->queues);
out_uninit_ctrl:
	nvme_uninit_ctrl(&ctrl->ctrl);
out_put_ctrl:
	nvme_put_ctrl(&ctrl->ctrl);
	if (ret > 0)
		ret = -EIO;
	return ERR_PTR(ret);
}

static int nvme_loop_add_port(struct nvmet_port *port)
{
	/*
	 * XXX: disallow adding more than one port so that there are no
	 * connection rejections when a subsystem is assigned to a port
	 * for which loop doesn't have a pointer.
	 * This scenario would be possible if we allowed more than one
	 * port to be added and a subsystem was assigned to a port other
	 * than nvmet_loop_port.
	 */

	if (nvmet_loop_port)
		return -EPERM;

	nvmet_loop_port = port;
	return 0;
}

static void nvme_loop_remove_port(struct nvmet_port *port)
{
	if (port == nvmet_loop_port)
		nvmet_loop_port = NULL;
}

static struct nvmet_fabrics_ops nvme_loop_ops = {
	.owner		= THIS_MODULE,
	.type		= NVMF_TRTYPE_LOOP,
	.add_port	= nvme_loop_add_port,
	.remove_port	= nvme_loop_remove_port,
	.queue_response = nvme_loop_queue_response,
	.delete_ctrl	= nvme_loop_delete_ctrl,
};

static struct nvmf_transport_ops nvme_loop_transport = {
	.name		= "loop",
	.create_ctrl	= nvme_loop_create_ctrl,
};

static int __init nvme_loop_init_module(void)
{
	int ret;

	ret = nvmet_register_transport(&nvme_loop_ops);
	if (ret)
		return ret;
	return nvmf_register_transport(&nvme_loop_transport);
}

static void __exit nvme_loop_cleanup_module(void)
{
	struct nvme_loop_ctrl *ctrl, *next;

	nvmf_unregister_transport(&nvme_loop_transport);
	nvmet_unregister_transport(&nvme_loop_ops);

	mutex_lock(&nvme_loop_ctrl_mutex);
	list_for_each_entry_safe(ctrl, next, &nvme_loop_ctrl_list, list)
		__nvme_loop_del_ctrl(ctrl);
	mutex_unlock(&nvme_loop_ctrl_mutex);

	flush_scheduled_work();
}

module_init(nvme_loop_init_module);
module_exit(nvme_loop_cleanup_module);

MODULE_LICENSE("GPL v2");
MODULE_ALIAS("nvmet-transport-254"); /* 254 == NVMF_TRTYPE_LOOP */