/* drivers/nvme/target/core.c (revision 77a87824) */
/*
 * Common code for the NVMe target.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include "nvmet.h"

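/* Registered fabrics transport ops, indexed by NVMF transport type. */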
static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];

/*
 * This read/write semaphore is used to synchronize access to configuration
 * information on a target system that will result in a discovery log page
 * information change for at least one host.
 * The full list of resources to be protected by this semaphore is:
 *
 *  - subsystems list
 *  - per-subsystem allowed hosts list
 *  - allow_any_host subsystem attribute
 *  - nvmet_genctr
 *  - the nvmet_transports array
 *
 * When updating any of those lists/structures the write lock should be
 * obtained, while readers (populating the discovery log page or checking
 * host-subsystem links) take the read lock to allow concurrent reads.
 */
DECLARE_RWSEM(nvmet_config_sem);

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn);

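/*
 * Copy between a linear kernel buffer and the command's data SGL at byte
 * offset @off.  Returns NVME_SC_SGL_INVALID_DATA (with DNR set) if the
 * full length could not be copied.
 */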
u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
		size_t len)
{
	if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	return 0;
}

u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
{
	if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	return 0;
}

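/*
 * Pack the AEN type, info and log page identifier into the 32-bit
 * completion result (Dword 0) reported to the host.
 */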
static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
{
	return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
}

static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
{
	struct nvmet_req *req;

	while (1) {
		mutex_lock(&ctrl->lock);
		if (!ctrl->nr_async_event_cmds) {
			mutex_unlock(&ctrl->lock);
			return;
		}

		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
	}
}

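/*
 * Match pending async events with outstanding Asynchronous Event Request
 * commands and complete them, one AEN per AER, until either list is empty.
 */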
static void nvmet_async_event_work(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
		container_of(work, struct nvmet_ctrl, async_event_work);
	struct nvmet_async_event *aen;
	struct nvmet_req *req;

	while (1) {
		mutex_lock(&ctrl->lock);
		aen = list_first_entry_or_null(&ctrl->async_events,
				struct nvmet_async_event, entry);
		if (!aen || !ctrl->nr_async_event_cmds) {
			mutex_unlock(&ctrl->lock);
			return;
		}

		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		nvmet_set_result(req, nvmet_async_event_result(aen));

		list_del(&aen->entry);
		kfree(aen);

		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, 0);
	}
}

static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
		u8 event_info, u8 log_page)
{
	struct nvmet_async_event *aen;

	aen = kmalloc(sizeof(*aen), GFP_KERNEL);
	if (!aen)
		return;

	aen->event_type = event_type;
	aen->event_info = event_info;
	aen->log_page = log_page;

	mutex_lock(&ctrl->lock);
	list_add_tail(&aen->entry, &ctrl->async_events);
	mutex_unlock(&ctrl->lock);

	schedule_work(&ctrl->async_event_work);
}

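/*
 * Transport drivers register their nvmet_fabrics_ops at module load time.
 * Illustrative sketch only (names and field values are hypothetical and
 * depend on the transport driver):
 *
 *	static struct nvmet_fabrics_ops nvmet_foo_ops = {
 *		.owner		= THIS_MODULE,
 *		.type		= NVMF_TRTYPE_LOOP,
 *		.add_port	= nvmet_foo_add_port,
 *		.remove_port	= nvmet_foo_remove_port,
 *		.queue_response	= nvmet_foo_queue_response,
 *		.delete_ctrl	= nvmet_foo_delete_ctrl,
 *	};
 *
 *	ret = nvmet_register_transport(&nvmet_foo_ops);
 */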
int nvmet_register_transport(struct nvmet_fabrics_ops *ops)
{
	int ret = 0;

	down_write(&nvmet_config_sem);
	if (nvmet_transports[ops->type])
		ret = -EINVAL;
	else
		nvmet_transports[ops->type] = ops;
	up_write(&nvmet_config_sem);

	return ret;
}
EXPORT_SYMBOL_GPL(nvmet_register_transport);

void nvmet_unregister_transport(struct nvmet_fabrics_ops *ops)
{
	down_write(&nvmet_config_sem);
	nvmet_transports[ops->type] = NULL;
	up_write(&nvmet_config_sem);
}
EXPORT_SYMBOL_GPL(nvmet_unregister_transport);

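/*
 * Enable a port: look up the transport ops for the port's trtype, loading
 * the transport module on demand (nvmet_config_sem is dropped around
 * request_module()), then hand the port to the transport via ->add_port().
 */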
int nvmet_enable_port(struct nvmet_port *port)
{
	struct nvmet_fabrics_ops *ops;
	int ret;

	lockdep_assert_held(&nvmet_config_sem);

	ops = nvmet_transports[port->disc_addr.trtype];
	if (!ops) {
		up_write(&nvmet_config_sem);
		request_module("nvmet-transport-%d", port->disc_addr.trtype);
		down_write(&nvmet_config_sem);
		ops = nvmet_transports[port->disc_addr.trtype];
		if (!ops) {
			pr_err("transport type %d not supported\n",
				port->disc_addr.trtype);
			return -EINVAL;
		}
	}

	if (!try_module_get(ops->owner))
		return -EINVAL;

	ret = ops->add_port(port);
	if (ret) {
		module_put(ops->owner);
		return ret;
	}

	port->enabled = true;
	return 0;
}

void nvmet_disable_port(struct nvmet_port *port)
{
	struct nvmet_fabrics_ops *ops;

	lockdep_assert_held(&nvmet_config_sem);

	port->enabled = false;

	ops = nvmet_transports[port->disc_addr.trtype];
	ops->remove_port(port);
	module_put(ops->owner);
}

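/*
 * Keep-alive handling: if the host does not reset the timer within KATO
 * seconds, the controller is torn down via the transport's ->delete_ctrl().
 */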
static void nvmet_keep_alive_timer(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
			struct nvmet_ctrl, ka_work);

	pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
		ctrl->cntlid, ctrl->kato);

	ctrl->ops->delete_ctrl(ctrl);
}

static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	pr_debug("ctrl %d start keep-alive timer for %d secs\n",
		ctrl->cntlid, ctrl->kato);

	INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
	schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
}

static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);

	cancel_delayed_work_sync(&ctrl->ka_work);
}

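/*
 * Namespace lookup.  __nvmet_find_namespace() walks the RCU-protected
 * namespaces list; nvmet_find_namespace() additionally takes a percpu
 * reference that the caller must drop with nvmet_put_namespace().
 */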
static struct nvmet_ns *__nvmet_find_namespace(struct nvmet_ctrl *ctrl,
		__le32 nsid)
{
	struct nvmet_ns *ns;

	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
		if (ns->nsid == le32_to_cpu(nsid))
			return ns;
	}

	return NULL;
}

struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid)
{
	struct nvmet_ns *ns;

	rcu_read_lock();
	ns = __nvmet_find_namespace(ctrl, nsid);
	if (ns)
		percpu_ref_get(&ns->ref);
	rcu_read_unlock();

	return ns;
}

static void nvmet_destroy_namespace(struct percpu_ref *ref)
{
	struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);

	complete(&ns->disable_done);
}

void nvmet_put_namespace(struct nvmet_ns *ns)
{
	percpu_ref_put(&ns->ref);
}

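/*
 * Enable a namespace: open the backing block device, initialize the percpu
 * reference, insert the namespace into the subsystem's sorted list and send
 * an asynchronous event notice to every connected controller.
 */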
int nvmet_ns_enable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;
	int ret = 0;

	mutex_lock(&subsys->lock);
	if (!list_empty(&ns->dev_link))
		goto out_unlock;

	ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE,
			NULL);
	if (IS_ERR(ns->bdev)) {
		pr_err("nvmet: failed to open block device %s: (%ld)\n",
			ns->device_path, PTR_ERR(ns->bdev));
		ret = PTR_ERR(ns->bdev);
		ns->bdev = NULL;
		goto out_unlock;
	}

	ns->size = i_size_read(ns->bdev->bd_inode);
	ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));

	ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
				0, GFP_KERNEL);
	if (ret)
		goto out_blkdev_put;

	if (ns->nsid > subsys->max_nsid)
		subsys->max_nsid = ns->nsid;

	/*
	 * The namespaces list needs to be sorted to simplify the
	 * implementation of the Identify Namespace List subcommand.
	 */
	if (list_empty(&subsys->namespaces)) {
		list_add_tail_rcu(&ns->dev_link, &subsys->namespaces);
	} else {
		struct nvmet_ns *old;

		list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) {
			BUG_ON(ns->nsid == old->nsid);
			if (ns->nsid < old->nsid)
				break;
		}

		list_add_tail_rcu(&ns->dev_link, &old->dev_link);
	}

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);

	ret = 0;
out_unlock:
	mutex_unlock(&subsys->lock);
	return ret;
out_blkdev_put:
	blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
	ns->bdev = NULL;
	goto out_unlock;
}

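/*
 * Disable a namespace: unlink it from the subsystem, wait for all
 * outstanding references to drop, then release the backing device.
 */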
void nvmet_ns_disable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	if (list_empty(&ns->dev_link)) {
		mutex_unlock(&subsys->lock);
		return;
	}
	list_del_init(&ns->dev_link);
	mutex_unlock(&subsys->lock);

	/*
	 * Now that we removed the namespace from the lookup list, we
	 * can kill the percpu ref and wait for any remaining references
	 * to be dropped, as well as an RCU grace period for anyone only
	 * using the namespace under rcu_read_lock().  Note that we can't
	 * use call_rcu here as we need to ensure the namespaces have
	 * been fully destroyed before unloading the module.
	 */
	percpu_ref_kill(&ns->ref);
	synchronize_rcu();
	wait_for_completion(&ns->disable_done);
	percpu_ref_exit(&ns->ref);

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);

	if (ns->bdev)
		blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
	mutex_unlock(&subsys->lock);
}

void nvmet_ns_free(struct nvmet_ns *ns)
{
	nvmet_ns_disable(ns);

	kfree(ns->device_path);
	kfree(ns);
}

struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ns *ns;

	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
	if (!ns)
		return NULL;

	INIT_LIST_HEAD(&ns->dev_link);
	init_completion(&ns->disable_done);

	ns->nsid = nsid;
	ns->subsys = subsys;

	return ns;
}

static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	if (status)
		nvmet_set_status(req, status);

	/* XXX: need to fill in something useful for sq_head */
	req->rsp->sq_head = 0;
	if (likely(req->sq)) /* may happen during early failure */
		req->rsp->sq_id = cpu_to_le16(req->sq->qid);
	req->rsp->command_id = req->cmd->common.command_id;

	if (req->ns)
		nvmet_put_namespace(req->ns);
	req->ops->queue_response(req);
}

void nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	__nvmet_req_complete(req, status);
	percpu_ref_put(&req->sq->ref);
}
EXPORT_SYMBOL_GPL(nvmet_req_complete);

void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
		u16 qid, u16 size)
{
	cq->qid = qid;
	cq->size = size;

	ctrl->cqs[qid] = cq;
}

void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
		u16 qid, u16 size)
{
	sq->qid = qid;
	sq->size = size;

	ctrl->sqs[qid] = sq;
}

void nvmet_sq_destroy(struct nvmet_sq *sq)
{
	/*
	 * If this is the admin queue, complete all AERs so that our
	 * queue doesn't have outstanding requests on it.
	 */
	if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq)
		nvmet_async_events_free(sq->ctrl);
	percpu_ref_kill(&sq->ref);
	wait_for_completion(&sq->free_done);
	percpu_ref_exit(&sq->ref);

	if (sq->ctrl) {
		nvmet_ctrl_put(sq->ctrl);
		sq->ctrl = NULL; /* allows reusing the queue later */
	}
}
EXPORT_SYMBOL_GPL(nvmet_sq_destroy);

static void nvmet_sq_free(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->free_done);
}

int nvmet_sq_init(struct nvmet_sq *sq)
{
	int ret;

	ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
	if (ret) {
		pr_err("percpu_ref init failed!\n");
		return ret;
	}
	init_completion(&sq->free_done);

	return 0;
}
EXPORT_SYMBOL_GPL(nvmet_sq_init);

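/*
 * Called by the transport for every received command capsule: initialize
 * the request, reject fused commands and unsupported SGL descriptor types,
 * and dispatch to the connect/fabrics/admin/discovery/I/O command parsers.
 * Returns false (after sending an error completion) if the command cannot
 * be executed.
 */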
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
		struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops)
{
	u8 flags = req->cmd->common.flags;
	u16 status;

	req->cq = cq;
	req->sq = sq;
	req->ops = ops;
	req->sg = NULL;
	req->sg_cnt = 0;
	req->rsp->status = 0;

	/* no support for fused commands yet */
	if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	/* either variant of SGLs is fine, as we don't support metadata */
	if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF &&
		     (flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METASEG)) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	if (unlikely(!req->sq->ctrl))
		/* will return an error for any Non-connect command: */
		status = nvmet_parse_connect_cmd(req);
	else if (likely(req->sq->qid != 0))
		status = nvmet_parse_io_cmd(req);
	else if (req->cmd->common.opcode == nvme_fabrics_command)
		status = nvmet_parse_fabrics_cmd(req);
	else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
		status = nvmet_parse_discovery_cmd(req);
	else
		status = nvmet_parse_admin_cmd(req);

	if (status)
		goto fail;

	if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	return true;

fail:
	__nvmet_req_complete(req, status);
	return false;
}
EXPORT_SYMBOL_GPL(nvmet_req_init);

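/*
 * Field extraction helpers for the Controller Configuration (CC) register;
 * the bit positions follow the NVMe specification (EN, CSS, MPS, AMS, SHN,
 * IOSQES, IOCQES).
 */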
static inline bool nvmet_cc_en(u32 cc)
{
	return cc & 0x1;
}

static inline u8 nvmet_cc_css(u32 cc)
{
	return (cc >> 4) & 0x7;
}

static inline u8 nvmet_cc_mps(u32 cc)
{
	return (cc >> 7) & 0xf;
}

static inline u8 nvmet_cc_ams(u32 cc)
{
	return (cc >> 11) & 0x7;
}

static inline u8 nvmet_cc_shn(u32 cc)
{
	return (cc >> 14) & 0x3;
}

static inline u8 nvmet_cc_iosqes(u32 cc)
{
	return (cc >> 16) & 0xf;
}

static inline u8 nvmet_cc_iocqes(u32 cc)
{
	return (cc >> 20) & 0xf;
}

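/*
 * Validate the CC register values written by the host; on success report
 * CSTS.RDY, otherwise flag a controller fatal status (CSTS.CFS).
 */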
static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
	    nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES ||
	    nvmet_cc_mps(ctrl->cc) != 0 ||
	    nvmet_cc_ams(ctrl->cc) != 0 ||
	    nvmet_cc_css(ctrl->cc) != 0) {
		ctrl->csts = NVME_CSTS_CFS;
		return;
	}

	ctrl->csts = NVME_CSTS_RDY;
}

static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	/* XXX: tear down queues? */
	ctrl->csts &= ~NVME_CSTS_RDY;
	ctrl->cc = 0;
}

void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
{
	u32 old;

	mutex_lock(&ctrl->lock);
	old = ctrl->cc;
	ctrl->cc = new;

	if (nvmet_cc_en(new) && !nvmet_cc_en(old))
		nvmet_start_ctrl(ctrl);
	if (!nvmet_cc_en(new) && nvmet_cc_en(old))
		nvmet_clear_ctrl(ctrl);
	if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
		nvmet_clear_ctrl(ctrl);
		ctrl->csts |= NVME_CSTS_SHST_CMPLT;
	}
	if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
		ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
	mutex_unlock(&ctrl->lock);
}

static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
{
	/* command sets supported: NVMe command set: */
	ctrl->cap = (1ULL << 37);
	/* CC.EN timeout in 500msec units: */
	ctrl->cap |= (15ULL << 24);
	/* maximum queue entries supported: */
	ctrl->cap |= NVMET_QUEUE_SIZE - 1;
}

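/*
 * Look up an existing controller by cntlid for an I/O queue Connect command
 * and take a reference on it.  The host NQN must match the one used when
 * the controller was created.
 */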
u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
		struct nvmet_req *req, struct nvmet_ctrl **ret)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	u16 status = 0;

	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->rsp->result = IPO_IATTR_CONNECT_DATA(subsysnqn);
		return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	}

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (ctrl->cntlid == cntlid) {
			if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) {
				pr_warn("hostnqn mismatch.\n");
				continue;
			}
			if (!kref_get_unless_zero(&ctrl->ref))
				continue;

			*ret = ctrl;
			goto out;
		}
	}

	pr_warn("could not find controller %d for subsys %s / host %s\n",
		cntlid, subsysnqn, hostnqn);
	req->rsp->result = IPO_IATTR_CONNECT_DATA(cntlid);
	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;

out:
	mutex_unlock(&subsys->lock);
	nvmet_subsys_put(subsys);
	return status;
}

static bool __nvmet_host_allowed(struct nvmet_subsys *subsys,
		const char *hostnqn)
{
	struct nvmet_host_link *p;

	if (subsys->allow_any_host)
		return true;

	list_for_each_entry(p, &subsys->hosts, entry) {
		if (!strcmp(nvmet_host_name(p->host), hostnqn))
			return true;
	}

	return false;
}

static bool nvmet_host_discovery_allowed(struct nvmet_req *req,
		const char *hostnqn)
{
	struct nvmet_subsys_link *s;

	list_for_each_entry(s, &req->port->subsystems, entry) {
		if (__nvmet_host_allowed(s->subsys, hostnqn))
			return true;
	}

	return false;
}

bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
		const char *hostnqn)
{
	lockdep_assert_held(&nvmet_config_sem);

	if (subsys->type == NVME_NQN_DISC)
		return nvmet_host_discovery_allowed(req, hostnqn);
	else
		return __nvmet_host_allowed(subsys, hostnqn);
}

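/*
 * Allocate a new controller in response to an admin-queue Connect command:
 * check that the host is allowed to access the subsystem, allocate the
 * controller with its CQ/SQ arrays and controller ID, and start the
 * keep-alive timer (discovery controllers use a fixed NVMET_DISC_KATO).
 */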
u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
		struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	int ret;
	u16 status;

	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->rsp->result = IPO_IATTR_CONNECT_DATA(subsysnqn);
		goto out;
	}

	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	down_read(&nvmet_config_sem);
	if (!nvmet_host_allowed(req, subsys, hostnqn)) {
		pr_info("connect by host %s for subsystem %s not allowed\n",
			hostnqn, subsysnqn);
		req->rsp->result = IPO_IATTR_CONNECT_DATA(hostnqn);
		up_read(&nvmet_config_sem);
		goto out_put_subsystem;
	}
	up_read(&nvmet_config_sem);

	status = NVME_SC_INTERNAL;
	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
	if (!ctrl)
		goto out_put_subsystem;
	mutex_init(&ctrl->lock);

	nvmet_init_cap(ctrl);

	INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
	INIT_LIST_HEAD(&ctrl->async_events);

	memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
	memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);

	kref_init(&ctrl->ref);
	ctrl->subsys = subsys;

	ctrl->cqs = kcalloc(subsys->max_qid + 1,
			sizeof(struct nvmet_cq *),
			GFP_KERNEL);
	if (!ctrl->cqs)
		goto out_free_ctrl;

	ctrl->sqs = kcalloc(subsys->max_qid + 1,
			sizeof(struct nvmet_sq *),
			GFP_KERNEL);
	if (!ctrl->sqs)
		goto out_free_cqs;

	ret = ida_simple_get(&subsys->cntlid_ida,
			     NVME_CNTLID_MIN, NVME_CNTLID_MAX,
			     GFP_KERNEL);
	if (ret < 0) {
		status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
		goto out_free_sqs;
	}
	ctrl->cntlid = ret;

	ctrl->ops = req->ops;
	if (ctrl->subsys->type == NVME_NQN_DISC) {
		/* Don't accept keep-alive timeout for discovery controllers */
		if (kato) {
			status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
			goto out_free_sqs;
		}

		/*
		 * Discovery controllers use an arbitrarily high value in
		 * order to clean up stale discovery sessions.
		 *
		 * From the latest base diff RC:
		 * "The Keep Alive command is not supported by
		 * Discovery controllers. A transport may specify a
		 * fixed Discovery controller activity timeout value
		 * (e.g., 2 minutes).  If no commands are received
		 * by a Discovery controller within that time
		 * period, the controller may perform the
		 * actions for Keep Alive Timer expiration".
		 */
		ctrl->kato = NVMET_DISC_KATO;
	} else {
		/* keep-alive timeout in seconds */
		ctrl->kato = DIV_ROUND_UP(kato, 1000);
	}
	nvmet_start_keep_alive_timer(ctrl);

	mutex_lock(&subsys->lock);
	list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
	mutex_unlock(&subsys->lock);

	*ctrlp = ctrl;
	return 0;

out_free_sqs:
	kfree(ctrl->sqs);
out_free_cqs:
	kfree(ctrl->cqs);
out_free_ctrl:
	kfree(ctrl);
out_put_subsystem:
	nvmet_subsys_put(subsys);
out:
	return status;
}

static void nvmet_ctrl_free(struct kref *ref)
{
	struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
	struct nvmet_subsys *subsys = ctrl->subsys;

	nvmet_stop_keep_alive_timer(ctrl);

	mutex_lock(&subsys->lock);
	list_del(&ctrl->subsys_entry);
	mutex_unlock(&subsys->lock);

	ida_simple_remove(&subsys->cntlid_ida, ctrl->cntlid);
	nvmet_subsys_put(subsys);

	kfree(ctrl->sqs);
	kfree(ctrl->cqs);
	kfree(ctrl);
}

void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
{
	kref_put(&ctrl->ref, nvmet_ctrl_free);
}

static void nvmet_fatal_error_handler(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
			container_of(work, struct nvmet_ctrl, fatal_err_work);

	pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
	ctrl->ops->delete_ctrl(ctrl);
}

void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
{
	ctrl->csts |= NVME_CSTS_CFS;
	INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
	schedule_work(&ctrl->fatal_err_work);
}
EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);

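/*
 * Resolve a subsystem NQN for a Connect on the given port: the well-known
 * discovery NQN maps to the global discovery subsystem, any other NQN is
 * looked up in the port's subsystem list under nvmet_config_sem.  A
 * reference is taken on the returned subsystem.
 */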
static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn)
{
	struct nvmet_subsys_link *p;

	if (!port)
		return NULL;

	if (!strncmp(NVME_DISC_SUBSYS_NAME, subsysnqn,
			NVMF_NQN_SIZE)) {
		if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
			return NULL;
		return nvmet_disc_subsys;
	}

	down_read(&nvmet_config_sem);
	list_for_each_entry(p, &port->subsystems, entry) {
		if (!strncmp(p->subsys->subsysnqn, subsysnqn,
				NVMF_NQN_SIZE)) {
			if (!kref_get_unless_zero(&p->subsys->ref))
				break;
			up_read(&nvmet_config_sem);
			return p->subsys;
		}
	}
	up_read(&nvmet_config_sem);
	return NULL;
}

struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
		enum nvme_subsys_type type)
{
	struct nvmet_subsys *subsys;

	subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
	if (!subsys)
		return NULL;

	subsys->ver = (1 << 16) | (2 << 8) | 1; /* NVMe 1.2.1 */

	switch (type) {
	case NVME_NQN_NVME:
		subsys->max_qid = NVMET_NR_QUEUES;
		break;
	case NVME_NQN_DISC:
		subsys->max_qid = 0;
		break;
	default:
		pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
		kfree(subsys);
		return NULL;
	}
	subsys->type = type;
	subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
			GFP_KERNEL);
	if (!subsys->subsysnqn) {
		kfree(subsys);
		return NULL;
	}

	kref_init(&subsys->ref);

	mutex_init(&subsys->lock);
	INIT_LIST_HEAD(&subsys->namespaces);
	INIT_LIST_HEAD(&subsys->ctrls);

	ida_init(&subsys->cntlid_ida);

	INIT_LIST_HEAD(&subsys->hosts);

	return subsys;
}

static void nvmet_subsys_free(struct kref *ref)
{
	struct nvmet_subsys *subsys =
		container_of(ref, struct nvmet_subsys, ref);

	WARN_ON_ONCE(!list_empty(&subsys->namespaces));

	ida_destroy(&subsys->cntlid_ida);
	kfree(subsys->subsysnqn);
	kfree(subsys);
}

void nvmet_subsys_put(struct nvmet_subsys *subsys)
{
	kref_put(&subsys->ref, nvmet_subsys_free);
}

static int __init nvmet_init(void)
{
	int error;

	error = nvmet_init_discovery();
	if (error)
		goto out;

	error = nvmet_init_configfs();
	if (error)
		goto out_exit_discovery;
	return 0;

out_exit_discovery:
	nvmet_exit_discovery();
out:
	return error;
}

static void __exit nvmet_exit(void)
{
	nvmet_exit_configfs();
	nvmet_exit_discovery();

	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
}

module_init(nvmet_init);
module_exit(nvmet_exit);

MODULE_LICENSE("GPL v2");