xref: /openbmc/linux/drivers/nvme/target/core.c (revision 4e1a33b1)
1 /*
2  * Common code for the NVMe target.
3  * Copyright (c) 2015-2016 HGST, a Western Digital Company.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms and conditions of the GNU General Public License,
7  * version 2, as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12  * more details.
13  */
14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15 #include <linux/module.h>
16 #include <linux/random.h>
17 #include "nvmet.h"
18 
19 static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
20 
21 /*
22  * This read/write semaphore is used to synchronize access to configuration
23  * information on a target system; any change to it results in a discovery
24  * log page information change for at least one host.
25  * The full list of resources protected by this semaphore is:
26  *
27  *  - subsystems list
28  *  - per-subsystem allowed hosts list
29  *  - allow_any_host subsystem attribute
30  *  - nvmet_genctr
31  *  - the nvmet_transports array
32  *
33  * When updating any of those lists/structures the write lock must be held,
34  * while readers (populating the discovery log page or checking a
35  * host-subsystem link) take the read lock to allow concurrent reads.
36  */
37 DECLARE_RWSEM(nvmet_config_sem);
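
/*
 * Typical usage in this file: configuration updates wrap the change in
 *
 *	down_write(&nvmet_config_sem);
 *	... add or remove a subsystem, host link or transport ...
 *	up_write(&nvmet_config_sem);
 *
 * while lookup paths such as nvmet_find_get_subsys() and the host
 * permission check in nvmet_alloc_ctrl() only take the read lock, so
 * concurrent connect requests and discovery log page reads do not
 * serialize against each other.
 */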
38 
39 static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
40 		const char *subsysnqn);
41 
42 u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
43 		size_t len)
44 {
45 	if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
46 		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
47 	return 0;
48 }
49 
50 u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
51 {
52 	if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
53 		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
54 	return 0;
55 }
56 
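/*
 * Pack the AEN into Dword 0 of the Asynchronous Event Request completion
 * as defined by the NVMe spec: Asynchronous Event Type in bits 02:00,
 * Asynchronous Event Information in bits 15:08 and the Log Page
 * Identifier in bits 23:16.
 */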
57 static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
58 {
59 	return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
60 }
61 
62 static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
63 {
64 	struct nvmet_req *req;
65 
66 	while (1) {
67 		mutex_lock(&ctrl->lock);
68 		if (!ctrl->nr_async_event_cmds) {
69 			mutex_unlock(&ctrl->lock);
70 			return;
71 		}
72 
73 		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
74 		mutex_unlock(&ctrl->lock);
75 		nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
76 	}
77 }
78 
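/*
 * Pair queued events with outstanding Asynchronous Event Request
 * commands: each AER the host has posted is completed with the oldest
 * pending event until we run out of one or the other.
 */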
79 static void nvmet_async_event_work(struct work_struct *work)
80 {
81 	struct nvmet_ctrl *ctrl =
82 		container_of(work, struct nvmet_ctrl, async_event_work);
83 	struct nvmet_async_event *aen;
84 	struct nvmet_req *req;
85 
86 	while (1) {
87 		mutex_lock(&ctrl->lock);
88 		aen = list_first_entry_or_null(&ctrl->async_events,
89 				struct nvmet_async_event, entry);
90 		if (!aen || !ctrl->nr_async_event_cmds) {
91 			mutex_unlock(&ctrl->lock);
92 			return;
93 		}
94 
95 		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
96 		nvmet_set_result(req, nvmet_async_event_result(aen));
97 
98 		list_del(&aen->entry);
99 		kfree(aen);
100 
101 		mutex_unlock(&ctrl->lock);
102 		nvmet_req_complete(req, 0);
103 	}
104 }
105 
106 static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
107 		u8 event_info, u8 log_page)
108 {
109 	struct nvmet_async_event *aen;
110 
111 	aen = kmalloc(sizeof(*aen), GFP_KERNEL);
112 	if (!aen)
113 		return;
114 
115 	aen->event_type = event_type;
116 	aen->event_info = event_info;
117 	aen->log_page = log_page;
118 
119 	mutex_lock(&ctrl->lock);
120 	list_add_tail(&aen->entry, &ctrl->async_events);
121 	mutex_unlock(&ctrl->lock);
122 
123 	schedule_work(&ctrl->async_event_work);
124 }
125 
126 int nvmet_register_transport(struct nvmet_fabrics_ops *ops)
127 {
128 	int ret = 0;
129 
130 	down_write(&nvmet_config_sem);
131 	if (nvmet_transports[ops->type])
132 		ret = -EINVAL;
133 	else
134 		nvmet_transports[ops->type] = ops;
135 	up_write(&nvmet_config_sem);
136 
137 	return ret;
138 }
139 EXPORT_SYMBOL_GPL(nvmet_register_transport);
140 
141 void nvmet_unregister_transport(struct nvmet_fabrics_ops *ops)
142 {
143 	down_write(&nvmet_config_sem);
144 	nvmet_transports[ops->type] = NULL;
145 	up_write(&nvmet_config_sem);
146 }
147 EXPORT_SYMBOL_GPL(nvmet_unregister_transport);
148 
149 int nvmet_enable_port(struct nvmet_port *port)
150 {
151 	struct nvmet_fabrics_ops *ops;
152 	int ret;
153 
154 	lockdep_assert_held(&nvmet_config_sem);
155 
156 	ops = nvmet_transports[port->disc_addr.trtype];
157 	if (!ops) {
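		/*
		 * Drop nvmet_config_sem while the transport module loads:
		 * its module_init() calls nvmet_register_transport(), which
		 * takes this semaphore for writing, so holding it across
		 * request_module() would deadlock.
		 */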
158 		up_write(&nvmet_config_sem);
159 		request_module("nvmet-transport-%d", port->disc_addr.trtype);
160 		down_write(&nvmet_config_sem);
161 		ops = nvmet_transports[port->disc_addr.trtype];
162 		if (!ops) {
163 			pr_err("transport type %d not supported\n",
164 				port->disc_addr.trtype);
165 			return -EINVAL;
166 		}
167 	}
168 
169 	if (!try_module_get(ops->owner))
170 		return -EINVAL;
171 
172 	ret = ops->add_port(port);
173 	if (ret) {
174 		module_put(ops->owner);
175 		return ret;
176 	}
177 
178 	port->enabled = true;
179 	return 0;
180 }
181 
182 void nvmet_disable_port(struct nvmet_port *port)
183 {
184 	struct nvmet_fabrics_ops *ops;
185 
186 	lockdep_assert_held(&nvmet_config_sem);
187 
188 	port->enabled = false;
189 
190 	ops = nvmet_transports[port->disc_addr.trtype];
191 	ops->remove_port(port);
192 	module_put(ops->owner);
193 }
194 
195 static void nvmet_keep_alive_timer(struct work_struct *work)
196 {
197 	struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
198 			struct nvmet_ctrl, ka_work);
199 
200 	pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
201 		ctrl->cntlid, ctrl->kato);
202 
203 	nvmet_ctrl_fatal_error(ctrl);
204 }
205 
206 static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
207 {
208 	pr_debug("ctrl %d start keep-alive timer for %d secs\n",
209 		ctrl->cntlid, ctrl->kato);
210 
211 	INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
212 	schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
213 }
214 
215 static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
216 {
217 	pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);
218 
219 	cancel_delayed_work_sync(&ctrl->ka_work);
220 }
221 
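/*
 * Namespace lookup is RCU protected: nvmet_find_namespace() below walks
 * the list under rcu_read_lock() and takes a percpu reference so the
 * namespace stays alive once the RCU read-side critical section ends.
 */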
222 static struct nvmet_ns *__nvmet_find_namespace(struct nvmet_ctrl *ctrl,
223 		__le32 nsid)
224 {
225 	struct nvmet_ns *ns;
226 
227 	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
228 		if (ns->nsid == le32_to_cpu(nsid))
229 			return ns;
230 	}
231 
232 	return NULL;
233 }
234 
235 struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid)
236 {
237 	struct nvmet_ns *ns;
238 
239 	rcu_read_lock();
240 	ns = __nvmet_find_namespace(ctrl, nsid);
241 	if (ns)
242 		percpu_ref_get(&ns->ref);
243 	rcu_read_unlock();
244 
245 	return ns;
246 }
247 
248 static void nvmet_destroy_namespace(struct percpu_ref *ref)
249 {
250 	struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);
251 
252 	complete(&ns->disable_done);
253 }
254 
255 void nvmet_put_namespace(struct nvmet_ns *ns)
256 {
257 	percpu_ref_put(&ns->ref);
258 }
259 
260 int nvmet_ns_enable(struct nvmet_ns *ns)
261 {
262 	struct nvmet_subsys *subsys = ns->subsys;
263 	struct nvmet_ctrl *ctrl;
264 	int ret = 0;
265 
266 	mutex_lock(&subsys->lock);
267 	if (ns->enabled)
268 		goto out_unlock;
269 
270 	ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE,
271 			NULL);
272 	if (IS_ERR(ns->bdev)) {
273 		pr_err("nvmet: failed to open block device %s: (%ld)\n",
274 			ns->device_path, PTR_ERR(ns->bdev));
275 		ret = PTR_ERR(ns->bdev);
276 		ns->bdev = NULL;
277 		goto out_unlock;
278 	}
279 
280 	ns->size = i_size_read(ns->bdev->bd_inode);
281 	ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
282 
283 	ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
284 				0, GFP_KERNEL);
285 	if (ret)
286 		goto out_blkdev_put;
287 
288 	if (ns->nsid > subsys->max_nsid)
289 		subsys->max_nsid = ns->nsid;
290 
291 	/*
292 	 * The namespaces list needs to be sorted to simplify the implementation
293 	 * of the Identify Namespace List subcommand.
294 	 */
295 	if (list_empty(&subsys->namespaces)) {
296 		list_add_tail_rcu(&ns->dev_link, &subsys->namespaces);
297 	} else {
298 		struct nvmet_ns *old;
299 
300 		list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) {
301 			BUG_ON(ns->nsid == old->nsid);
302 			if (ns->nsid < old->nsid)
303 				break;
304 		}
305 
306 		list_add_tail_rcu(&ns->dev_link, &old->dev_link);
307 	}
308 
309 	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
310 		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);
311 
312 	ns->enabled = true;
313 	ret = 0;
314 out_unlock:
315 	mutex_unlock(&subsys->lock);
316 	return ret;
317 out_blkdev_put:
318 	blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
319 	ns->bdev = NULL;
320 	goto out_unlock;
321 }
322 
323 void nvmet_ns_disable(struct nvmet_ns *ns)
324 {
325 	struct nvmet_subsys *subsys = ns->subsys;
326 	struct nvmet_ctrl *ctrl;
327 
328 	mutex_lock(&subsys->lock);
329 	if (!ns->enabled)
330 		goto out_unlock;
331 
332 	ns->enabled = false;
333 	list_del_rcu(&ns->dev_link);
334 	mutex_unlock(&subsys->lock);
335 
336 	/*
337 	 * Now that we removed the namespace from the lookup list, we
338 	 * can kill the percpu ref and wait for any remaining references
339 	 * to be dropped, as well as an RCU grace period for anyone only
340 	 * using the namespace under rcu_read_lock().  Note that we can't
341 	 * use call_rcu here as we need to ensure the namespaces have
342 	 * been fully destroyed before unloading the module.
343 	 */
344 	percpu_ref_kill(&ns->ref);
345 	synchronize_rcu();
346 	wait_for_completion(&ns->disable_done);
347 	percpu_ref_exit(&ns->ref);
348 
349 	mutex_lock(&subsys->lock);
350 	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
351 		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);
352 
353 	if (ns->bdev)
354 		blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
355 out_unlock:
356 	mutex_unlock(&subsys->lock);
357 }
358 
359 void nvmet_ns_free(struct nvmet_ns *ns)
360 {
361 	nvmet_ns_disable(ns);
362 
363 	kfree(ns->device_path);
364 	kfree(ns);
365 }
366 
367 struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
368 {
369 	struct nvmet_ns *ns;
370 
371 	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
372 	if (!ns)
373 		return NULL;
374 
375 	INIT_LIST_HEAD(&ns->dev_link);
376 	init_completion(&ns->disable_done);
377 
378 	ns->nsid = nsid;
379 	ns->subsys = subsys;
380 
381 	return ns;
382 }
383 
384 static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
385 {
386 	if (status)
387 		nvmet_set_status(req, status);
388 
389 	/* XXX: need to fill in something useful for sq_head */
390 	req->rsp->sq_head = 0;
391 	if (likely(req->sq)) /* may happen during early failure */
392 		req->rsp->sq_id = cpu_to_le16(req->sq->qid);
393 	req->rsp->command_id = req->cmd->common.command_id;
394 
395 	if (req->ns)
396 		nvmet_put_namespace(req->ns);
397 	req->ops->queue_response(req);
398 }
399 
400 void nvmet_req_complete(struct nvmet_req *req, u16 status)
401 {
402 	__nvmet_req_complete(req, status);
403 	percpu_ref_put(&req->sq->ref);
404 }
405 EXPORT_SYMBOL_GPL(nvmet_req_complete);
406 
407 void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
408 		u16 qid, u16 size)
409 {
410 	cq->qid = qid;
411 	cq->size = size;
412 
413 	ctrl->cqs[qid] = cq;
414 }
415 
416 void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
417 		u16 qid, u16 size)
418 {
419 	sq->qid = qid;
420 	sq->size = size;
421 
422 	ctrl->sqs[qid] = sq;
423 }
424 
425 void nvmet_sq_destroy(struct nvmet_sq *sq)
426 {
427 	/*
428 	 * If this is the admin queue, complete all AERs so that our
429 	 * queue doesn't have outstanding requests on it.
430 	 */
431 	if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq)
432 		nvmet_async_events_free(sq->ctrl);
433 	percpu_ref_kill(&sq->ref);
434 	wait_for_completion(&sq->free_done);
435 	percpu_ref_exit(&sq->ref);
436 
437 	if (sq->ctrl) {
438 		nvmet_ctrl_put(sq->ctrl);
439 		sq->ctrl = NULL; /* allows reusing the queue later */
440 	}
441 }
442 EXPORT_SYMBOL_GPL(nvmet_sq_destroy);
443 
444 static void nvmet_sq_free(struct percpu_ref *ref)
445 {
446 	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);
447 
448 	complete(&sq->free_done);
449 }
450 
451 int nvmet_sq_init(struct nvmet_sq *sq)
452 {
453 	int ret;
454 
455 	ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
456 	if (ret) {
457 		pr_err("percpu_ref init failed!\n");
458 		return ret;
459 	}
460 	init_completion(&sq->free_done);
461 
462 	return 0;
463 }
464 EXPORT_SYMBOL_GPL(nvmet_sq_init);
465 
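/*
 * Called by the transport drivers for each command received from the
 * host: set up the request, reject unsupported flag combinations and
 * pick the parser for it (Connect commands before a controller is
 * associated with the queue, I/O commands on non-admin queues, fabrics,
 * discovery or admin commands on the admin queue).  On success a
 * submission queue reference is held until nvmet_req_complete().
 */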
466 bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
467 		struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops)
468 {
469 	u8 flags = req->cmd->common.flags;
470 	u16 status;
471 
472 	req->cq = cq;
473 	req->sq = sq;
474 	req->ops = ops;
475 	req->sg = NULL;
476 	req->sg_cnt = 0;
477 	req->rsp->status = 0;
478 
479 	/* no support for fused commands yet */
480 	if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
481 		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
482 		goto fail;
483 	}
484 
485 	/* either variant of SGLs is fine, as we don't support metadata */
486 	if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF &&
487 		     (flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METASEG)) {
488 		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
489 		goto fail;
490 	}
491 
492 	if (unlikely(!req->sq->ctrl))
493 		/* will return an error for any non-Connect command: */
494 		status = nvmet_parse_connect_cmd(req);
495 	else if (likely(req->sq->qid != 0))
496 		status = nvmet_parse_io_cmd(req);
497 	else if (req->cmd->common.opcode == nvme_fabrics_command)
498 		status = nvmet_parse_fabrics_cmd(req);
499 	else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
500 		status = nvmet_parse_discovery_cmd(req);
501 	else
502 		status = nvmet_parse_admin_cmd(req);
503 
504 	if (status)
505 		goto fail;
506 
507 	if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
508 		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
509 		goto fail;
510 	}
511 
512 	return true;
513 
514 fail:
515 	__nvmet_req_complete(req, status);
516 	return false;
517 }
518 EXPORT_SYMBOL_GPL(nvmet_req_init);
519 
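/*
 * Accessors for the fields of the Controller Configuration (CC)
 * property as laid out in the NVMe specification: EN is bit 00, CSS
 * bits 06:04, MPS bits 10:07, AMS bits 13:11, SHN bits 15:14, IOSQES
 * bits 19:16 and IOCQES bits 23:20.
 */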
520 static inline bool nvmet_cc_en(u32 cc)
521 {
522 	return cc & 0x1;
523 }
524 
525 static inline u8 nvmet_cc_css(u32 cc)
526 {
527 	return (cc >> 4) & 0x7;
528 }
529 
530 static inline u8 nvmet_cc_mps(u32 cc)
531 {
532 	return (cc >> 7) & 0xf;
533 }
534 
535 static inline u8 nvmet_cc_ams(u32 cc)
536 {
537 	return (cc >> 11) & 0x7;
538 }
539 
540 static inline u8 nvmet_cc_shn(u32 cc)
541 {
542 	return (cc >> 14) & 0x3;
543 }
544 
545 static inline u8 nvmet_cc_iosqes(u32 cc)
546 {
547 	return (cc >> 16) & 0xf;
548 }
549 
550 static inline u8 nvmet_cc_iocqes(u32 cc)
551 {
552 	return (cc >> 20) & 0xf;
553 }
554 
555 static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
556 {
557 	lockdep_assert_held(&ctrl->lock);
558 
559 	if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
560 	    nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES ||
561 	    nvmet_cc_mps(ctrl->cc) != 0 ||
562 	    nvmet_cc_ams(ctrl->cc) != 0 ||
563 	    nvmet_cc_css(ctrl->cc) != 0) {
564 		ctrl->csts = NVME_CSTS_CFS;
565 		return;
566 	}
567 
568 	ctrl->csts = NVME_CSTS_RDY;
569 }
570 
571 static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
572 {
573 	lockdep_assert_held(&ctrl->lock);
574 
575 	/* XXX: tear down queues? */
576 	ctrl->csts &= ~NVME_CSTS_RDY;
577 	ctrl->cc = 0;
578 }
579 
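/*
 * Handle a host write to the CC property: a 0 -> 1 transition of CC.EN
 * brings the controller up and sets CSTS.RDY, a 1 -> 0 transition tears
 * it down again, and setting CC.SHN performs a shutdown and reports
 * CSTS.SHST_CMPLT.
 */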
580 void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
581 {
582 	u32 old;
583 
584 	mutex_lock(&ctrl->lock);
585 	old = ctrl->cc;
586 	ctrl->cc = new;
587 
588 	if (nvmet_cc_en(new) && !nvmet_cc_en(old))
589 		nvmet_start_ctrl(ctrl);
590 	if (!nvmet_cc_en(new) && nvmet_cc_en(old))
591 		nvmet_clear_ctrl(ctrl);
592 	if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
593 		nvmet_clear_ctrl(ctrl);
594 		ctrl->csts |= NVME_CSTS_SHST_CMPLT;
595 	}
596 	if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
597 		ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
598 	mutex_unlock(&ctrl->lock);
599 }
600 
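/*
 * Build the Controller Capabilities (CAP) property: CSS bit 37
 * advertises the NVM command set, TO (bits 31:24) is the worst-case
 * CC.EN transition time in 500ms units, and MQES (bits 15:00) is the
 * 0's based maximum queue entries supported.
 */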
601 static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
602 {
603 	/* command sets supported: NVMe command set: */
604 	ctrl->cap = (1ULL << 37);
605 	/* CC.EN timeout in 500msec units: */
606 	ctrl->cap |= (15ULL << 24);
607 	/* maximum queue entries supported: */
608 	ctrl->cap |= NVMET_QUEUE_SIZE - 1;
609 }
610 
611 u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
612 		struct nvmet_req *req, struct nvmet_ctrl **ret)
613 {
614 	struct nvmet_subsys *subsys;
615 	struct nvmet_ctrl *ctrl;
616 	u16 status = 0;
617 
618 	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
619 	if (!subsys) {
620 		pr_warn("connect request for invalid subsystem %s!\n",
621 			subsysnqn);
622 		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
623 		return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
624 	}
625 
626 	mutex_lock(&subsys->lock);
627 	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
628 		if (ctrl->cntlid == cntlid) {
629 			if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) {
630 				pr_warn("hostnqn mismatch.\n");
631 				continue;
632 			}
633 			if (!kref_get_unless_zero(&ctrl->ref))
634 				continue;
635 
636 			*ret = ctrl;
637 			goto out;
638 		}
639 	}
640 
641 	pr_warn("could not find controller %d for subsys %s / host %s\n",
642 		cntlid, subsysnqn, hostnqn);
643 	req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
644 	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
645 
646 out:
647 	mutex_unlock(&subsys->lock);
648 	nvmet_subsys_put(subsys);
649 	return status;
650 }
651 
652 static bool __nvmet_host_allowed(struct nvmet_subsys *subsys,
653 		const char *hostnqn)
654 {
655 	struct nvmet_host_link *p;
656 
657 	if (subsys->allow_any_host)
658 		return true;
659 
660 	list_for_each_entry(p, &subsys->hosts, entry) {
661 		if (!strcmp(nvmet_host_name(p->host), hostnqn))
662 			return true;
663 	}
664 
665 	return false;
666 }
667 
668 static bool nvmet_host_discovery_allowed(struct nvmet_req *req,
669 		const char *hostnqn)
670 {
671 	struct nvmet_subsys_link *s;
672 
673 	list_for_each_entry(s, &req->port->subsystems, entry) {
674 		if (__nvmet_host_allowed(s->subsys, hostnqn))
675 			return true;
676 	}
677 
678 	return false;
679 }
680 
681 bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
682 		const char *hostnqn)
683 {
684 	lockdep_assert_held(&nvmet_config_sem);
685 
686 	if (subsys->type == NVME_NQN_DISC)
687 		return nvmet_host_discovery_allowed(req, hostnqn);
688 	else
689 		return __nvmet_host_allowed(subsys, hostnqn);
690 }
691 
692 u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
693 		struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
694 {
695 	struct nvmet_subsys *subsys;
696 	struct nvmet_ctrl *ctrl;
697 	int ret;
698 	u16 status;
699 
700 	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
701 	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
702 	if (!subsys) {
703 		pr_warn("connect request for invalid subsystem %s!\n",
704 			subsysnqn);
705 		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
706 		goto out;
707 	}
708 
709 	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
710 	down_read(&nvmet_config_sem);
711 	if (!nvmet_host_allowed(req, subsys, hostnqn)) {
712 		pr_info("connect by host %s for subsystem %s not allowed\n",
713 			hostnqn, subsysnqn);
714 		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
715 		up_read(&nvmet_config_sem);
716 		goto out_put_subsystem;
717 	}
718 	up_read(&nvmet_config_sem);
719 
720 	status = NVME_SC_INTERNAL;
721 	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
722 	if (!ctrl)
723 		goto out_put_subsystem;
724 	mutex_init(&ctrl->lock);
725 
726 	nvmet_init_cap(ctrl);
727 
728 	INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
729 	INIT_LIST_HEAD(&ctrl->async_events);
730 
731 	memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
732 	memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);
733 
734 	/* generate a random serial number as our controllers are ephemeral: */
735 	get_random_bytes(&ctrl->serial, sizeof(ctrl->serial));
736 
737 	kref_init(&ctrl->ref);
738 	ctrl->subsys = subsys;
739 
740 	ctrl->cqs = kcalloc(subsys->max_qid + 1,
741 			sizeof(struct nvmet_cq *),
742 			GFP_KERNEL);
743 	if (!ctrl->cqs)
744 		goto out_free_ctrl;
745 
746 	ctrl->sqs = kcalloc(subsys->max_qid + 1,
747 			sizeof(struct nvmet_sq *),
748 			GFP_KERNEL);
749 	if (!ctrl->sqs)
750 		goto out_free_cqs;
751 
752 	ret = ida_simple_get(&subsys->cntlid_ida,
753 			     NVME_CNTLID_MIN, NVME_CNTLID_MAX,
754 			     GFP_KERNEL);
755 	if (ret < 0) {
756 		status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
757 		goto out_free_sqs;
758 	}
759 	ctrl->cntlid = ret;
760 
761 	ctrl->ops = req->ops;
762 	if (ctrl->subsys->type == NVME_NQN_DISC) {
763 		/* Don't accept keep-alive timeout for discovery controllers */
764 		if (kato) {
765 			status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
766 			goto out_free_sqs;
767 		}
768 
769 		/*
770 		 * Discovery controllers use some arbitrary high value in order
771 		 * to clean up stale discovery sessions.
772 		 *
773 		 * From the latest base diff RC:
774 		 * "The Keep Alive command is not supported by
775 		 * Discovery controllers. A transport may specify a
776 		 * fixed Discovery controller activity timeout value
777 		 * (e.g., 2 minutes).  If no commands are received
778 		 * by a Discovery controller within that time
779 		 * period, the controller may perform the
780 		 * actions for Keep Alive Timer expiration".
781 		 */
782 		ctrl->kato = NVMET_DISC_KATO;
783 	} else {
784 		/* keep-alive timeout in seconds */
785 		ctrl->kato = DIV_ROUND_UP(kato, 1000);
786 	}
787 	nvmet_start_keep_alive_timer(ctrl);
788 
789 	mutex_lock(&subsys->lock);
790 	list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
791 	mutex_unlock(&subsys->lock);
792 
793 	*ctrlp = ctrl;
794 	return 0;
795 
796 out_free_sqs:
797 	kfree(ctrl->sqs);
798 out_free_cqs:
799 	kfree(ctrl->cqs);
800 out_free_ctrl:
801 	kfree(ctrl);
802 out_put_subsystem:
803 	nvmet_subsys_put(subsys);
804 out:
805 	return status;
806 }
807 
808 static void nvmet_ctrl_free(struct kref *ref)
809 {
810 	struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
811 	struct nvmet_subsys *subsys = ctrl->subsys;
812 
813 	nvmet_stop_keep_alive_timer(ctrl);
814 
815 	mutex_lock(&subsys->lock);
816 	list_del(&ctrl->subsys_entry);
817 	mutex_unlock(&subsys->lock);
818 
819 	flush_work(&ctrl->async_event_work);
820 	cancel_work_sync(&ctrl->fatal_err_work);
821 
822 	ida_simple_remove(&subsys->cntlid_ida, ctrl->cntlid);
823 	nvmet_subsys_put(subsys);
824 
825 	kfree(ctrl->sqs);
826 	kfree(ctrl->cqs);
827 	kfree(ctrl);
828 }
829 
830 void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
831 {
832 	kref_put(&ctrl->ref, nvmet_ctrl_free);
833 }
834 
835 static void nvmet_fatal_error_handler(struct work_struct *work)
836 {
837 	struct nvmet_ctrl *ctrl =
838 			container_of(work, struct nvmet_ctrl, fatal_err_work);
839 
840 	pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
841 	ctrl->ops->delete_ctrl(ctrl);
842 }
843 
844 void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
845 {
846 	mutex_lock(&ctrl->lock);
847 	if (!(ctrl->csts & NVME_CSTS_CFS)) {
848 		ctrl->csts |= NVME_CSTS_CFS;
849 		INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
850 		schedule_work(&ctrl->fatal_err_work);
851 	}
852 	mutex_unlock(&ctrl->lock);
853 }
854 EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);
855 
856 static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
857 		const char *subsysnqn)
858 {
859 	struct nvmet_subsys_link *p;
860 
861 	if (!port)
862 		return NULL;
863 
864 	if (!strncmp(NVME_DISC_SUBSYS_NAME, subsysnqn,
865 			NVMF_NQN_SIZE)) {
866 		if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
867 			return NULL;
868 		return nvmet_disc_subsys;
869 	}
870 
871 	down_read(&nvmet_config_sem);
872 	list_for_each_entry(p, &port->subsystems, entry) {
873 		if (!strncmp(p->subsys->subsysnqn, subsysnqn,
874 				NVMF_NQN_SIZE)) {
875 			if (!kref_get_unless_zero(&p->subsys->ref))
876 				break;
877 			up_read(&nvmet_config_sem);
878 			return p->subsys;
879 		}
880 	}
881 	up_read(&nvmet_config_sem);
882 	return NULL;
883 }
884 
885 struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
886 		enum nvme_subsys_type type)
887 {
888 	struct nvmet_subsys *subsys;
889 
890 	subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
891 	if (!subsys)
892 		return NULL;
893 
894 	subsys->ver = NVME_VS(1, 2, 1); /* NVMe 1.2.1 */
895 
896 	switch (type) {
897 	case NVME_NQN_NVME:
898 		subsys->max_qid = NVMET_NR_QUEUES;
899 		break;
900 	case NVME_NQN_DISC:
901 		subsys->max_qid = 0;
902 		break;
903 	default:
904 		pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
905 		kfree(subsys);
906 		return NULL;
907 	}
908 	subsys->type = type;
909 	subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
910 			GFP_KERNEL);
911 	if (!subsys->subsysnqn) {
912 		kfree(subsys);
913 		return NULL;
914 	}
915 
916 	kref_init(&subsys->ref);
917 
918 	mutex_init(&subsys->lock);
919 	INIT_LIST_HEAD(&subsys->namespaces);
920 	INIT_LIST_HEAD(&subsys->ctrls);
921 
922 	ida_init(&subsys->cntlid_ida);
923 
924 	INIT_LIST_HEAD(&subsys->hosts);
925 
926 	return subsys;
927 }
928 
929 static void nvmet_subsys_free(struct kref *ref)
930 {
931 	struct nvmet_subsys *subsys =
932 		container_of(ref, struct nvmet_subsys, ref);
933 
934 	WARN_ON_ONCE(!list_empty(&subsys->namespaces));
935 
936 	ida_destroy(&subsys->cntlid_ida);
937 	kfree(subsys->subsysnqn);
938 	kfree(subsys);
939 }
940 
941 void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys)
942 {
943 	struct nvmet_ctrl *ctrl;
944 
945 	mutex_lock(&subsys->lock);
946 	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
947 		ctrl->ops->delete_ctrl(ctrl);
948 	mutex_unlock(&subsys->lock);
949 }
950 
951 void nvmet_subsys_put(struct nvmet_subsys *subsys)
952 {
953 	kref_put(&subsys->ref, nvmet_subsys_free);
954 }
955 
956 static int __init nvmet_init(void)
957 {
958 	int error;
959 
960 	error = nvmet_init_discovery();
961 	if (error)
962 		goto out;
963 
964 	error = nvmet_init_configfs();
965 	if (error)
966 		goto out_exit_discovery;
967 	return 0;
968 
969 out_exit_discovery:
970 	nvmet_exit_discovery();
971 out:
972 	return error;
973 }
974 
975 static void __exit nvmet_exit(void)
976 {
977 	nvmet_exit_configfs();
978 	nvmet_exit_discovery();
979 
980 	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
981 	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
982 }
983 
984 module_init(nvmet_init);
985 module_exit(nvmet_exit);
986 
987 MODULE_LICENSE("GPL v2");
988