xref: /openbmc/linux/drivers/nvme/host/fabrics.c (revision 3bf2fde6)
// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe over Fabrics common host code.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/init.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/parser.h>
#include <linux/seq_file.h>
#include "nvme.h"
#include "fabrics.h"

static LIST_HEAD(nvmf_transports);
static DECLARE_RWSEM(nvmf_transports_rwsem);

static LIST_HEAD(nvmf_hosts);
static DEFINE_MUTEX(nvmf_hosts_mutex);

static struct nvmf_host *nvmf_default_host;

static struct nvmf_host *__nvmf_host_find(const char *hostnqn)
{
	struct nvmf_host *host;

	list_for_each_entry(host, &nvmf_hosts, list) {
		if (!strcmp(host->nqn, hostnqn))
			return host;
	}

	return NULL;
}

static struct nvmf_host *nvmf_host_add(const char *hostnqn)
{
	struct nvmf_host *host;

	mutex_lock(&nvmf_hosts_mutex);
	host = __nvmf_host_find(hostnqn);
	if (host) {
		kref_get(&host->ref);
		goto out_unlock;
	}

	host = kmalloc(sizeof(*host), GFP_KERNEL);
	if (!host)
		goto out_unlock;

	kref_init(&host->ref);
	strlcpy(host->nqn, hostnqn, NVMF_NQN_SIZE);

	list_add_tail(&host->list, &nvmf_hosts);
out_unlock:
	mutex_unlock(&nvmf_hosts_mutex);
	return host;
}

static struct nvmf_host *nvmf_host_default(void)
{
	struct nvmf_host *host;

	host = kmalloc(sizeof(*host), GFP_KERNEL);
	if (!host)
		return NULL;

	kref_init(&host->ref);
	uuid_gen(&host->id);
	snprintf(host->nqn, NVMF_NQN_SIZE,
		"nqn.2014-08.org.nvmexpress:uuid:%pUb", &host->id);

	mutex_lock(&nvmf_hosts_mutex);
	list_add_tail(&host->list, &nvmf_hosts);
	mutex_unlock(&nvmf_hosts_mutex);

	return host;
}

static void nvmf_host_destroy(struct kref *ref)
{
	struct nvmf_host *host = container_of(ref, struct nvmf_host, ref);

	mutex_lock(&nvmf_hosts_mutex);
	list_del(&host->list);
	mutex_unlock(&nvmf_hosts_mutex);

	kfree(host);
}

static void nvmf_host_put(struct nvmf_host *host)
{
	if (host)
		kref_put(&host->ref, nvmf_host_destroy);
}

/**
 * nvmf_get_address() - Get address/port
 * @ctrl:	Host NVMe controller instance from which to get the address
 * @buf:	OUTPUT parameter that will contain the address/port
 * @size:	buffer size
 */
int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
{
	int len = 0;

	if (ctrl->opts->mask & NVMF_OPT_TRADDR)
		len += scnprintf(buf, size, "traddr=%s", ctrl->opts->traddr);
	if (ctrl->opts->mask & NVMF_OPT_TRSVCID)
		len += scnprintf(buf + len, size - len, "%strsvcid=%s",
				(len) ? "," : "", ctrl->opts->trsvcid);
	if (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)
		len += scnprintf(buf + len, size - len, "%shost_traddr=%s",
				(len) ? "," : "", ctrl->opts->host_traddr);
	if (ctrl->opts->mask & NVMF_OPT_HOST_IFACE)
		len += scnprintf(buf + len, size - len, "%shost_iface=%s",
				(len) ? "," : "", ctrl->opts->host_iface);
	len += scnprintf(buf + len, size - len, "\n");

	return len;
}
EXPORT_SYMBOL_GPL(nvmf_get_address);
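
/*
 * Usage sketch (illustrative, not from this file): transports expose this
 * helper through the ->get_address() callback of their nvme_ctrl_ops so
 * the core can report the connection address, e.g. via sysfs:
 *
 *	char buf[NVMF_TRADDR_SIZE];
 *
 *	nvmf_get_address(ctrl, buf, sizeof(buf));
 *	// buf now holds e.g. "traddr=192.168.1.10,trsvcid=4420\n"
 */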

/**
 * nvmf_reg_read32() - NVMe Fabrics "Property Get" API function.
 * @ctrl:	Host NVMe controller instance maintaining the admin
 *		queue used to submit the property read command to
 *		the allocated NVMe controller resource on the target system.
 * @off:	Starting offset value of the targeted property
 *		register (see the fabrics section of the NVMe standard).
 * @val:	OUTPUT parameter that will contain the value of
 *		the property after a successful read.
 *
 * Used by the host system to retrieve a 32-bit capsule property value
 * from an NVMe controller on the target system.
 *
 * ("Capsule property" is a "PCIe register" concept applied to the
 * NVMe fabrics space.)
 *
 * Return:
 *	0: successful read
 *	> 0: NVMe error status code
 *	< 0: Linux errno error code
 */
int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
{
	struct nvme_command cmd = { };
	union nvme_result res;
	int ret;

	cmd.prop_get.opcode = nvme_fabrics_command;
	cmd.prop_get.fctype = nvme_fabrics_type_property_get;
	cmd.prop_get.offset = cpu_to_le32(off);

	ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0,
			NVME_QID_ANY, 0, 0);

	if (ret >= 0)
		*val = le64_to_cpu(res.u64);
	if (unlikely(ret != 0))
		dev_err(ctrl->device,
			"Property Get error: %d, offset %#x\n",
			ret > 0 ? ret & ~NVME_SC_DNR : ret, off);

	return ret;
}
EXPORT_SYMBOL_GPL(nvmf_reg_read32);
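
/*
 * Usage sketch (illustrative): the core reads fabrics properties through
 * this helper, e.g. polling the Controller Status property during
 * startup; the CSTS constants are from <linux/nvme.h>:
 *
 *	u32 csts;
 *
 *	ret = nvmf_reg_read32(ctrl, NVME_REG_CSTS, &csts);
 *	if (!ret && (csts & NVME_CSTS_RDY))
 *		;	// controller reports ready
 */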

/**
 * nvmf_reg_read64() - NVMe Fabrics "Property Get" API function.
 * @ctrl:	Host NVMe controller instance maintaining the admin
 *		queue used to submit the property read command to
 *		the allocated controller resource on the target system.
 * @off:	Starting offset value of the targeted property
 *		register (see the fabrics section of the NVMe standard).
 * @val:	OUTPUT parameter that will contain the value of
 *		the property after a successful read.
 *
 * Used by the host system to retrieve a 64-bit capsule property value
 * from an NVMe controller on the target system.
 *
 * ("Capsule property" is a "PCIe register" concept applied to the
 * NVMe fabrics space.)
 *
 * Return:
 *	0: successful read
 *	> 0: NVMe error status code
 *	< 0: Linux errno error code
 */
int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
{
	struct nvme_command cmd = { };
	union nvme_result res;
	int ret;

	cmd.prop_get.opcode = nvme_fabrics_command;
	cmd.prop_get.fctype = nvme_fabrics_type_property_get;
	cmd.prop_get.attrib = 1;
	cmd.prop_get.offset = cpu_to_le32(off);

	ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0,
			NVME_QID_ANY, 0, 0);

	if (ret >= 0)
		*val = le64_to_cpu(res.u64);
	if (unlikely(ret != 0))
		dev_err(ctrl->device,
			"Property Get error: %d, offset %#x\n",
			ret > 0 ? ret & ~NVME_SC_DNR : ret, off);
	return ret;
}
EXPORT_SYMBOL_GPL(nvmf_reg_read64);
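
/*
 * Usage sketch (illustrative): the 64-bit variant is used for properties
 * such as the Controller Capabilities register, typically read once while
 * bringing up the admin queue:
 *
 *	u64 cap;
 *
 *	ret = nvmf_reg_read64(ctrl, NVME_REG_CAP, &cap);
 *	if (!ret)
 *		ctrl->cap = cap;
 */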

/**
 * nvmf_reg_write32() - NVMe Fabrics "Property Write" API function.
 * @ctrl:	Host NVMe controller instance maintaining the admin
 *		queue used to submit the property write command to
 *		the allocated NVMe controller resource on the target system.
 * @off:	Starting offset value of the targeted property
 *		register (see the fabrics section of the NVMe standard).
 * @val:	Input parameter that contains the value to be
 *		written to the property.
 *
 * Used by the NVMe host system to write a 32-bit capsule property value
 * to an NVMe controller on the target system.
 *
 * ("Capsule property" is a "PCIe register" concept applied to the
 * NVMe fabrics space.)
 *
 * Return:
 *	0: successful write
 *	> 0: NVMe error status code
 *	< 0: Linux errno error code
 */
int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
{
	struct nvme_command cmd = { };
	int ret;

	cmd.prop_set.opcode = nvme_fabrics_command;
	cmd.prop_set.fctype = nvme_fabrics_type_property_set;
	cmd.prop_set.attrib = 0;
	cmd.prop_set.offset = cpu_to_le32(off);
	cmd.prop_set.value = cpu_to_le64(val);

	ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, NULL, NULL, 0,
			NVME_QID_ANY, 0, 0);
	if (unlikely(ret))
		dev_err(ctrl->device,
			"Property Set error: %d, offset %#x\n",
			ret > 0 ? ret & ~NVME_SC_DNR : ret, off);
	return ret;
}
EXPORT_SYMBOL_GPL(nvmf_reg_write32);
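
/*
 * Usage sketch (illustrative): property writes drive controller state
 * transitions, e.g. setting the enable bit in the Controller
 * Configuration property (field composition simplified here):
 *
 *	u32 cc = NVME_CC_CSS_NVM | NVME_CC_ENABLE;
 *
 *	ret = nvmf_reg_write32(ctrl, NVME_REG_CC, cc);
 */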

/**
 * nvmf_log_connect_error() - Error-parsing-diagnostic printout function for
 *				connect() errors.
 * @ctrl:	The specific /dev/nvmeX device that had the error.
 * @errval:	Error code to be decoded in a more human-friendly
 *		printout.
 * @offset:	For use with the NVMe error code
 *		NVME_SC_CONNECT_INVALID_PARAM.
 * @cmd:	This is the SQE portion of a submission capsule.
 * @data:	This is the "Data" portion of a submission capsule.
 */
static void nvmf_log_connect_error(struct nvme_ctrl *ctrl,
		int errval, int offset, struct nvme_command *cmd,
		struct nvmf_connect_data *data)
{
	int err_sctype = errval & ~NVME_SC_DNR;

	switch (err_sctype) {
	case NVME_SC_CONNECT_INVALID_PARAM:
		if (offset >> 16) {
			char *inv_data = "Connect Invalid Data Parameter";

			switch (offset & 0xffff) {
			case (offsetof(struct nvmf_connect_data, cntlid)):
				dev_err(ctrl->device,
					"%s, cntlid: %d\n",
					inv_data, data->cntlid);
				break;
			case (offsetof(struct nvmf_connect_data, hostnqn)):
				dev_err(ctrl->device,
					"%s, hostnqn \"%s\"\n",
					inv_data, data->hostnqn);
				break;
			case (offsetof(struct nvmf_connect_data, subsysnqn)):
				dev_err(ctrl->device,
					"%s, subsysnqn \"%s\"\n",
					inv_data, data->subsysnqn);
				break;
			default:
				dev_err(ctrl->device,
					"%s, starting byte offset: %d\n",
					inv_data, offset & 0xffff);
				break;
			}
		} else {
			char *inv_sqe = "Connect Invalid SQE Parameter";

			switch (offset) {
			case (offsetof(struct nvmf_connect_command, qid)):
				dev_err(ctrl->device,
					"%s, qid %d\n",
					inv_sqe, cmd->connect.qid);
				break;
			default:
				dev_err(ctrl->device,
					"%s, starting byte offset: %d\n",
					inv_sqe, offset);
			}
		}
		break;
	case NVME_SC_CONNECT_INVALID_HOST:
		dev_err(ctrl->device,
			"Connect for subsystem %s is not allowed, hostnqn: %s\n",
			data->subsysnqn, data->hostnqn);
		break;
	case NVME_SC_CONNECT_CTRL_BUSY:
		dev_err(ctrl->device,
			"Connect command failed: controller is busy or not available\n");
		break;
	case NVME_SC_CONNECT_FORMAT:
		dev_err(ctrl->device,
			"Connect incompatible format: %d\n",
			cmd->connect.recfmt);
		break;
	case NVME_SC_HOST_PATH_ERROR:
		dev_err(ctrl->device,
			"Connect command failed: host path error\n");
		break;
	case NVME_SC_AUTH_REQUIRED:
		dev_err(ctrl->device,
			"Connect command failed: authentication required\n");
		break;
	default:
		dev_err(ctrl->device,
			"Connect command failed, error wo/DNR bit: %d\n",
			err_sctype);
		break;
	}
}

/**
 * nvmf_connect_admin_queue() - NVMe Fabrics Admin Queue "Connect"
 *				API function.
 * @ctrl:	Host NVMe controller instance used to request
 *		a new NVMe controller allocation on the target
 *		system and establish an NVMe Admin connection to
 *		that controller.
 *
 * This function enables an NVMe host device to request a new allocation of
 * an NVMe controller resource on a target system as well as establish a
 * fabrics-protocol connection of the NVMe Admin queue between the
 * host system device and the allocated NVMe controller on the
 * target system via an NVMe Fabrics "Connect" command.
 *
 * Return:
 *	0: success
 *	> 0: NVMe error status code
 *	< 0: Linux errno error code
 *
 */
int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl)
{
	struct nvme_command cmd = { };
	union nvme_result res;
	struct nvmf_connect_data *data;
	int ret;

	cmd.connect.opcode = nvme_fabrics_command;
	cmd.connect.fctype = nvme_fabrics_type_connect;
	cmd.connect.qid = 0;
	cmd.connect.sqsize = cpu_to_le16(NVME_AQ_DEPTH - 1);

	/*
	 * The KATO field is specified in milliseconds on the wire, while
	 * ctrl->kato is kept in seconds, hence the * 1000 conversion.
	 */
	cmd.connect.kato = cpu_to_le32(ctrl->kato * 1000);

	if (ctrl->opts->disable_sqflow)
		cmd.connect.cattr |= NVME_CONNECT_DISABLE_SQFLOW;

	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	uuid_copy(&data->hostid, &ctrl->opts->host->id);
	data->cntlid = cpu_to_le16(0xffff);
	strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE);
	strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);

	ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res,
			data, sizeof(*data), NVME_QID_ANY, 1,
			BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
	if (ret) {
		nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32),
				       &cmd, data);
		goto out_free_data;
	}

	ctrl->cntlid = le16_to_cpu(res.u16);

out_free_data:
	kfree(data);
	return ret;
}
EXPORT_SYMBOL_GPL(nvmf_connect_admin_queue);
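
/*
 * Usage sketch (hypothetical transport code): a transport driver calls
 * this once its admin queue transport resources are allocated, then
 * brings up the core controller state; error handling elided:
 *
 *	ret = nvmf_connect_admin_queue(ctrl);
 *	if (ret)
 *		goto out_stop_queue;
 *	ret = nvme_enable_ctrl(ctrl);
 */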

/**
 * nvmf_connect_io_queue() - NVMe Fabrics I/O Queue "Connect"
 *			     API function.
 * @ctrl:	Host NVMe controller instance used to establish an
 *		NVMe I/O queue connection to the already allocated NVMe
 *		controller on the target system.
 * @qid:	NVMe I/O queue number for the new I/O connection between
 *		host and target (note qid == 0 is illegal as this is
 *		the Admin queue, per NVMe standard).
 *
 * This function issues a fabrics-protocol connection
 * of an NVMe I/O queue (via the NVMe Fabrics "Connect" command)
 * between the host system device and the allocated NVMe controller
 * on the target system.
 *
 * Return:
 *	0: success
 *	> 0: NVMe error status code
 *	< 0: Linux errno error code
 */
int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid)
{
	struct nvme_command cmd = { };
	struct nvmf_connect_data *data;
	union nvme_result res;
	int ret;

	cmd.connect.opcode = nvme_fabrics_command;
	cmd.connect.fctype = nvme_fabrics_type_connect;
	cmd.connect.qid = cpu_to_le16(qid);
	cmd.connect.sqsize = cpu_to_le16(ctrl->sqsize);

	if (ctrl->opts->disable_sqflow)
		cmd.connect.cattr |= NVME_CONNECT_DISABLE_SQFLOW;

	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	uuid_copy(&data->hostid, &ctrl->opts->host->id);
	data->cntlid = cpu_to_le16(ctrl->cntlid);
	strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE);
	strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);

	ret = __nvme_submit_sync_cmd(ctrl->connect_q, &cmd, &res,
			data, sizeof(*data), qid, 1,
			BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
	if (ret) {
		nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32),
				       &cmd, data);
	}
	kfree(data);
	return ret;
}
EXPORT_SYMBOL_GPL(nvmf_connect_io_queue);
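
/*
 * Usage sketch (hypothetical transport code): I/O queues are connected
 * one by one after creation; qid 0 is reserved for the admin queue:
 *
 *	int i, ret;
 *
 *	for (i = 1; i < ctrl->queue_count; i++) {
 *		ret = nvmf_connect_io_queue(ctrl, i);
 *		if (ret)
 *			goto out_stop_queues;
 *	}
 */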

bool nvmf_should_reconnect(struct nvme_ctrl *ctrl)
{
	if (ctrl->opts->max_reconnects == -1 ||
	    ctrl->nr_reconnects < ctrl->opts->max_reconnects)
		return true;

	return false;
}
EXPORT_SYMBOL_GPL(nvmf_should_reconnect);
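
/*
 * Usage sketch (hypothetical transport code, field names illustrative):
 * reconnect handlers use this to decide between scheduling another
 * attempt and giving up on the controller:
 *
 *	if (nvmf_should_reconnect(ctrl)) {
 *		ctrl->nr_reconnects++;
 *		queue_delayed_work(nvme_wq, &reconnect_work,
 *				   ctrl->opts->reconnect_delay * HZ);
 *	} else {
 *		nvme_delete_ctrl(ctrl);
 *	}
 */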

/**
 * nvmf_register_transport() - NVMe Fabrics Library registration function.
 * @ops:	Transport ops instance to be registered to the
 *		common fabrics library.
 *
 * API function that registers the type of specific transport fabric
 * being implemented to the common NVMe fabrics library. Part of
 * the overall init sequence of starting up a fabrics driver.
 */
int nvmf_register_transport(struct nvmf_transport_ops *ops)
{
	if (!ops->create_ctrl)
		return -EINVAL;

	down_write(&nvmf_transports_rwsem);
	list_add_tail(&ops->entry, &nvmf_transports);
	up_write(&nvmf_transports_rwsem);

	return 0;
}
EXPORT_SYMBOL_GPL(nvmf_register_transport);
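
/*
 * Usage sketch (hypothetical "foo" transport; names and option masks are
 * illustrative): a transport module fills in a struct nvmf_transport_ops
 * and registers it from its module init:
 *
 *	static struct nvmf_transport_ops nvme_foo_transport = {
 *		.name		= "foo",
 *		.module		= THIS_MODULE,
 *		.required_opts	= NVMF_OPT_TRADDR,
 *		.allowed_opts	= NVMF_OPT_TRSVCID,
 *		.create_ctrl	= nvme_foo_create_ctrl,
 *	};
 *
 *	static int __init nvme_foo_init_module(void)
 *	{
 *		return nvmf_register_transport(&nvme_foo_transport);
 *	}
 */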

/**
 * nvmf_unregister_transport() - NVMe Fabrics Library unregistration function.
 * @ops:	Transport ops instance to be unregistered from the
 *		common fabrics library.
 *
 * Fabrics API function that unregisters the type of specific transport
 * fabric being implemented from the common NVMe fabrics library.
 * Part of the overall exit sequence of unloading the implemented driver.
 */
void nvmf_unregister_transport(struct nvmf_transport_ops *ops)
{
	down_write(&nvmf_transports_rwsem);
	list_del(&ops->entry);
	up_write(&nvmf_transports_rwsem);
}
EXPORT_SYMBOL_GPL(nvmf_unregister_transport);

static struct nvmf_transport_ops *nvmf_lookup_transport(
		struct nvmf_ctrl_options *opts)
{
	struct nvmf_transport_ops *ops;

	lockdep_assert_held(&nvmf_transports_rwsem);

	list_for_each_entry(ops, &nvmf_transports, entry) {
		if (strcmp(ops->name, opts->transport) == 0)
			return ops;
	}

	return NULL;
}

533 	{ NVMF_OPT_TRANSPORT,		"transport=%s"		},
534 	{ NVMF_OPT_TRADDR,		"traddr=%s"		},
535 	{ NVMF_OPT_TRSVCID,		"trsvcid=%s"		},
536 	{ NVMF_OPT_NQN,			"nqn=%s"		},
537 	{ NVMF_OPT_QUEUE_SIZE,		"queue_size=%d"		},
538 	{ NVMF_OPT_NR_IO_QUEUES,	"nr_io_queues=%d"	},
539 	{ NVMF_OPT_RECONNECT_DELAY,	"reconnect_delay=%d"	},
540 	{ NVMF_OPT_CTRL_LOSS_TMO,	"ctrl_loss_tmo=%d"	},
541 	{ NVMF_OPT_KATO,		"keep_alive_tmo=%d"	},
542 	{ NVMF_OPT_HOSTNQN,		"hostnqn=%s"		},
543 	{ NVMF_OPT_HOST_TRADDR,		"host_traddr=%s"	},
544 	{ NVMF_OPT_HOST_IFACE,		"host_iface=%s"		},
545 	{ NVMF_OPT_HOST_ID,		"hostid=%s"		},
546 	{ NVMF_OPT_DUP_CONNECT,		"duplicate_connect"	},
547 	{ NVMF_OPT_DISABLE_SQFLOW,	"disable_sqflow"	},
548 	{ NVMF_OPT_HDR_DIGEST,		"hdr_digest"		},
549 	{ NVMF_OPT_DATA_DIGEST,		"data_digest"		},
550 	{ NVMF_OPT_NR_WRITE_QUEUES,	"nr_write_queues=%d"	},
551 	{ NVMF_OPT_NR_POLL_QUEUES,	"nr_poll_queues=%d"	},
552 	{ NVMF_OPT_TOS,			"tos=%d"		},
553 	{ NVMF_OPT_FAIL_FAST_TMO,	"fast_io_fail_tmo=%d"	},
554 	{ NVMF_OPT_DISCOVERY,		"discovery"		},
555 	{ NVMF_OPT_ERR,			NULL			}
556 };
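
/*
 * Example (illustrative): a controller creation request written to
 * /dev/nvme-fabrics is a comma-separated string built from the tokens
 * above, e.g. for a TCP target (addresses are placeholders):
 *
 *	transport=tcp,traddr=192.168.1.10,trsvcid=4420,
 *	nqn=nqn.2014-08.org.nvmexpress:example-subsys,nr_io_queues=4
 */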

static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
		const char *buf)
{
	substring_t args[MAX_OPT_ARGS];
	char *options, *o, *p;
	int token, ret = 0;
	size_t nqnlen = 0;
	int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO;
	uuid_t hostid;

	/* Set defaults */
	opts->queue_size = NVMF_DEF_QUEUE_SIZE;
	opts->nr_io_queues = num_online_cpus();
	opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY;
	opts->kato = 0;
	opts->duplicate_connect = false;
	opts->fast_io_fail_tmo = NVMF_DEF_FAIL_FAST_TMO;
	opts->hdr_digest = false;
	opts->data_digest = false;
	opts->tos = -1; /* < 0 == use transport default */

	options = o = kstrdup(buf, GFP_KERNEL);
	if (!options)
		return -ENOMEM;

	uuid_gen(&hostid);

	while ((p = strsep(&o, ",\n")) != NULL) {
		if (!*p)
			continue;

		token = match_token(p, opt_tokens, args);
		opts->mask |= token;
		switch (token) {
		case NVMF_OPT_TRANSPORT:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			kfree(opts->transport);
			opts->transport = p;
			break;
		case NVMF_OPT_NQN:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			kfree(opts->subsysnqn);
			opts->subsysnqn = p;
			nqnlen = strlen(opts->subsysnqn);
			if (nqnlen >= NVMF_NQN_SIZE) {
				pr_err("%s needs to be < %d bytes\n",
					opts->subsysnqn, NVMF_NQN_SIZE);
				ret = -EINVAL;
				goto out;
			}
			opts->discovery_nqn =
				!(strcmp(opts->subsysnqn,
					 NVME_DISC_SUBSYS_NAME));
			break;
		case NVMF_OPT_TRADDR:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			kfree(opts->traddr);
			opts->traddr = p;
			break;
		case NVMF_OPT_TRSVCID:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			kfree(opts->trsvcid);
			opts->trsvcid = p;
			break;
		case NVMF_OPT_QUEUE_SIZE:
			if (match_int(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (token < NVMF_MIN_QUEUE_SIZE ||
			    token > NVMF_MAX_QUEUE_SIZE) {
				pr_err("Invalid queue_size %d\n", token);
				ret = -EINVAL;
				goto out;
			}
			opts->queue_size = token;
			break;
		case NVMF_OPT_NR_IO_QUEUES:
			if (match_int(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (token <= 0) {
				pr_err("Invalid number of IOQs %d\n", token);
				ret = -EINVAL;
				goto out;
			}
			if (opts->discovery_nqn) {
				pr_debug("Ignoring nr_io_queues value for discovery controller\n");
				break;
			}

			opts->nr_io_queues = min_t(unsigned int,
					num_online_cpus(), token);
			break;
		case NVMF_OPT_KATO:
			if (match_int(args, &token)) {
				ret = -EINVAL;
				goto out;
			}

			if (token < 0) {
				pr_err("Invalid keep_alive_tmo %d\n", token);
				ret = -EINVAL;
				goto out;
			} else if (token == 0 && !opts->discovery_nqn) {
				/* Allowed for debug */
				pr_warn("keep_alive_tmo 0 won't execute keep alives!!!\n");
			}
			opts->kato = token;
			break;
		case NVMF_OPT_CTRL_LOSS_TMO:
			if (match_int(args, &token)) {
				ret = -EINVAL;
				goto out;
			}

			if (token < 0)
				pr_warn("ctrl_loss_tmo < 0 will reconnect forever\n");
			ctrl_loss_tmo = token;
			break;
		case NVMF_OPT_FAIL_FAST_TMO:
			if (match_int(args, &token)) {
				ret = -EINVAL;
				goto out;
			}

			if (token >= 0)
				pr_warn("I/O will fail on reconnecting controller after %d sec\n",
					token);
			else
				token = -1;

			opts->fast_io_fail_tmo = token;
			break;
		case NVMF_OPT_HOSTNQN:
			if (opts->host) {
				pr_err("hostnqn already user-assigned: %s\n",
				       opts->host->nqn);
				ret = -EADDRINUSE;
				goto out;
			}
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			nqnlen = strlen(p);
			if (nqnlen >= NVMF_NQN_SIZE) {
				pr_err("%s needs to be < %d bytes\n",
					p, NVMF_NQN_SIZE);
				kfree(p);
				ret = -EINVAL;
				goto out;
			}
			opts->host = nvmf_host_add(p);
			kfree(p);
			if (!opts->host) {
				ret = -ENOMEM;
				goto out;
			}
			break;
		case NVMF_OPT_RECONNECT_DELAY:
			if (match_int(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (token <= 0) {
				pr_err("Invalid reconnect_delay %d\n", token);
				ret = -EINVAL;
				goto out;
			}
			opts->reconnect_delay = token;
			break;
		case NVMF_OPT_HOST_TRADDR:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			kfree(opts->host_traddr);
			opts->host_traddr = p;
			break;
		case NVMF_OPT_HOST_IFACE:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			kfree(opts->host_iface);
			opts->host_iface = p;
			break;
		case NVMF_OPT_HOST_ID:
			p = match_strdup(args);
			if (!p) {
				ret = -ENOMEM;
				goto out;
			}
			ret = uuid_parse(p, &hostid);
			if (ret) {
				pr_err("Invalid hostid %s\n", p);
				ret = -EINVAL;
				kfree(p);
				goto out;
			}
			kfree(p);
			break;
		case NVMF_OPT_DUP_CONNECT:
			opts->duplicate_connect = true;
			break;
		case NVMF_OPT_DISABLE_SQFLOW:
			opts->disable_sqflow = true;
			break;
		case NVMF_OPT_HDR_DIGEST:
			opts->hdr_digest = true;
			break;
		case NVMF_OPT_DATA_DIGEST:
			opts->data_digest = true;
			break;
		case NVMF_OPT_NR_WRITE_QUEUES:
			if (match_int(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (token <= 0) {
				pr_err("Invalid nr_write_queues %d\n", token);
				ret = -EINVAL;
				goto out;
			}
			opts->nr_write_queues = token;
			break;
		case NVMF_OPT_NR_POLL_QUEUES:
			if (match_int(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (token <= 0) {
				pr_err("Invalid nr_poll_queues %d\n", token);
				ret = -EINVAL;
				goto out;
			}
			opts->nr_poll_queues = token;
			break;
		case NVMF_OPT_TOS:
			if (match_int(args, &token)) {
				ret = -EINVAL;
				goto out;
			}
			if (token < 0) {
				pr_err("Invalid type of service %d\n", token);
				ret = -EINVAL;
				goto out;
			}
			if (token > 255) {
				pr_warn("Clamping type of service to 255\n");
				token = 255;
			}
			opts->tos = token;
			break;
		case NVMF_OPT_DISCOVERY:
			opts->discovery_nqn = true;
			break;
		default:
			pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n",
				p);
			ret = -EINVAL;
			goto out;
		}
	}

	if (opts->discovery_nqn) {
		opts->nr_io_queues = 0;
		opts->nr_write_queues = 0;
		opts->nr_poll_queues = 0;
		opts->duplicate_connect = true;
	} else {
		if (!opts->kato)
			opts->kato = NVME_DEFAULT_KATO;
	}
	if (ctrl_loss_tmo < 0) {
		opts->max_reconnects = -1;
	} else {
		opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
						opts->reconnect_delay);
		if (ctrl_loss_tmo < opts->fast_io_fail_tmo)
			pr_warn("failfast tmo (%d) larger than controller loss tmo (%d)\n",
				opts->fast_io_fail_tmo, ctrl_loss_tmo);
	}

	if (!opts->host) {
		kref_get(&nvmf_default_host->ref);
		opts->host = nvmf_default_host;
	}

	uuid_copy(&opts->host->id, &hostid);

out:
	kfree(options);
	return ret;
}

static int nvmf_check_required_opts(struct nvmf_ctrl_options *opts,
		unsigned int required_opts)
{
	if ((opts->mask & required_opts) != required_opts) {
		unsigned int i;

		for (i = 0; i < ARRAY_SIZE(opt_tokens); i++) {
			if ((opt_tokens[i].token & required_opts) &&
			    !(opt_tokens[i].token & opts->mask)) {
				pr_warn("missing parameter '%s'\n",
					opt_tokens[i].pattern);
			}
		}

		return -EINVAL;
	}

	return 0;
}

bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
		struct nvmf_ctrl_options *opts)
{
	if (!nvmf_ctlr_matches_baseopts(ctrl, opts) ||
	    strcmp(opts->traddr, ctrl->opts->traddr) ||
	    strcmp(opts->trsvcid, ctrl->opts->trsvcid))
		return false;

	/*
	 * Checking the local address is rough. In most cases, none is specified
	 * and the host port is selected by the stack.
	 *
	 * Assume no match if:
	 * -  local address is specified and address is not the same
	 * -  local address is not specified but remote is, or vice versa
	 *    (admin using specific host_traddr when it matters).
	 */
	if ((opts->mask & NVMF_OPT_HOST_TRADDR) &&
	    (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)) {
		if (strcmp(opts->host_traddr, ctrl->opts->host_traddr))
			return false;
	} else if ((opts->mask & NVMF_OPT_HOST_TRADDR) ||
		   (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)) {
		return false;
	}

	return true;
}
EXPORT_SYMBOL_GPL(nvmf_ip_options_match);

static int nvmf_check_allowed_opts(struct nvmf_ctrl_options *opts,
		unsigned int allowed_opts)
{
	if (opts->mask & ~allowed_opts) {
		unsigned int i;

		for (i = 0; i < ARRAY_SIZE(opt_tokens); i++) {
			if ((opt_tokens[i].token & opts->mask) &&
			    (opt_tokens[i].token & ~allowed_opts)) {
				pr_warn("invalid parameter '%s'\n",
					opt_tokens[i].pattern);
			}
		}

		return -EINVAL;
	}

	return 0;
}

void nvmf_free_options(struct nvmf_ctrl_options *opts)
{
	nvmf_host_put(opts->host);
	kfree(opts->transport);
	kfree(opts->traddr);
	kfree(opts->trsvcid);
	kfree(opts->subsysnqn);
	kfree(opts->host_traddr);
	kfree(opts->host_iface);
	kfree(opts);
}
EXPORT_SYMBOL_GPL(nvmf_free_options);

#define NVMF_REQUIRED_OPTS	(NVMF_OPT_TRANSPORT | NVMF_OPT_NQN)
#define NVMF_ALLOWED_OPTS	(NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
				 NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
				 NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\
				 NVMF_OPT_DISABLE_SQFLOW | NVMF_OPT_DISCOVERY |\
				 NVMF_OPT_FAIL_FAST_TMO)

static struct nvme_ctrl *
nvmf_create_ctrl(struct device *dev, const char *buf)
{
	struct nvmf_ctrl_options *opts;
	struct nvmf_transport_ops *ops;
	struct nvme_ctrl *ctrl;
	int ret;

	opts = kzalloc(sizeof(*opts), GFP_KERNEL);
	if (!opts)
		return ERR_PTR(-ENOMEM);

	ret = nvmf_parse_options(opts, buf);
	if (ret)
		goto out_free_opts;

	request_module("nvme-%s", opts->transport);

	/*
	 * Check the generic options first as we need a valid transport for
	 * the lookup below.  Then clear the generic flags so that transport
	 * drivers don't have to care about them.
	 */
	ret = nvmf_check_required_opts(opts, NVMF_REQUIRED_OPTS);
	if (ret)
		goto out_free_opts;
	opts->mask &= ~NVMF_REQUIRED_OPTS;

	down_read(&nvmf_transports_rwsem);
	ops = nvmf_lookup_transport(opts);
	if (!ops) {
		pr_info("no handler found for transport %s.\n",
			opts->transport);
		ret = -EINVAL;
		goto out_unlock;
	}

	if (!try_module_get(ops->module)) {
		ret = -EBUSY;
		goto out_unlock;
	}
	up_read(&nvmf_transports_rwsem);

	ret = nvmf_check_required_opts(opts, ops->required_opts);
	if (ret)
		goto out_module_put;
	ret = nvmf_check_allowed_opts(opts, NVMF_ALLOWED_OPTS |
				ops->allowed_opts | ops->required_opts);
	if (ret)
		goto out_module_put;

	ctrl = ops->create_ctrl(dev, opts);
	if (IS_ERR(ctrl)) {
		ret = PTR_ERR(ctrl);
		goto out_module_put;
	}

	module_put(ops->module);
	return ctrl;

out_module_put:
	module_put(ops->module);
	goto out_free_opts;
out_unlock:
	up_read(&nvmf_transports_rwsem);
out_free_opts:
	nvmf_free_options(opts);
	return ERR_PTR(ret);
}

static struct class *nvmf_class;
static struct device *nvmf_device;
static DEFINE_MUTEX(nvmf_dev_mutex);

static ssize_t nvmf_dev_write(struct file *file, const char __user *ubuf,
		size_t count, loff_t *pos)
{
	struct seq_file *seq_file = file->private_data;
	struct nvme_ctrl *ctrl;
	const char *buf;
	int ret = 0;

	if (count > PAGE_SIZE)
		return -ENOMEM;

	buf = memdup_user_nul(ubuf, count);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	mutex_lock(&nvmf_dev_mutex);
	if (seq_file->private) {
		ret = -EINVAL;
		goto out_unlock;
	}

	ctrl = nvmf_create_ctrl(nvmf_device, buf);
	if (IS_ERR(ctrl)) {
		ret = PTR_ERR(ctrl);
		goto out_unlock;
	}

	seq_file->private = ctrl;

out_unlock:
	mutex_unlock(&nvmf_dev_mutex);
	kfree(buf);
	return ret ? ret : count;
}

static void __nvmf_concat_opt_tokens(struct seq_file *seq_file)
{
	const struct match_token *tok;
	int idx;

	/*
	 * Add dummy entries for instance and cntlid to
	 * signal an invalid/non-existing controller
	 */
	seq_puts(seq_file, "instance=-1,cntlid=-1");
	for (idx = 0; idx < ARRAY_SIZE(opt_tokens); idx++) {
		tok = &opt_tokens[idx];
		if (tok->token == NVMF_OPT_ERR)
			continue;
		seq_puts(seq_file, ",");
		seq_puts(seq_file, tok->pattern);
	}
	seq_puts(seq_file, "\n");
}

static int nvmf_dev_show(struct seq_file *seq_file, void *private)
{
	struct nvme_ctrl *ctrl;

	mutex_lock(&nvmf_dev_mutex);
	ctrl = seq_file->private;
	if (!ctrl) {
		__nvmf_concat_opt_tokens(seq_file);
		goto out_unlock;
	}

	seq_printf(seq_file, "instance=%d,cntlid=%d\n",
			ctrl->instance, ctrl->cntlid);

out_unlock:
	mutex_unlock(&nvmf_dev_mutex);
	return 0;
}

static int nvmf_dev_open(struct inode *inode, struct file *file)
{
	/*
	 * The miscdevice code initializes file->private_data, but doesn't
	 * make use of it later.
	 */
	file->private_data = NULL;
	return single_open(file, nvmf_dev_show, NULL);
}

static int nvmf_dev_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq_file = file->private_data;
	struct nvme_ctrl *ctrl = seq_file->private;

	if (ctrl)
		nvme_put_ctrl(ctrl);
	return single_release(inode, file);
}

static const struct file_operations nvmf_dev_fops = {
	.owner		= THIS_MODULE,
	.write		= nvmf_dev_write,
	.read		= seq_read,
	.open		= nvmf_dev_open,
	.release	= nvmf_dev_release,
};

static struct miscdevice nvmf_misc = {
	.minor		= MISC_DYNAMIC_MINOR,
	.name		= "nvme-fabrics",
	.fops		= &nvmf_dev_fops,
};
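
/*
 * Usage sketch (hypothetical userspace code): tools such as nvme-cli
 * create a controller by writing an option string to the misc device and
 * reading back "instance=%d,cntlid=%d"; error handling elided:
 *
 *	int fd = open("/dev/nvme-fabrics", O_RDWR);
 *	const char *args = "transport=tcp,traddr=192.168.1.10,"
 *			   "trsvcid=4420,nqn=nqn.2014-08.org.nvmexpress:example";
 *	char reply[64];
 *
 *	write(fd, args, strlen(args));
 *	read(fd, reply, sizeof(reply));	// e.g. "instance=0,cntlid=1\n"
 *	close(fd);
 */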

static int __init nvmf_init(void)
{
	int ret;

	nvmf_default_host = nvmf_host_default();
	if (!nvmf_default_host)
		return -ENOMEM;

	nvmf_class = class_create(THIS_MODULE, "nvme-fabrics");
	if (IS_ERR(nvmf_class)) {
		pr_err("couldn't register class nvme-fabrics\n");
		ret = PTR_ERR(nvmf_class);
		goto out_free_host;
	}

	nvmf_device =
		device_create(nvmf_class, NULL, MKDEV(0, 0), NULL, "ctl");
	if (IS_ERR(nvmf_device)) {
		pr_err("couldn't create nvme-fabrics device!\n");
		ret = PTR_ERR(nvmf_device);
		goto out_destroy_class;
	}

	ret = misc_register(&nvmf_misc);
	if (ret) {
		pr_err("couldn't register misc device: %d\n", ret);
		goto out_destroy_device;
	}

	return 0;

out_destroy_device:
	device_destroy(nvmf_class, MKDEV(0, 0));
out_destroy_class:
	class_destroy(nvmf_class);
out_free_host:
	nvmf_host_put(nvmf_default_host);
	return ret;
}

static void __exit nvmf_exit(void)
{
	misc_deregister(&nvmf_misc);
	device_destroy(nvmf_class, MKDEV(0, 0));
	class_destroy(nvmf_class);
	nvmf_host_put(nvmf_default_host);

	BUILD_BUG_ON(sizeof(struct nvmf_common_command) != 64);
	BUILD_BUG_ON(sizeof(struct nvmf_connect_command) != 64);
	BUILD_BUG_ON(sizeof(struct nvmf_property_get_command) != 64);
	BUILD_BUG_ON(sizeof(struct nvmf_property_set_command) != 64);
	BUILD_BUG_ON(sizeof(struct nvmf_connect_data) != 1024);
}

MODULE_LICENSE("GPL v2");

module_init(nvmf_init);
module_exit(nvmf_exit);