xref: /openbmc/linux/drivers/nvme/target/nvmet.h (revision 852a53a0)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3  * Copyright (c) 2015-2016 HGST, a Western Digital Company.
4  */
5 
6 #ifndef _NVMET_H
7 #define _NVMET_H
8 
9 #include <linux/dma-mapping.h>
10 #include <linux/types.h>
11 #include <linux/device.h>
12 #include <linux/kref.h>
13 #include <linux/percpu-refcount.h>
14 #include <linux/list.h>
15 #include <linux/mutex.h>
16 #include <linux/uuid.h>
17 #include <linux/nvme.h>
18 #include <linux/configfs.h>
19 #include <linux/rcupdate.h>
20 #include <linux/blkdev.h>
21 #include <linux/radix-tree.h>
22 #include <linux/t10-pi.h>
23 
/* Default version value, encoded with NVME_VS(1, 3, 0). */
#define NVMET_DEFAULT_VS		NVME_VS(1, 3, 0)

/* Number of entries in nvmet_ctrl.async_event_cmds[]. */
#define NVMET_ASYNC_EVENTS		4
/* Number of entries in nvmet_ctrl.slots[] (error log). */
#define NVMET_ERROR_LOG_SLOTS		128
/* All-ones u16 (0xffff) used as the "no error location" marker. */
#define NVMET_NO_ERROR_LOC		((u16)-1)
/* Default controller model string. */
#define NVMET_DEFAULT_CTRL_MODEL	"Linux"
30 
/*
 * Supported optional AENs, one mask for regular controllers and one for
 * discovery controllers:
 */
#define NVMET_AEN_CFG_OPTIONAL \
	(NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_ANA_CHANGE)
#define NVMET_DISC_AEN_CFG_OPTIONAL \
	(NVME_AEN_CFG_DISC_CHANGE)

/*
 * NVMET_AEN_CFG_OPTIONAL plus the mandatory SMART AENs (we'll never send
 * them, but allow enabling them):
 */
#define NVMET_AEN_CFG_ALL \
	(NVME_SMART_CRIT_SPARE | NVME_SMART_CRIT_TEMPERATURE | \
	 NVME_SMART_CRIT_RELIABILITY | NVME_SMART_CRIT_MEDIA | \
	 NVME_SMART_CRIT_VOLATILE_MEMORY | NVMET_AEN_CFG_OPTIONAL)
46 
/*
 * Helper macros for when the NVMe error is NVME_SC_CONNECT_INVALID_PARAM.
 *
 * Both produce a little-endian 32-bit value holding the byte offset of the
 * offending field @x:
 *  - IPO_IATTR_CONNECT_DATA(x): offset within struct nvmf_connect_data, with
 *    bit 16 (the IATTR bit) set to 1, which means the offending offset is in
 *    the data section of connect().
 *  - IPO_IATTR_CONNECT_SQE(x): offset within struct nvmf_connect_command,
 *    with the IATTR bit left clear.
 */
#define IPO_IATTR_CONNECT_DATA(x)	\
	(cpu_to_le32((1 << 16) | (offsetof(struct nvmf_connect_data, x))))
#define IPO_IATTR_CONNECT_SQE(x)	\
	(cpu_to_le32(offsetof(struct nvmf_connect_command, x)))
55 
/*
 * struct nvmet_ns - state kept for one namespace.
 *
 * The structure is embedded in configfs through @group; to_nvmet_ns() maps a
 * config_item back to its nvmet_ns. It carries both a block_device pointer
 * and a file pointer; nvmet_ns_dev() treats a NULL @bdev as "no disk device".
 */
struct nvmet_ns {
	struct percpu_ref	ref;
	struct block_device	*bdev;
	struct file		*file;
	bool			readonly;
	u32			nsid;
	/* shift applied to a block count by nvmet_rw_data_len() */
	u32			blksize_shift;
	loff_t			size;
	u8			nguid[16];
	uuid_t			uuid;
	u32			anagrpid;

	bool			buffered_io;
	bool			enabled;
	struct nvmet_subsys	*subsys;
	const char		*device_path;

	struct config_group	device_group;
	/* configfs group used by to_nvmet_ns() */
	struct config_group	group;

	struct completion	disable_done;
	mempool_t		*bvec_pool;
	struct kmem_cache	*bvec_cache;

	int			use_p2pmem;
	struct pci_dev		*p2p_dev;
	/* must be non-zero for nvmet_ns_has_pi() to report PI */
	int			pi_type;
	/* per-block multiplier used by nvmet_rw_metadata_len() */
	int			metadata_size;
};
85 
86 static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
87 {
88 	return container_of(to_config_group(item), struct nvmet_ns, group);
89 }
90 
91 static inline struct device *nvmet_ns_dev(struct nvmet_ns *ns)
92 {
93 	return ns->bdev ? disk_to_dev(ns->bdev->bd_disk) : NULL;
94 }
95 
/* struct nvmet_cq - completion queue descriptor: queue id and size. */
struct nvmet_cq {
	u16			qid;
	u16			size;
};
100 
/*
 * struct nvmet_sq - submission queue state.
 *
 * Links the queue to its controller (@ctrl), which request helpers reach
 * through req->sq->ctrl. Reference-counted via @ref.
 */
struct nvmet_sq {
	struct nvmet_ctrl	*ctrl;
	struct percpu_ref	ref;
	u16			qid;
	u16			size;
	u32			sqhd;
	bool			sqhd_disabled;
	struct completion	free_done;
	struct completion	confirm_done;
};
111 
/*
 * struct nvmet_ana_group - configfs-visible ANA group of a port.
 *
 * @group is the configfs group used by to_ana_group(); @port points at the
 * owning port and @grpid holds the group id.
 */
struct nvmet_ana_group {
	struct config_group	group;
	struct nvmet_port	*port;
	u32			grpid;
};
117 
118 static inline struct nvmet_ana_group *to_ana_group(struct config_item *item)
119 {
120 	return container_of(to_config_group(item), struct nvmet_ana_group,
121 			group);
122 }
123 
/**
 * struct nvmet_port -	Common structure to keep port
 *				information for the target.
 * @entry:		Entry into referrals or transport list.
 * @disc_addr:		Address information is stored in a format defined
 *				for a discovery log page entry.
 * @group:		ConfigFS group for this element's folder.
 * @subsys_group:	ConfigFS group (presumably the port's subsystems
 *				folder - confirm against configfs code).
 * @subsystems:		List head (presumably of subsystem links).
 * @referrals_group:	ConfigFS group (presumably the referrals folder).
 * @referrals:		List head (presumably of referral ports).
 * @global_entry:	List entry (presumably on a global port list).
 * @ana_groups_group:	ConfigFS group mapped back to the port by
 *				ana_groups_to_port().
 * @ana_default_group:	ANA group embedded directly in the port.
 * @ana_state:		Pointer to ANA state values (presumably indexed by
 *				ANA group id - confirm).
 * @priv:		Private data for the transport.
 * @enabled:		Boolean port state flag.
 * @inline_data_size:	Inline data size setting for the port.
 * @tr_ops:		Transport operations bound to the port.
 * @pi_enable:		Boolean protection-information setting.
 */
struct nvmet_port {
	struct list_head		entry;
	struct nvmf_disc_rsp_page_entry	disc_addr;
	struct config_group		group;
	struct config_group		subsys_group;
	struct list_head		subsystems;
	struct config_group		referrals_group;
	struct list_head		referrals;
	struct list_head		global_entry;
	struct config_group		ana_groups_group;
	struct nvmet_ana_group		ana_default_group;
	enum nvme_ana_state		*ana_state;
	void				*priv;
	bool				enabled;
	int				inline_data_size;
	const struct nvmet_fabrics_ops	*tr_ops;
	bool				pi_enable;
};
151 
152 static inline struct nvmet_port *to_nvmet_port(struct config_item *item)
153 {
154 	return container_of(to_config_group(item), struct nvmet_port,
155 			group);
156 }
157 
158 static inline struct nvmet_port *ana_groups_to_port(
159 		struct config_item *item)
160 {
161 	return container_of(to_config_group(item), struct nvmet_port,
162 			ana_groups_group);
163 }
164 
/*
 * struct nvmet_ctrl - state kept for one controller.
 *
 * Reference-counted through @ref and linked to its subsystem via @subsys and
 * @subsys_entry. Pointers to the queue arrays are held in @cqs and @sqs.
 */
struct nvmet_ctrl {
	struct nvmet_subsys	*subsys;
	struct nvmet_cq		**cqs;
	struct nvmet_sq		**sqs;

	bool			cmd_seen;

	struct mutex		lock;
	u64			cap;
	u32			cc;
	u32			csts;

	uuid_t			hostid;
	u16			cntlid;
	u32			kato;

	struct nvmet_port	*port;

	/* bitmask read with READ_ONCE() in nvmet_aen_bit_disabled() */
	u32			aen_enabled;
	/* bitmap updated with clear_bit()/test_and_set_bit() */
	unsigned long		aen_masked;
	struct nvmet_req	*async_event_cmds[NVMET_ASYNC_EVENTS];
	unsigned int		nr_async_event_cmds;
	struct list_head	async_events;
	struct work_struct	async_event_work;

	struct list_head	subsys_entry;
	struct kref		ref;
	struct delayed_work	ka_work;
	struct work_struct	fatal_err_work;

	const struct nvmet_fabrics_ops *ops;

	__le32			*changed_ns_list;
	u32			nr_changed_ns;

	char			subsysnqn[NVMF_NQN_FIELD_LEN];
	char			hostnqn[NVMF_NQN_FIELD_LEN];

	struct device		*p2p_client;
	struct radix_tree_root	p2p_ns_map;

	/* error log state; presumably @error_lock guards the two below */
	spinlock_t		error_lock;
	u64			err_counter;
	struct nvme_error_slot	slots[NVMET_ERROR_LOG_SLOTS];
	bool			pi_support;
};
211 
/*
 * struct nvmet_subsys_model - variable-length model string container.
 *
 * @number is a flexible array member, so the object must be allocated with
 * room for the string. @rcuhead allows deferred freeing via RCU; the owning
 * pointer in struct nvmet_subsys is annotated __rcu.
 */
struct nvmet_subsys_model {
	struct rcu_head		rcuhead;
	char			number[];
};
216 
/*
 * struct nvmet_subsys - state kept for one subsystem.
 *
 * Reference-counted through @ref. Namespaces are kept in the @namespaces
 * xarray and controllers on the @ctrls list. The configfs groups @group and
 * @namespaces_group are mapped back to the subsystem by to_subsys() and
 * namespaces_to_subsys() respectively.
 */
struct nvmet_subsys {
	enum nvme_subsys_type	type;

	struct mutex		lock;
	struct kref		ref;

	struct xarray		namespaces;
	unsigned int		nr_namespaces;
	unsigned int		max_nsid;
	u16			cntlid_min;
	u16			cntlid_max;

	struct list_head	ctrls;

	struct list_head	hosts;
	bool			allow_any_host;

	u16			max_qid;

	u64			ver;
	u64			serial;
	char			*subsysnqn;
	bool			pi_support;

	struct config_group	group;

	struct config_group	namespaces_group;
	struct config_group	allowed_hosts_group;

	/* RCU-protected pointer to the model string container */
	struct nvmet_subsys_model	__rcu *model;

	/* Only present when passthru support is built in. */
#ifdef CONFIG_NVME_TARGET_PASSTHRU
	/* returned by nvmet_passthru_ctrl() */
	struct nvme_ctrl	*passthru_ctrl;
	char			*passthru_ctrl_path;
	struct config_group	passthru_group;
#endif /* CONFIG_NVME_TARGET_PASSTHRU */
};
254 
255 static inline struct nvmet_subsys *to_subsys(struct config_item *item)
256 {
257 	return container_of(to_config_group(item), struct nvmet_subsys, group);
258 }
259 
260 static inline struct nvmet_subsys *namespaces_to_subsys(
261 		struct config_item *item)
262 {
263 	return container_of(to_config_group(item), struct nvmet_subsys,
264 			namespaces_group);
265 }
266 
/*
 * struct nvmet_host - a host as seen through configfs.
 *
 * Holds nothing but its configfs group; nvmet_host_name() returns the name
 * of that group's config item.
 */
struct nvmet_host {
	struct config_group	group;
};
270 
271 static inline struct nvmet_host *to_host(struct config_item *item)
272 {
273 	return container_of(to_config_group(item), struct nvmet_host, group);
274 }
275 
276 static inline char *nvmet_host_name(struct nvmet_host *host)
277 {
278 	return config_item_name(&host->group.cg_item);
279 }
280 
/* struct nvmet_host_link - list node that references a host. */
struct nvmet_host_link {
	struct list_head	entry;
	struct nvmet_host	*host;
};
285 
/* struct nvmet_subsys_link - list node that references a subsystem. */
struct nvmet_subsys_link {
	struct list_head	entry;
	struct nvmet_subsys	*subsys;
};
290 
/* Forward declaration: the ops table below only needs pointers to it. */
struct nvmet_req;
/*
 * struct nvmet_fabrics_ops - operations table supplied by a transport.
 *
 * Instances are passed (as const) to nvmet_register_transport() and
 * nvmet_unregister_transport(), and are referenced from ports, controllers
 * and requests. @flags is a bitmask of the NVMF_* values defined inline
 * below.
 */
struct nvmet_fabrics_ops {
	struct module *owner;
	unsigned int type;
	unsigned int msdbd;
	unsigned int flags;
#define NVMF_KEYED_SGLS			(1 << 0)
#define NVMF_METADATA_SUPPORTED		(1 << 1)
	void (*queue_response)(struct nvmet_req *req);
	int (*add_port)(struct nvmet_port *port);
	void (*remove_port)(struct nvmet_port *port);
	void (*delete_ctrl)(struct nvmet_ctrl *ctrl);
	void (*disc_traddr)(struct nvmet_req *req,
			struct nvmet_port *port, char *traddr);
	u16 (*install_queue)(struct nvmet_sq *nvme_sq);
	void (*discovery_chg)(struct nvmet_port *port);
	u8 (*get_mdts)(const struct nvmet_ctrl *ctrl);
};
309 
/* Number of bio_vec entries embedded in struct nvmet_req (inline_bvec[]). */
#define NVMET_MAX_INLINE_BIOVEC	8
/*
 * Byte length covered by the inline bio_vecs, one page each. The expansion
 * is parenthesized so the macro is safe next to higher-precedence operators
 * such as division or modulo.
 */
#define NVMET_MAX_INLINE_DATA_LEN (NVMET_MAX_INLINE_BIOVEC * PAGE_SIZE)
312 
/*
 * struct nvmet_req - state kept for one in-flight command.
 *
 * Points at the submitted command (@cmd), the completion entry to fill
 * (@cqe), the queues it belongs to (@sq, @cq) and the namespace it targets
 * (@ns). The anonymous union holds three mutually exclusive per-request
 * states named b, f and p (presumably block-device, file and passthru
 * backends - confirm against the backend sources).
 */
struct nvmet_req {
	struct nvme_command	*cmd;
	struct nvme_completion	*cqe;
	struct nvmet_sq		*sq;
	struct nvmet_cq		*cq;
	struct nvmet_ns		*ns;
	struct scatterlist	*sg;
	struct scatterlist	*metadata_sg;
	struct bio_vec		inline_bvec[NVMET_MAX_INLINE_BIOVEC];
	union {
		struct {
			struct bio      inline_bio;
		} b;
		struct {
			bool			mpool_alloc;
			struct kiocb            iocb;
			struct bio_vec          *bvec;
			struct work_struct      work;
		} f;
		struct {
			struct request		*rq;
			struct work_struct      work;
			bool			use_workqueue;
		} p;
	};
	int			sg_cnt;
	int			metadata_sg_cnt;
	/* data length as parsed from the SGL descriptor: */
	size_t			transfer_len;
	size_t			metadata_len;

	struct nvmet_port	*port;

	/* handler invoked to execute the command */
	void (*execute)(struct nvmet_req *req);
	const struct nvmet_fabrics_ops *ops;

	struct pci_dev		*p2p_dev;
	struct device		*p2p_client;
	u16			error_loc;
	u64			error_slba;
};
354 
/*
 * Workqueue defined outside this header. Judging by the name it serves
 * buffered I/O work (see nvmet_ns.buffered_io) - confirm at the definition.
 */
extern struct workqueue_struct *buffered_io_wq;
356 
357 static inline void nvmet_set_result(struct nvmet_req *req, u32 result)
358 {
359 	req->cqe->result.u32 = cpu_to_le32(result);
360 }
361 
362 /*
363  * NVMe command writes actually are DMA reads for us on the target side.
364  */
365 static inline enum dma_data_direction
366 nvmet_data_dir(struct nvmet_req *req)
367 {
368 	return nvme_is_write(req->cmd) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
369 }
370 
/*
 * struct nvmet_async_event - one queued asynchronous event.
 *
 * Carries the same three u8 values that nvmet_add_async_event() takes
 * (event type, event info, log page) plus a list node.
 */
struct nvmet_async_event {
	struct list_head	entry;
	u8			event_type;
	u8			event_info;
	u8			log_page;
};
377 
378 static inline void nvmet_clear_aen_bit(struct nvmet_req *req, u32 bn)
379 {
380 	int rae = le32_to_cpu(req->cmd->common.cdw10) & 1 << 15;
381 
382 	if (!rae)
383 		clear_bit(bn, &req->sq->ctrl->aen_masked);
384 }
385 
386 static inline bool nvmet_aen_bit_disabled(struct nvmet_ctrl *ctrl, u32 bn)
387 {
388 	if (!(READ_ONCE(ctrl->aen_enabled) & (1 << bn)))
389 		return true;
390 	return test_and_set_bit(bn, &ctrl->aen_masked);
391 }
392 
/*
 * Feature (KATO, async event) helpers and the async event command handler.
 * Prototypes only; the definitions live outside this header.
 */
void nvmet_get_feat_kato(struct nvmet_req *req);
void nvmet_get_feat_async_event(struct nvmet_req *req);
u16 nvmet_set_feat_kato(struct nvmet_req *req);
u16 nvmet_set_feat_async_event(struct nvmet_req *req, u32 mask);
void nvmet_execute_async_event(struct nvmet_req *req);

/*
 * Command parsing entry points, one per command class or backend. Each
 * returns a u16 (presumably an NVMe status code - confirm at the
 * definitions). nvmet_bdev_set_limits() takes a block device and an
 * Identify Namespace structure to fill in.
 */
u16 nvmet_parse_connect_cmd(struct nvmet_req *req);
void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id);
u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req);
u16 nvmet_file_parse_io_cmd(struct nvmet_req *req);
u16 nvmet_parse_admin_cmd(struct nvmet_req *req);
u16 nvmet_parse_discovery_cmd(struct nvmet_req *req);
u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req);
406 
/*
 * Request lifecycle: initialisation against a CQ/SQ pair and a transport
 * ops table, transfer-length checks, completion with a u16 status, and SGL
 * allocation/free. Prototypes only; defined outside this header.
 */
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
		struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops);
void nvmet_req_uninit(struct nvmet_req *req);
bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len);
bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len);
void nvmet_req_complete(struct nvmet_req *req, u16 status);
int nvmet_req_alloc_sgls(struct nvmet_req *req);
void nvmet_req_free_sgls(struct nvmet_req *req);

/* Command handlers with the same signature as nvmet_req.execute. */
void nvmet_execute_set_features(struct nvmet_req *req);
void nvmet_execute_get_features(struct nvmet_req *req);
void nvmet_execute_keep_alive(struct nvmet_req *req);
419 
/* Queue setup/teardown; both setup calls take the queue id and size. */
void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, u16 qid,
		u16 size);
void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, u16 qid,
		u16 size);
void nvmet_sq_destroy(struct nvmet_sq *sq);
int nvmet_sq_init(struct nvmet_sq *sq);

void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl);

/*
 * Controller management. nvmet_alloc_ctrl() and nvmet_ctrl_find_get() hand
 * the controller back through their last (output) parameter and return a
 * u16 (presumably an NVMe status code - confirm at the definitions).
 */
void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new);
u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
		struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp);
u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
		struct nvmet_req *req, struct nvmet_ctrl **ret);
void nvmet_ctrl_put(struct nvmet_ctrl *ctrl);
u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd);
436 
/* Subsystem allocation and release. */
struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
		enum nvme_subsys_type type);
void nvmet_subsys_put(struct nvmet_subsys *subsys);
void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys);

/*
 * Namespace lookup and lifecycle. Note that nvmet_find_namespace() takes
 * the namespace id as a little-endian __le32, whereas nvmet_ns_alloc()
 * takes it as a CPU-endian u32.
 */
struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid);
void nvmet_put_namespace(struct nvmet_ns *ns);
int nvmet_ns_enable(struct nvmet_ns *ns);
void nvmet_ns_disable(struct nvmet_ns *ns);
struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid);
void nvmet_ns_free(struct nvmet_ns *ns);

/* ANA event notification. */
void nvmet_send_ana_event(struct nvmet_subsys *subsys,
		struct nvmet_port *port);
void nvmet_port_send_ana_event(struct nvmet_port *port);

/* Transport registration; both take the transport's const ops table. */
int nvmet_register_transport(const struct nvmet_fabrics_ops *ops);
void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops);

void nvmet_port_del_ctrls(struct nvmet_port *port,
			  struct nvmet_subsys *subsys);

/* Port enable/disable. */
int nvmet_enable_port(struct nvmet_port *port);
void nvmet_disable_port(struct nvmet_port *port);

/* Referral enable/disable for @port relative to its @parent port. */
void nvmet_referral_enable(struct nvmet_port *parent, struct nvmet_port *port);
void nvmet_referral_disable(struct nvmet_port *parent, struct nvmet_port *port);
464 
/*
 * Helpers operating on a request's SGL at byte offset @off for @len bytes:
 * copy from a buffer, copy into a buffer, or zero. Each returns a u16
 * (presumably an NVMe status code - confirm at the definitions).
 */
u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
		size_t len);
u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf,
		size_t len);
u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len);

/* Log page length and offset as derived from the command. */
u32 nvmet_get_log_page_len(struct nvme_command *cmd);
u64 nvmet_get_log_page_offset(struct nvme_command *cmd);

/* Pointer to the list of ports; defined outside this header. */
extern struct list_head *nvmet_ports;
/* Discovery-change notification and async event queuing. */
void nvmet_port_disc_changed(struct nvmet_port *port,
		struct nvmet_subsys *subsys);
void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys,
		struct nvmet_host *host);
void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
		u8 event_info, u8 log_page);
481 
/* Queue sizing limits; NVMET_MAX_CMD is defined to equal the queue size. */
#define NVMET_QUEUE_SIZE	1024
#define NVMET_NR_QUEUES		128
#define NVMET_MAX_CMD		NVMET_QUEUE_SIZE

/*
 * Nice round number that makes a list of nsids fit into a page
 * (1024 entries of 4 bytes each is 4096 bytes).
 * Should become tunable at some point in the future.
 */
#define NVMET_MAX_NAMESPACES	1024

/*
 * 0 is not a valid ANA group ID, so we start numbering at 1.
 *
 * ANA Group 1 exists without manual intervention, has namespaces assigned to it
 * by default, and is available in an optimized state through all ports.
 */
#define NVMET_MAX_ANAGRPS	128
#define NVMET_DEFAULT_ANA_GRPID	1

/*
 * Keep-alive related constants. NVMET_DISC_KATO_MS is, per its name, in
 * milliseconds (120000 ms = 120 s); the unit of NVMET_KAS is not visible
 * here - confirm against its users.
 */
#define NVMET_KAS		10
#define NVMET_DISC_KATO_MS		120000
503 
/* configfs setup/teardown, annotated __init/__exit. */
int __init nvmet_init_configfs(void);
void __exit nvmet_exit_configfs(void);

/*
 * Discovery setup/teardown. Unlike nvmet_exit_configfs(), the exit routine
 * here carries no __exit annotation.
 */
int __init nvmet_init_discovery(void);
void nvmet_exit_discovery(void);

/* Globals defined outside this header. */
extern struct nvmet_subsys *nvmet_disc_subsys;
extern struct rw_semaphore nvmet_config_sem;

/*
 * ANA bookkeeping. The array has NVMET_MAX_ANAGRPS + 1 entries, presumably
 * so it can be indexed directly by group id (ids start at 1) - confirm.
 */
extern u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
extern u64 nvmet_ana_chgcnt;
extern struct rw_semaphore nvmet_ana_sem;
516 
/* Host access check for a subsystem, keyed by host NQN string. */
bool nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn);

/*
 * Backend-specific namespace operations, provided in bdev and file
 * variants. Note the asymmetry in the revalidate pair: the bdev variant
 * returns void while the file variant returns int.
 */
int nvmet_bdev_ns_enable(struct nvmet_ns *ns);
int nvmet_file_ns_enable(struct nvmet_ns *ns);
void nvmet_bdev_ns_disable(struct nvmet_ns *ns);
void nvmet_file_ns_disable(struct nvmet_ns *ns);
u16 nvmet_bdev_flush(struct nvmet_req *req);
u16 nvmet_file_flush(struct nvmet_req *req);
void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid);
void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns);
int nvmet_file_ns_revalidate(struct nvmet_ns *ns);
void nvmet_ns_revalidate(struct nvmet_ns *ns);
529 
530 static inline u32 nvmet_rw_data_len(struct nvmet_req *req)
531 {
532 	return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) <<
533 			req->ns->blksize_shift;
534 }
535 
536 static inline u32 nvmet_rw_metadata_len(struct nvmet_req *req)
537 {
538 	if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
539 		return 0;
540 	return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) *
541 			req->ns->metadata_size;
542 }
543 
544 static inline u32 nvmet_dsm_len(struct nvmet_req *req)
545 {
546 	return (le32_to_cpu(req->cmd->dsm.nr) + 1) *
547 		sizeof(struct nvme_dsm_range);
548 }
549 
/*
 * Passthru interface. With CONFIG_NVME_TARGET_PASSTHRU the real functions
 * are declared and nvmet_passthru_ctrl() returns the subsystem's
 * passthru_ctrl pointer. Without it, inline stubs are provided: the void
 * ones do nothing, the parse stubs return 0 and nvmet_passthru_ctrl()
 * returns NULL.
 *
 * nvmet_passthru_ctrl_enable() has no stub in the disabled case, so callers
 * presumably are only built with the option enabled - confirm.
 */
#ifdef CONFIG_NVME_TARGET_PASSTHRU
void nvmet_passthru_subsys_free(struct nvmet_subsys *subsys);
int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys);
void nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys);
u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req);
u16 nvmet_parse_passthru_io_cmd(struct nvmet_req *req);
static inline struct nvme_ctrl *nvmet_passthru_ctrl(struct nvmet_subsys *subsys)
{
	return subsys->passthru_ctrl;
}
#else /* CONFIG_NVME_TARGET_PASSTHRU */
static inline void nvmet_passthru_subsys_free(struct nvmet_subsys *subsys)
{
}
static inline void nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys)
{
}
static inline u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req)
{
	return 0;
}
static inline u16 nvmet_parse_passthru_io_cmd(struct nvmet_req *req)
{
	return 0;
}
static inline struct nvme_ctrl *nvmet_passthru_ctrl(struct nvmet_subsys *subsys)
{
	return NULL;
}
#endif /* CONFIG_NVME_TARGET_PASSTHRU */
580 
581 static inline struct nvme_ctrl *
582 nvmet_req_passthru_ctrl(struct nvmet_req *req)
583 {
584 	return nvmet_passthru_ctrl(req->sq->ctrl->subsys);
585 }
586 
/*
 * Defined outside this header. Judging by the name, converts an errno value
 * into a u16 NVMe status for @req - confirm at the definition.
 */
u16 errno_to_nvme_status(struct nvmet_req *req, int errno);
588 
589 /* Convert a 32-bit number to a 16-bit 0's based number */
590 static inline __le16 to0based(u32 a)
591 {
592 	return cpu_to_le16(max(1U, min(1U << 16, a)) - 1);
593 }
594 
595 static inline bool nvmet_ns_has_pi(struct nvmet_ns *ns)
596 {
597 	if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
598 		return false;
599 	return ns->pi_type && ns->metadata_size == sizeof(struct t10_pi_tuple);
600 }
601 
602 #endif /* _NVMET_H */
603