xref: /openbmc/linux/include/linux/ceph/osd_client.h (revision f628d799)
1b2441318SGreg Kroah-Hartman /* SPDX-License-Identifier: GPL-2.0 */
23d14c5d2SYehuda Sadeh #ifndef _FS_CEPH_OSD_CLIENT_H
33d14c5d2SYehuda Sadeh #define _FS_CEPH_OSD_CLIENT_H
43d14c5d2SYehuda Sadeh 
5a02a946dSIlya Dryomov #include <linux/bitrev.h>
63d14c5d2SYehuda Sadeh #include <linux/completion.h>
73d14c5d2SYehuda Sadeh #include <linux/kref.h>
83d14c5d2SYehuda Sadeh #include <linux/mempool.h>
93d14c5d2SYehuda Sadeh #include <linux/rbtree.h>
1002113a0fSElena Reshetova #include <linux/refcount.h>
1197e27aaaSXiubo Li #include <linux/ktime.h>
123d14c5d2SYehuda Sadeh 
136c4a1915SAlex Elder #include <linux/ceph/types.h>
146c4a1915SAlex Elder #include <linux/ceph/osdmap.h>
156c4a1915SAlex Elder #include <linux/ceph/messenger.h>
16b2aa5d0bSIlya Dryomov #include <linux/ceph/msgpool.h>
176c4a1915SAlex Elder #include <linux/ceph/auth.h>
18c885837fSAlex Elder #include <linux/ceph/pagelist.h>
193d14c5d2SYehuda Sadeh 
203d14c5d2SYehuda Sadeh struct ceph_msg;
213d14c5d2SYehuda Sadeh struct ceph_snap_context;
223d14c5d2SYehuda Sadeh struct ceph_osd_request;
233d14c5d2SYehuda Sadeh struct ceph_osd_client;
243d14c5d2SYehuda Sadeh 
253d14c5d2SYehuda Sadeh /*
263d14c5d2SYehuda Sadeh  * completion callback for async writepages
273d14c5d2SYehuda Sadeh  */
/* Callback type: takes the request pointer as its only argument, returns nothing. */
2885e084feSIlya Dryomov typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *);
293d14c5d2SYehuda Sadeh 
/*
 * Sentinel OSD number.
 *
 * NOTE(review): presumably identifies the "homeless" pseudo-OSD (cf. the
 * homeless_osd member of struct ceph_osd_client) -- confirm in osd_client.c.
 *
 * The negative literal is parenthesized so that the macro always expands to
 * a single operand, whatever expression it is pasted into.
 */
#define CEPH_HOMELESS_OSD	(-1)
3163244fa1SIlya Dryomov 
3208b8a044SJeff Layton /*
33a679e50fSJeff Layton  * A single extent in a SPARSE_READ reply.
34a679e50fSJeff Layton  *
35a679e50fSJeff Layton  * Note that these come from the OSD as little-endian values. On BE arches,
36a679e50fSJeff Layton  * we convert them in-place after receipt.
37a679e50fSJeff Layton  */
/*
 * Two 64-bit fields; __packed keeps the layout free of padding.
 * NOTE(review): "off" looks like an offset in the same space as the request
 * offset (ceph_sparse_ext_map_end() subtracts extent.offset from off + len)
 * -- confirm against the receive path.
 */
38a679e50fSJeff Layton struct ceph_sparse_extent {
39a679e50fSJeff Layton 	u64	off;
40a679e50fSJeff Layton 	u64	len;
41a679e50fSJeff Layton } __packed;
42a679e50fSJeff Layton 
/*
 * States of the sparse read state machine.  The numbering is spelled out
 * explicitly; it starts at 0 and increases by one per state.
 */
enum ceph_sparse_read_state {
	CEPH_SPARSE_READ_HDR		= 0,
	CEPH_SPARSE_READ_EXTENTS	= 1,
	CEPH_SPARSE_READ_DATA_LEN	= 2,
	CEPH_SPARSE_READ_DATA		= 3,
};
50*f628d799SJeff Layton 
51*f628d799SJeff Layton /*
52*f628d799SJeff Layton  * A SPARSE_READ reply is a 32-bit count of extents, followed by an array of
53*f628d799SJeff Layton  * 64-bit offset/length pairs, and then all of the actual file data
54*f628d799SJeff Layton  * concatenated after it (sans holes).
55*f628d799SJeff Layton  *
56*f628d799SJeff Layton  * Unfortunately, we don't know how long the extent array is until we've
57*f628d799SJeff Layton  * started reading the data section of the reply. The caller should send down
58*f628d799SJeff Layton  * a destination buffer for the array, but we'll alloc one if it's too small
59*f628d799SJeff Layton  * or if the caller doesn't.
60*f628d799SJeff Layton  */
/*
 * Bookkeeping for parsing one SPARSE_READ reply.  Note the mixed
 * representations: sr_datalen is declared __le32 (little-endian), while
 * sr_count is a plain u32.
 */
61*f628d799SJeff Layton struct ceph_sparse_read {
62*f628d799SJeff Layton 	enum ceph_sparse_read_state	sr_state;    /* state machine state */
63*f628d799SJeff Layton 	u64				sr_req_off;  /* orig request offset */
64*f628d799SJeff Layton 	u64				sr_req_len;  /* orig request length */
65*f628d799SJeff Layton 	u64				sr_pos;      /* current pos in buffer */
66*f628d799SJeff Layton 	int				sr_index;    /* current extent index */
67*f628d799SJeff Layton 	__le32				sr_datalen;  /* length of actual data */
68*f628d799SJeff Layton 	u32				sr_count;    /* extent count in reply */
69*f628d799SJeff Layton 	int				sr_ext_len;  /* length of extent array */
70*f628d799SJeff Layton 	struct ceph_sparse_extent	*sr_extent;  /* extent array */
71*f628d799SJeff Layton };
72*f628d799SJeff Layton 
73a679e50fSJeff Layton /*
7408b8a044SJeff Layton  * A given osd we're communicating with.
7508b8a044SJeff Layton  *
7608b8a044SJeff Layton  * Note that the o_requests tree can be searched while holding the "lock" mutex
7708b8a044SJeff Layton  * or the "o_requests_lock" spinlock. Insertion or removal requires both!
7808b8a044SJeff Layton  */
/*
 * Per-OSD state: reference count, connection, auth handshake, the rbtrees
 * of requests/linger requests/backoffs attached to this OSD, and the
 * sparse-read parser state.
 */
793d14c5d2SYehuda Sadeh struct ceph_osd {
8002113a0fSElena Reshetova 	refcount_t o_ref;
81*f628d799SJeff Layton 	int o_sparse_op_idx;	/* NOTE(review): presumably index of the op being sparse-read -- confirm */
823d14c5d2SYehuda Sadeh 	struct ceph_osd_client *o_osdc;
833d14c5d2SYehuda Sadeh 	int o_osd;
843d14c5d2SYehuda Sadeh 	int o_incarnation;
853d14c5d2SYehuda Sadeh 	struct rb_node o_node;
863d14c5d2SYehuda Sadeh 	struct ceph_connection o_con;
8708b8a044SJeff Layton 	spinlock_t o_requests_lock;	/* with "lock": both needed to modify o_requests */
885aea3dcdSIlya Dryomov 	struct rb_root o_requests;	/* searchable under either "lock" or o_requests_lock */
89922dab61SIlya Dryomov 	struct rb_root o_linger_requests;
90a02a946dSIlya Dryomov 	struct rb_root o_backoff_mappings;
91a02a946dSIlya Dryomov 	struct rb_root o_backoffs_by_id;
923d14c5d2SYehuda Sadeh 	struct list_head o_osd_lru;
936c4a1915SAlex Elder 	struct ceph_auth_handshake o_auth;
943d14c5d2SYehuda Sadeh 	unsigned long lru_ttl;
953d14c5d2SYehuda Sadeh 	struct list_head o_keepalive_item;
965aea3dcdSIlya Dryomov 	struct mutex lock;
97*f628d799SJeff Layton 	struct ceph_sparse_read	o_sparse_read;
983d14c5d2SYehuda Sadeh };
993d14c5d2SYehuda Sadeh 
/*
 * Op-count constants.
 * NOTE(review): presumably CEPH_OSD_SLAB_OPS is the op count served from a
 * slab cache and CEPH_OSD_MAX_OPS the upper bound on ops per request --
 * confirm against the request allocator in osd_client.c.
 */
1003f1af42aSIlya Dryomov #define CEPH_OSD_SLAB_OPS	2
1013f1af42aSIlya Dryomov #define CEPH_OSD_MAX_OPS	16
1021b83bef2SSage Weil 
/*
 * Type tag stored in struct ceph_osd_data.  The BIO value exists only when
 * CONFIG_BLOCK is defined, so the numeric value of
 * CEPH_OSD_DATA_TYPE_BVECS depends on that option.
 */
1032ac2b7a6SAlex Elder enum ceph_osd_data_type {
104ec9123c5SAlex Elder 	CEPH_OSD_DATA_TYPE_NONE = 0,
1052ac2b7a6SAlex Elder 	CEPH_OSD_DATA_TYPE_PAGES,
1069a5e6d09SAlex Elder 	CEPH_OSD_DATA_TYPE_PAGELIST,
1072ac2b7a6SAlex Elder #ifdef CONFIG_BLOCK
1082ac2b7a6SAlex Elder 	CEPH_OSD_DATA_TYPE_BIO,
1092ac2b7a6SAlex Elder #endif /* CONFIG_BLOCK */
110b9e281c2SIlya Dryomov 	CEPH_OSD_DATA_TYPE_BVECS,
1112ac2b7a6SAlex Elder };
1122ac2b7a6SAlex Elder 
/*
 * A data buffer description: a type tag followed by an anonymous union of
 * the per-type representations.
 * NOTE(review): presumably "type" selects which union member is valid --
 * confirm against the users in osd_client.c.
 */
1132794a82aSAlex Elder struct ceph_osd_data {
1142ac2b7a6SAlex Elder 	enum ceph_osd_data_type	type;
1152ac2b7a6SAlex Elder 	union {
		/* page vector; NOTE(review): presumably CEPH_OSD_DATA_TYPE_PAGES */
1162794a82aSAlex Elder 		struct {
1172794a82aSAlex Elder 			struct page	**pages;
118e0c59487SAlex Elder 			u64		length;
1192794a82aSAlex Elder 			u32		alignment;
1202794a82aSAlex Elder 			bool		pages_from_pool;
1212794a82aSAlex Elder 			bool		own_pages;
1222794a82aSAlex Elder 		};
		/* NOTE(review): presumably CEPH_OSD_DATA_TYPE_PAGELIST */
1239a5e6d09SAlex Elder 		struct ceph_pagelist	*pagelist;
1242794a82aSAlex Elder #ifdef CONFIG_BLOCK
		/* bio iterator; only present with CONFIG_BLOCK */
125fdce58ccSAlex Elder 		struct {
1265359a17dSIlya Dryomov 			struct ceph_bio_iter	bio_pos;
1275359a17dSIlya Dryomov 			u32			bio_length;
128fdce58ccSAlex Elder 		};
1292794a82aSAlex Elder #endif /* CONFIG_BLOCK */
		/* bio_vec iterator; NOTE(review): presumably CEPH_OSD_DATA_TYPE_BVECS */
1300010f705SIlya Dryomov 		struct {
131b9e281c2SIlya Dryomov 			struct ceph_bvec_iter	bvec_pos;
1320010f705SIlya Dryomov 			u32			num_bvecs;
1330010f705SIlya Dryomov 		};
1342794a82aSAlex Elder 	};
1352794a82aSAlex Elder };
1362794a82aSAlex Elder 
13779528734SAlex Elder struct ceph_osd_req_op {
13879528734SAlex Elder 	u16 op;           /* CEPH_OSD_OP_* */
1397b25bf5fSIlya Dryomov 	u32 flags;        /* CEPH_OSD_OP_FLAG_* */
140de2aa102SIlya Dryomov 	u32 indata_len;   /* request */
1417665d85bSYan, Zheng 	u32 outdata_len;  /* reply */
1427665d85bSYan, Zheng 	s32 rval;
1437665d85bSYan, Zheng 
14479528734SAlex Elder 	union {
14549719778SAlex Elder 		struct ceph_osd_data raw_data_in;
14679528734SAlex Elder 		struct {
14779528734SAlex Elder 			u64 offset, length;
14879528734SAlex Elder 			u64 truncate_size;
14979528734SAlex Elder 			u32 truncate_seq;
150a679e50fSJeff Layton 			int sparse_ext_cnt;
151a679e50fSJeff Layton 			struct ceph_sparse_extent *sparse_ext;
1525476492fSAlex Elder 			struct ceph_osd_data osd_data;
15379528734SAlex Elder 		} extent;
15479528734SAlex Elder 		struct {
155d7d5a007SIlya Dryomov 			u32 name_len;
156d7d5a007SIlya Dryomov 			u32 value_len;
157d74b50beSYan, Zheng 			__u8 cmp_op;       /* CEPH_OSD_CMPXATTR_OP_* */
158d74b50beSYan, Zheng 			__u8 cmp_mode;     /* CEPH_OSD_CMPXATTR_MODE_* */
159d74b50beSYan, Zheng 			struct ceph_osd_data osd_data;
160d74b50beSYan, Zheng 		} xattr;
161d74b50beSYan, Zheng 		struct {
16279528734SAlex Elder 			const char *class_name;
16379528734SAlex Elder 			const char *method_name;
1645476492fSAlex Elder 			struct ceph_osd_data request_info;
16504017e29SAlex Elder 			struct ceph_osd_data request_data;
1665476492fSAlex Elder 			struct ceph_osd_data response_data;
16779528734SAlex Elder 			__u8 class_len;
16879528734SAlex Elder 			__u8 method_len;
169bb873b53SIlya Dryomov 			u32 indata_len;
17079528734SAlex Elder 		} cls;
17179528734SAlex Elder 		struct {
17279528734SAlex Elder 			u64 cookie;
173922dab61SIlya Dryomov 			__u8 op;           /* CEPH_OSD_WATCH_OP_ */
174922dab61SIlya Dryomov 			u32 gen;
17579528734SAlex Elder 		} watch;
176c647b8a8SIlya Dryomov 		struct {
177922dab61SIlya Dryomov 			struct ceph_osd_data request_data;
178922dab61SIlya Dryomov 		} notify_ack;
179922dab61SIlya Dryomov 		struct {
18019079203SIlya Dryomov 			u64 cookie;
18119079203SIlya Dryomov 			struct ceph_osd_data request_data;
18219079203SIlya Dryomov 			struct ceph_osd_data response_data;
18319079203SIlya Dryomov 		} notify;
18419079203SIlya Dryomov 		struct {
185a4ed38d7SDouglas Fuller 			struct ceph_osd_data response_data;
186a4ed38d7SDouglas Fuller 		} list_watchers;
187a4ed38d7SDouglas Fuller 		struct {
188c647b8a8SIlya Dryomov 			u64 expected_object_size;
189c647b8a8SIlya Dryomov 			u64 expected_write_size;
190d3798accSIlya Dryomov 			u32 flags;  /* CEPH_OSD_OP_ALLOC_HINT_FLAG_* */
191c647b8a8SIlya Dryomov 		} alloc_hint;
19223ddf9beSLuis Henriques 		struct {
19323ddf9beSLuis Henriques 			u64 snapid;
19423ddf9beSLuis Henriques 			u64 src_version;
19523ddf9beSLuis Henriques 			u8 flags;
19623ddf9beSLuis Henriques 			u32 src_fadvise_flags;
19723ddf9beSLuis Henriques 			struct ceph_osd_data osd_data;
19823ddf9beSLuis Henriques 		} copy_from;
19979528734SAlex Elder 	};
20079528734SAlex Elder };
20179528734SAlex Elder 
/*
 * Where a request is aimed: base and target object id/locator pairs, the
 * last PG mapping that was computed, and the resulting OSD number.  Embedded
 * in both struct ceph_osd_request (r_t) and struct ceph_osd_linger_request (t).
 */
20263244fa1SIlya Dryomov struct ceph_osd_request_target {
20363244fa1SIlya Dryomov 	struct ceph_object_id base_oid;
20463244fa1SIlya Dryomov 	struct ceph_object_locator base_oloc;
20563244fa1SIlya Dryomov 	struct ceph_object_id target_oid;
20663244fa1SIlya Dryomov 	struct ceph_object_locator target_oloc;
20763244fa1SIlya Dryomov 
208dc98ff72SIlya Dryomov 	struct ceph_pg pgid;               /* last raw pg we mapped to */
209dc98ff72SIlya Dryomov 	struct ceph_spg spgid;             /* last actual spg we mapped to */
21063244fa1SIlya Dryomov 	u32 pg_num;
21163244fa1SIlya Dryomov 	u32 pg_num_mask;
21263244fa1SIlya Dryomov 	struct ceph_osds acting;
21363244fa1SIlya Dryomov 	struct ceph_osds up;
21463244fa1SIlya Dryomov 	int size;
21563244fa1SIlya Dryomov 	int min_size;
21663244fa1SIlya Dryomov 	bool sort_bitwise;
217ae78dd81SIlya Dryomov 	bool recovery_deletes;
21863244fa1SIlya Dryomov 
21963244fa1SIlya Dryomov 	unsigned int flags;                /* CEPH_OSD_FLAG_* */
220117d96a0SIlya Dryomov 	bool used_replica;
22163244fa1SIlya Dryomov 	bool paused;
22263244fa1SIlya Dryomov 
22304c7d789SIlya Dryomov 	u32 epoch;
224dc93e0e2SIlya Dryomov 	u32 last_force_resend;
225dc93e0e2SIlya Dryomov 
	/* NOTE(review): presumably an OSD number or CEPH_HOMELESS_OSD -- confirm */
22663244fa1SIlya Dryomov 	int osd;
22763244fa1SIlya Dryomov };
22863244fa1SIlya Dryomov 
2293d14c5d2SYehuda Sadeh /* an in-flight request */
/*
 * Request state.  The structure ends in a flexible array of ops (r_ops[]);
 * r_num_ops holds the op count and is what osd_req_op_data() checks indices
 * against.  The r_base_oid/r_base_oloc/r_flags names are macros that alias
 * the corresponding members of the embedded target r_t.
 */
2303d14c5d2SYehuda Sadeh struct ceph_osd_request {
2313d14c5d2SYehuda Sadeh 	u64             r_tid;              /* unique for this client */
2323d14c5d2SYehuda Sadeh 	struct rb_node  r_node;
2334609245eSIlya Dryomov 	struct rb_node  r_mc_node;          /* map check */
23488bc1922SIlya Dryomov 	struct work_struct r_complete_work;
2353d14c5d2SYehuda Sadeh 	struct ceph_osd *r_osd;
236a66dd383SIlya Dryomov 
237a66dd383SIlya Dryomov 	struct ceph_osd_request_target r_t;
238a66dd383SIlya Dryomov #define r_base_oid	r_t.base_oid
239a66dd383SIlya Dryomov #define r_base_oloc	r_t.base_oloc
240a66dd383SIlya Dryomov #define r_flags		r_t.flags
2413d14c5d2SYehuda Sadeh 
2423d14c5d2SYehuda Sadeh 	struct ceph_msg  *r_request, *r_reply;
2433d14c5d2SYehuda Sadeh 	u32               r_sent;      /* >0 if r_request is sending/sent */
2441b83bef2SSage Weil 
24579528734SAlex Elder 	/* request osd ops array  */
24679528734SAlex Elder 	unsigned int		r_num_ops;
24779528734SAlex Elder 
2481b83bef2SSage Weil 	int               r_result;
2493d14c5d2SYehuda Sadeh 
2503d14c5d2SYehuda Sadeh 	struct ceph_osd_client *r_osdc;
2513d14c5d2SYehuda Sadeh 	struct kref       r_kref;
2523d14c5d2SYehuda Sadeh 	bool              r_mempool;
253b18b9550SIlya Dryomov 	struct completion r_completion;       /* private to osd_client.c */
25426be8808SAlex Elder 	ceph_osdc_callback_t r_callback;
2553d14c5d2SYehuda Sadeh 
2563d14c5d2SYehuda Sadeh 	struct inode *r_inode;         	      /* for use by callbacks */
25794e85771SIlya Dryomov 	struct list_head r_private_item;      /* ditto */
2583d14c5d2SYehuda Sadeh 	void *r_priv;			      /* ditto */
2593d14c5d2SYehuda Sadeh 
260bb873b53SIlya Dryomov 	/* set by submitter */
261bb873b53SIlya Dryomov 	u64 r_snapid;                         /* for reads, CEPH_NOSNAP o/w */
262bb873b53SIlya Dryomov 	struct ceph_snap_context *r_snapc;    /* for writes */
263fac02ddfSArnd Bergmann 	struct timespec64 r_mtime;            /* ditto */
264bb873b53SIlya Dryomov 	u64 r_data_offset;                    /* ditto */
265922dab61SIlya Dryomov 	bool r_linger;                        /* don't resend on failure */
2663d14c5d2SYehuda Sadeh 
267bb873b53SIlya Dryomov 	/* internal */
268bb873b53SIlya Dryomov 	unsigned long r_stamp;                /* jiffies, send or check time */
2697cc5e38fSIlya Dryomov 	unsigned long r_start_stamp;          /* jiffies */
27097e27aaaSXiubo Li 	ktime_t r_start_latency;              /* ktime_t */
27197e27aaaSXiubo Li 	ktime_t r_end_latency;                /* ktime_t */
272bb873b53SIlya Dryomov 	int r_attempts;
2734609245eSIlya Dryomov 	u32 r_map_dne_bound;
2743f1af42aSIlya Dryomov 
	/* flexible array member: must stay last */
2753f1af42aSIlya Dryomov 	struct ceph_osd_req_op r_ops[];
2763d14c5d2SYehuda Sadeh };
2773d14c5d2SYehuda Sadeh 
278205ee118SIlya Dryomov struct ceph_request_redirect {
279205ee118SIlya Dryomov 	struct ceph_object_locator oloc;
280205ee118SIlya Dryomov };
281205ee118SIlya Dryomov 
2828cb441c0SIlya Dryomov /*
2838cb441c0SIlya Dryomov  * osd request identifier
2848cb441c0SIlya Dryomov  *
2858cb441c0SIlya Dryomov  * caller name + incarnation# + tid to uniquely identify this request
2868cb441c0SIlya Dryomov  */
/* Packed, with little-endian (__le64/__le32) tid and inc fields. */
2878cb441c0SIlya Dryomov struct ceph_osd_reqid {
2888cb441c0SIlya Dryomov 	struct ceph_entity_name name;
2898cb441c0SIlya Dryomov 	__le64 tid;
2908cb441c0SIlya Dryomov 	__le32 inc;
2918cb441c0SIlya Dryomov } __packed;
2928cb441c0SIlya Dryomov 
/*
 * Three little-endian 64-bit trace identifiers, packed without padding.
 * NOTE(review): presumably encoded verbatim into outgoing messages --
 * confirm against the message encoder.
 */
2938cb441c0SIlya Dryomov struct ceph_blkin_trace_info {
2948cb441c0SIlya Dryomov 	__le64 trace_id;
2958cb441c0SIlya Dryomov 	__le64 span_id;
2968cb441c0SIlya Dryomov 	__le64 parent_span_id;
2978cb441c0SIlya Dryomov } __packed;
2988cb441c0SIlya Dryomov 
299922dab61SIlya Dryomov typedef void (*rados_watchcb2_t)(void *arg, u64 notify_id, u64 cookie,
300922dab61SIlya Dryomov 				 u64 notifier_id, void *data, size_t data_len);
301922dab61SIlya Dryomov typedef void (*rados_watcherrcb_t)(void *arg, u64 cookie, int err);
302a40c4f10SYehuda Sadeh 
303922dab61SIlya Dryomov struct ceph_osd_linger_request {
304922dab61SIlya Dryomov 	struct ceph_osd_client *osdc;
305922dab61SIlya Dryomov 	u64 linger_id;
306922dab61SIlya Dryomov 	bool committed;
30719079203SIlya Dryomov 	bool is_watch;                  /* watch or notify */
308922dab61SIlya Dryomov 
309922dab61SIlya Dryomov 	struct ceph_osd *osd;
310922dab61SIlya Dryomov 	struct ceph_osd_request *reg_req;
311922dab61SIlya Dryomov 	struct ceph_osd_request *ping_req;
312922dab61SIlya Dryomov 	unsigned long ping_sent;
313b07d3c4bSIlya Dryomov 	unsigned long watch_valid_thru;
314b07d3c4bSIlya Dryomov 	struct list_head pending_lworks;
315922dab61SIlya Dryomov 
316922dab61SIlya Dryomov 	struct ceph_osd_request_target t;
3174609245eSIlya Dryomov 	u32 map_dne_bound;
318922dab61SIlya Dryomov 
319fac02ddfSArnd Bergmann 	struct timespec64 mtime;
320922dab61SIlya Dryomov 
321922dab61SIlya Dryomov 	struct kref kref;
322922dab61SIlya Dryomov 	struct mutex lock;
323922dab61SIlya Dryomov 	struct rb_node node;            /* osd */
324922dab61SIlya Dryomov 	struct rb_node osdc_node;       /* osdc */
3254609245eSIlya Dryomov 	struct rb_node mc_node;         /* map check */
326922dab61SIlya Dryomov 	struct list_head scan_item;
327922dab61SIlya Dryomov 
328922dab61SIlya Dryomov 	struct completion reg_commit_wait;
32919079203SIlya Dryomov 	struct completion notify_finish_wait;
330922dab61SIlya Dryomov 	int reg_commit_error;
33119079203SIlya Dryomov 	int notify_finish_error;
332922dab61SIlya Dryomov 	int last_error;
333922dab61SIlya Dryomov 
334922dab61SIlya Dryomov 	u32 register_gen;
33519079203SIlya Dryomov 	u64 notify_id;
336922dab61SIlya Dryomov 
337922dab61SIlya Dryomov 	rados_watchcb2_t wcb;
338922dab61SIlya Dryomov 	rados_watcherrcb_t errcb;
339922dab61SIlya Dryomov 	void *data;
34019079203SIlya Dryomov 
34175dbb685SIlya Dryomov 	struct ceph_pagelist *request_pl;
34275dbb685SIlya Dryomov 	struct page **notify_id_pages;
34375dbb685SIlya Dryomov 
34419079203SIlya Dryomov 	struct page ***preply_pages;
34519079203SIlya Dryomov 	size_t *preply_len;
346a40c4f10SYehuda Sadeh };
347a40c4f10SYehuda Sadeh 
348a4ed38d7SDouglas Fuller struct ceph_watch_item {
349a4ed38d7SDouglas Fuller 	struct ceph_entity_name name;
350a4ed38d7SDouglas Fuller 	u64 cookie;
351a4ed38d7SDouglas Fuller 	struct ceph_entity_addr addr;
352a4ed38d7SDouglas Fuller };
353a4ed38d7SDouglas Fuller 
354a02a946dSIlya Dryomov struct ceph_spg_mapping {
355a02a946dSIlya Dryomov 	struct rb_node node;
356a02a946dSIlya Dryomov 	struct ceph_spg spgid;
357a02a946dSIlya Dryomov 
358a02a946dSIlya Dryomov 	struct rb_root backoffs;
359a02a946dSIlya Dryomov };
360a02a946dSIlya Dryomov 
361a02a946dSIlya Dryomov struct ceph_hobject_id {
362a02a946dSIlya Dryomov 	void *key;
363a02a946dSIlya Dryomov 	size_t key_len;
364a02a946dSIlya Dryomov 	void *oid;
365a02a946dSIlya Dryomov 	size_t oid_len;
366a02a946dSIlya Dryomov 	u64 snapid;
367a02a946dSIlya Dryomov 	u32 hash;
368a02a946dSIlya Dryomov 	u8 is_max;
369a02a946dSIlya Dryomov 	void *nspace;
370a02a946dSIlya Dryomov 	size_t nspace_len;
371a02a946dSIlya Dryomov 	s64 pool;
372a02a946dSIlya Dryomov 
373a02a946dSIlya Dryomov 	/* cache */
374a02a946dSIlya Dryomov 	u32 hash_reverse_bits;
375a02a946dSIlya Dryomov };
376a02a946dSIlya Dryomov 
377a02a946dSIlya Dryomov static inline void ceph_hoid_build_hash_cache(struct ceph_hobject_id *hoid)
378a02a946dSIlya Dryomov {
379a02a946dSIlya Dryomov 	hoid->hash_reverse_bits = bitrev32(hoid->hash);
380a02a946dSIlya Dryomov }
381a02a946dSIlya Dryomov 
382a02a946dSIlya Dryomov /*
383a02a946dSIlya Dryomov  * PG-wide backoff: [begin, end)
384a02a946dSIlya Dryomov  * per-object backoff: begin == end
385a02a946dSIlya Dryomov  */
386a02a946dSIlya Dryomov struct ceph_osd_backoff {
387a02a946dSIlya Dryomov 	struct rb_node spg_node;
388a02a946dSIlya Dryomov 	struct rb_node id_node;
389a02a946dSIlya Dryomov 
390a02a946dSIlya Dryomov 	struct ceph_spg spgid;
391a02a946dSIlya Dryomov 	u64 id;
392a02a946dSIlya Dryomov 	struct ceph_hobject_id *begin;
393a02a946dSIlya Dryomov 	struct ceph_hobject_id *end;
394a02a946dSIlya Dryomov };
395a02a946dSIlya Dryomov 
396264048afSIlya Dryomov #define CEPH_LINGER_ID_START	0xffff000000000000ULL
397264048afSIlya Dryomov 
/*
 * Per-client OSD state: the current osdmap, the rbtree of OSDs and the LRU
 * of idle ones, an embedded "homeless" struct ceph_osd, tid/linger id
 * counters, map-check trees, timeout work items, and the request mempool,
 * message pools and workqueues.
 */
3983d14c5d2SYehuda Sadeh struct ceph_osd_client {
3993d14c5d2SYehuda Sadeh 	struct ceph_client     *client;
4003d14c5d2SYehuda Sadeh 
4013d14c5d2SYehuda Sadeh 	struct ceph_osdmap     *osdmap;       /* current map */
4025aea3dcdSIlya Dryomov 	struct rw_semaphore    lock;
4033d14c5d2SYehuda Sadeh 
4043d14c5d2SYehuda Sadeh 	struct rb_root         osds;          /* osds */
4053d14c5d2SYehuda Sadeh 	struct list_head       osd_lru;       /* idle osds */
4069dd2845cSIlya Dryomov 	spinlock_t             osd_lru_lock;
40758eb7932SJeff Layton 	u32		       epoch_barrier;
4085aea3dcdSIlya Dryomov 	struct ceph_osd        homeless_osd;
4095aea3dcdSIlya Dryomov 	atomic64_t             last_tid;      /* tid of last request */
410922dab61SIlya Dryomov 	u64                    last_linger_id;
411922dab61SIlya Dryomov 	struct rb_root         linger_requests; /* lingering requests */
4124609245eSIlya Dryomov 	struct rb_root         map_checks;
4134609245eSIlya Dryomov 	struct rb_root         linger_map_checks;
4145aea3dcdSIlya Dryomov 	atomic_t               num_requests;
4155aea3dcdSIlya Dryomov 	atomic_t               num_homeless;
41666850df5SIlya Dryomov 	int                    abort_err;
4173d14c5d2SYehuda Sadeh 	struct delayed_work    timeout_work;
4183d14c5d2SYehuda Sadeh 	struct delayed_work    osds_timeout_work;
4193d14c5d2SYehuda Sadeh #ifdef CONFIG_DEBUG_FS
	/* only present when CONFIG_DEBUG_FS is enabled */
4203d14c5d2SYehuda Sadeh 	struct dentry 	       *debugfs_file;
4213d14c5d2SYehuda Sadeh #endif
4223d14c5d2SYehuda Sadeh 
4233d14c5d2SYehuda Sadeh 	mempool_t              *req_mempool;
4243d14c5d2SYehuda Sadeh 
4253d14c5d2SYehuda Sadeh 	struct ceph_msgpool	msgpool_op;
4263d14c5d2SYehuda Sadeh 	struct ceph_msgpool	msgpool_op_reply;
427a40c4f10SYehuda Sadeh 
428a40c4f10SYehuda Sadeh 	struct workqueue_struct	*notify_wq;
42988bc1922SIlya Dryomov 	struct workqueue_struct	*completion_wq;
4303d14c5d2SYehuda Sadeh };
4313d14c5d2SYehuda Sadeh 
432b7ec35b3SIlya Dryomov static inline bool ceph_osdmap_flag(struct ceph_osd_client *osdc, int flag)
433b7ec35b3SIlya Dryomov {
434b7ec35b3SIlya Dryomov 	return osdc->osdmap->flags & flag;
435b7ec35b3SIlya Dryomov }
436b7ec35b3SIlya Dryomov 
4375522ae0bSAlex Elder extern int ceph_osdc_setup(void);
4385522ae0bSAlex Elder extern void ceph_osdc_cleanup(void);
4395522ae0bSAlex Elder 
4403d14c5d2SYehuda Sadeh extern int ceph_osdc_init(struct ceph_osd_client *osdc,
4413d14c5d2SYehuda Sadeh 			  struct ceph_client *client);
4423d14c5d2SYehuda Sadeh extern void ceph_osdc_stop(struct ceph_osd_client *osdc);
443120a75eaSYan, Zheng extern void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc);
4443d14c5d2SYehuda Sadeh 
4453d14c5d2SYehuda Sadeh extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
4463d14c5d2SYehuda Sadeh 				   struct ceph_msg *msg);
4473d14c5d2SYehuda Sadeh extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
4483d14c5d2SYehuda Sadeh 				 struct ceph_msg *msg);
44958eb7932SJeff Layton void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb);
45066850df5SIlya Dryomov void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err);
4512cef0ba8SYan, Zheng void ceph_osdc_clear_abort_err(struct ceph_osd_client *osdc);
4523d14c5d2SYehuda Sadeh 
/*
 * Evaluate to a pointer to field @fld of union member @typ in op number
 * @whch of request @oreq, i.e. &oreq->r_ops[whch].typ.fld.
 *
 * @oreq and @whch are each evaluated exactly once (they are copied into
 * locals first).  The index is checked with BUG_ON() against r_num_ops
 * before the op is touched.  Implemented as a GNU statement expression.
 */
4534cf3e6dfSIlya Dryomov #define osd_req_op_data(oreq, whch, typ, fld)				\
4544cf3e6dfSIlya Dryomov ({									\
4554cf3e6dfSIlya Dryomov 	struct ceph_osd_request *__oreq = (oreq);			\
4564cf3e6dfSIlya Dryomov 	unsigned int __whch = (whch);					\
4574cf3e6dfSIlya Dryomov 	BUG_ON(__whch >= __oreq->r_num_ops);				\
4584cf3e6dfSIlya Dryomov 	&__oreq->r_ops[__whch].typ.fld;					\
4594cf3e6dfSIlya Dryomov })
4604cf3e6dfSIlya Dryomov 
461042f6498SJeff Layton struct ceph_osd_req_op *osd_req_op_init(struct ceph_osd_request *osd_req,
462144cba14SYan, Zheng 			    unsigned int which, u16 opcode, u32 flags);
46349719778SAlex Elder 
46449719778SAlex Elder extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *,
46549719778SAlex Elder 					unsigned int which,
46649719778SAlex Elder 					struct page **pages, u64 length,
46749719778SAlex Elder 					u32 alignment, bool pages_from_pool,
46849719778SAlex Elder 					bool own_pages);
46949719778SAlex Elder 
470c99d2d4aSAlex Elder extern void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
471c99d2d4aSAlex Elder 					unsigned int which, u16 opcode,
47233803f33SAlex Elder 					u64 offset, u64 length,
47333803f33SAlex Elder 					u64 truncate_size, u32 truncate_seq);
474c99d2d4aSAlex Elder extern void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
475c99d2d4aSAlex Elder 					unsigned int which, u64 length);
4762c63f49aSYan, Zheng extern void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req,
4772c63f49aSYan, Zheng 				       unsigned int which, u64 offset_inc);
478a4ce40a9SAlex Elder 
479a4ce40a9SAlex Elder extern struct ceph_osd_data *osd_req_op_extent_osd_data(
480a4ce40a9SAlex Elder 					struct ceph_osd_request *osd_req,
481406e2c9fSAlex Elder 					unsigned int which);
482a4ce40a9SAlex Elder 
483a4ce40a9SAlex Elder extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *,
484406e2c9fSAlex Elder 					unsigned int which,
485a4ce40a9SAlex Elder 					struct page **pages, u64 length,
486a4ce40a9SAlex Elder 					u32 alignment, bool pages_from_pool,
487a4ce40a9SAlex Elder 					bool own_pages);
488a4ce40a9SAlex Elder extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *,
489406e2c9fSAlex Elder 					unsigned int which,
490a4ce40a9SAlex Elder 					struct ceph_pagelist *pagelist);
491a4ce40a9SAlex Elder #ifdef CONFIG_BLOCK
4925359a17dSIlya Dryomov void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
493406e2c9fSAlex Elder 				    unsigned int which,
4945359a17dSIlya Dryomov 				    struct ceph_bio_iter *bio_pos,
4955359a17dSIlya Dryomov 				    u32 bio_length);
496a4ce40a9SAlex Elder #endif /* CONFIG_BLOCK */
4970010f705SIlya Dryomov void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req,
4980010f705SIlya Dryomov 				      unsigned int which,
4990010f705SIlya Dryomov 				      struct bio_vec *bvecs, u32 num_bvecs,
5000010f705SIlya Dryomov 				      u32 bytes);
501b9e281c2SIlya Dryomov void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
502b9e281c2SIlya Dryomov 					 unsigned int which,
503b9e281c2SIlya Dryomov 					 struct ceph_bvec_iter *bvec_pos);
504a4ce40a9SAlex Elder 
50504017e29SAlex Elder extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *,
50604017e29SAlex Elder 					unsigned int which,
50704017e29SAlex Elder 					struct ceph_pagelist *pagelist);
5086c57b554SAlex Elder extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *,
5096c57b554SAlex Elder 					unsigned int which,
5106c57b554SAlex Elder 					struct page **pages, u64 length,
5116c57b554SAlex Elder 					u32 alignment, bool pages_from_pool,
5126c57b554SAlex Elder 					bool own_pages);
513b9e281c2SIlya Dryomov void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req,
514b9e281c2SIlya Dryomov 				       unsigned int which,
5150010f705SIlya Dryomov 				       struct bio_vec *bvecs, u32 num_bvecs,
5160010f705SIlya Dryomov 				       u32 bytes);
517a4ce40a9SAlex Elder extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
518c99d2d4aSAlex Elder 					unsigned int which,
519a4ce40a9SAlex Elder 					struct page **pages, u64 length,
520a4ce40a9SAlex Elder 					u32 alignment, bool pages_from_pool,
521a4ce40a9SAlex Elder 					bool own_pages);
52224639ce5SIlya Dryomov int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
52304017e29SAlex Elder 			const char *class, const char *method);
524d74b50beSYan, Zheng extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
525d74b50beSYan, Zheng 				 u16 opcode, const char *name, const void *value,
526d74b50beSYan, Zheng 				 size_t size, u8 cmp_op, u8 cmp_mode);
527c647b8a8SIlya Dryomov extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
528c647b8a8SIlya Dryomov 				       unsigned int which,
529c647b8a8SIlya Dryomov 				       u64 expected_object_size,
530d3798accSIlya Dryomov 				       u64 expected_write_size,
531d3798accSIlya Dryomov 				       u32 flags);
532aca39d9eSLuís Henriques extern int osd_req_op_copy_from_init(struct ceph_osd_request *req,
533aca39d9eSLuís Henriques 				     u64 src_snapid, u64 src_version,
534aca39d9eSLuís Henriques 				     struct ceph_object_id *src_oid,
535aca39d9eSLuís Henriques 				     struct ceph_object_locator *src_oloc,
536aca39d9eSLuís Henriques 				     u32 src_fadvise_flags,
537aca39d9eSLuís Henriques 				     u32 dst_fadvise_flags,
538aca39d9eSLuís Henriques 				     u32 truncate_seq, u64 truncate_size,
539aca39d9eSLuís Henriques 				     u8 copy_from_flags);
54033803f33SAlex Elder 
5413d14c5d2SYehuda Sadeh extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
5423d14c5d2SYehuda Sadeh 					       struct ceph_snap_context *snapc,
543acead002SAlex Elder 					       unsigned int num_ops,
5443d14c5d2SYehuda Sadeh 					       bool use_mempool,
54554a54007SAlex Elder 					       gfp_t gfp_flags);
54613d1ad16SIlya Dryomov int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp);
5473d14c5d2SYehuda Sadeh 
5483d14c5d2SYehuda Sadeh extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
5493d14c5d2SYehuda Sadeh 				      struct ceph_file_layout *layout,
5503d14c5d2SYehuda Sadeh 				      struct ceph_vino vino,
551acead002SAlex Elder 				      u64 offset, u64 *len,
552715e4cd4SYan, Zheng 				      unsigned int which, int num_ops,
553715e4cd4SYan, Zheng 				      int opcode, int flags,
5543d14c5d2SYehuda Sadeh 				      struct ceph_snap_context *snapc,
555acead002SAlex Elder 				      u32 truncate_seq, u64 truncate_size,
556153e5167SAlex Elder 				      bool use_mempool);
5573d14c5d2SYehuda Sadeh 
int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt);

/*
 * Number of extent slots to preallocate for a sparse read.  This is only an
 * initial size: if the OSD sends back more extents than this, the receiver
 * reallocates the array.
 */
#define CEPH_SPARSE_EXT_ARRAY_INITIAL  16

/*
 * Allocate the sparse extent map for @op with the default initial size.
 * Returns whatever __ceph_alloc_sparse_ext_map() returns.
 */
static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op)
{
	const int initial_cnt = CEPH_SPARSE_EXT_ARRAY_INITIAL;

	return __ceph_alloc_sparse_ext_map(op, initial_cnt);
}
571a679e50fSJeff Layton 
5729e94af20SIlya Dryomov extern void ceph_osdc_get_request(struct ceph_osd_request *req);
5739e94af20SIlya Dryomov extern void ceph_osdc_put_request(struct ceph_osd_request *req);
5743d14c5d2SYehuda Sadeh 
575a8af0d68SJeff Layton void ceph_osdc_start_request(struct ceph_osd_client *osdc,
576a8af0d68SJeff Layton 			     struct ceph_osd_request *req);
577c9f9b93dSIlya Dryomov extern void ceph_osdc_cancel_request(struct ceph_osd_request *req);
5783d14c5d2SYehuda Sadeh extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
5793d14c5d2SYehuda Sadeh 				  struct ceph_osd_request *req);
5803d14c5d2SYehuda Sadeh extern void ceph_osdc_sync(struct ceph_osd_client *osdc);
5813d14c5d2SYehuda Sadeh 
582dd935f44SJosh Durgin extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc);
5837cca78c9SIlya Dryomov void ceph_osdc_maybe_request_map(struct ceph_osd_client *osdc);
584dd935f44SJosh Durgin 
585428a7158SDouglas Fuller int ceph_osdc_call(struct ceph_osd_client *osdc,
586428a7158SDouglas Fuller 		   struct ceph_object_id *oid,
587428a7158SDouglas Fuller 		   struct ceph_object_locator *oloc,
588428a7158SDouglas Fuller 		   const char *class, const char *method,
589428a7158SDouglas Fuller 		   unsigned int flags,
590428a7158SDouglas Fuller 		   struct page *req_page, size_t req_len,
59168ada915SIlya Dryomov 		   struct page **resp_pages, size_t *resp_len);
592428a7158SDouglas Fuller 
593922dab61SIlya Dryomov /* watch/notify */
594922dab61SIlya Dryomov struct ceph_osd_linger_request *
595922dab61SIlya Dryomov ceph_osdc_watch(struct ceph_osd_client *osdc,
596922dab61SIlya Dryomov 		struct ceph_object_id *oid,
597922dab61SIlya Dryomov 		struct ceph_object_locator *oloc,
598922dab61SIlya Dryomov 		rados_watchcb2_t wcb,
599922dab61SIlya Dryomov 		rados_watcherrcb_t errcb,
600922dab61SIlya Dryomov 		void *data);
601922dab61SIlya Dryomov int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
602922dab61SIlya Dryomov 		      struct ceph_osd_linger_request *lreq);
603922dab61SIlya Dryomov 
604922dab61SIlya Dryomov int ceph_osdc_notify_ack(struct ceph_osd_client *osdc,
605922dab61SIlya Dryomov 			 struct ceph_object_id *oid,
606922dab61SIlya Dryomov 			 struct ceph_object_locator *oloc,
607922dab61SIlya Dryomov 			 u64 notify_id,
608922dab61SIlya Dryomov 			 u64 cookie,
609922dab61SIlya Dryomov 			 void *payload,
6106d54228fSIlya Dryomov 			 u32 payload_len);
61119079203SIlya Dryomov int ceph_osdc_notify(struct ceph_osd_client *osdc,
61219079203SIlya Dryomov 		     struct ceph_object_id *oid,
61319079203SIlya Dryomov 		     struct ceph_object_locator *oloc,
61419079203SIlya Dryomov 		     void *payload,
6156d54228fSIlya Dryomov 		     u32 payload_len,
61619079203SIlya Dryomov 		     u32 timeout,
61719079203SIlya Dryomov 		     struct page ***preply_pages,
61819079203SIlya Dryomov 		     size_t *preply_len);
619b07d3c4bSIlya Dryomov int ceph_osdc_watch_check(struct ceph_osd_client *osdc,
620b07d3c4bSIlya Dryomov 			  struct ceph_osd_linger_request *lreq);
621a4ed38d7SDouglas Fuller int ceph_osdc_list_watchers(struct ceph_osd_client *osdc,
622a4ed38d7SDouglas Fuller 			    struct ceph_object_id *oid,
623a4ed38d7SDouglas Fuller 			    struct ceph_object_locator *oloc,
624a4ed38d7SDouglas Fuller 			    struct ceph_watch_item **watchers,
625a4ed38d7SDouglas Fuller 			    u32 *num_watchers);
6263d14c5d2SYehuda Sadeh 
627a679e50fSJeff Layton /* Find offset into the buffer of the end of the extent map */
628a679e50fSJeff Layton static inline u64 ceph_sparse_ext_map_end(struct ceph_osd_req_op *op)
629a679e50fSJeff Layton {
630a679e50fSJeff Layton 	struct ceph_sparse_extent *ext;
631a679e50fSJeff Layton 
632a679e50fSJeff Layton 	/* No extents? No data */
633a679e50fSJeff Layton 	if (op->extent.sparse_ext_cnt == 0)
634a679e50fSJeff Layton 		return 0;
635a679e50fSJeff Layton 
636a679e50fSJeff Layton 	ext = &op->extent.sparse_ext[op->extent.sparse_ext_cnt - 1];
637a679e50fSJeff Layton 
638a679e50fSJeff Layton 	return ext->off + ext->len - op->extent.offset;
639a679e50fSJeff Layton }
640a679e50fSJeff Layton 
641a679e50fSJeff Layton #endif
642