xref: /openbmc/linux/include/linux/ceph/osd_client.h (revision 7d7046a6)
1b2441318SGreg Kroah-Hartman /* SPDX-License-Identifier: GPL-2.0 */
23d14c5d2SYehuda Sadeh #ifndef _FS_CEPH_OSD_CLIENT_H
33d14c5d2SYehuda Sadeh #define _FS_CEPH_OSD_CLIENT_H
43d14c5d2SYehuda Sadeh 
5a02a946dSIlya Dryomov #include <linux/bitrev.h>
63d14c5d2SYehuda Sadeh #include <linux/completion.h>
73d14c5d2SYehuda Sadeh #include <linux/kref.h>
83d14c5d2SYehuda Sadeh #include <linux/mempool.h>
93d14c5d2SYehuda Sadeh #include <linux/rbtree.h>
1002113a0fSElena Reshetova #include <linux/refcount.h>
1197e27aaaSXiubo Li #include <linux/ktime.h>
123d14c5d2SYehuda Sadeh 
136c4a1915SAlex Elder #include <linux/ceph/types.h>
146c4a1915SAlex Elder #include <linux/ceph/osdmap.h>
156c4a1915SAlex Elder #include <linux/ceph/messenger.h>
16b2aa5d0bSIlya Dryomov #include <linux/ceph/msgpool.h>
176c4a1915SAlex Elder #include <linux/ceph/auth.h>
18c885837fSAlex Elder #include <linux/ceph/pagelist.h>
193d14c5d2SYehuda Sadeh 
203d14c5d2SYehuda Sadeh struct ceph_msg;
213d14c5d2SYehuda Sadeh struct ceph_snap_context;
223d14c5d2SYehuda Sadeh struct ceph_osd_request;
233d14c5d2SYehuda Sadeh struct ceph_osd_client;
243d14c5d2SYehuda Sadeh 
/*
 * completion callback for async writepages
 *
 * The callback receives the osd request as its only argument and returns
 * nothing.
 */
typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *);
293d14c5d2SYehuda Sadeh 
/*
 * Sentinel osd number.
 * NOTE(review): presumably used for requests that are not currently mapped
 * to any real osd (cf. ceph_osd_client.homeless_osd) -- confirm against
 * osd_client.c.
 *
 * Parenthesized so the negative value expands safely inside any expression.
 */
#define CEPH_HOMELESS_OSD	(-1)
3163244fa1SIlya Dryomov 
3208b8a044SJeff Layton /*
33a679e50fSJeff Layton  * A single extent in a SPARSE_READ reply.
34a679e50fSJeff Layton  *
35a679e50fSJeff Layton  * Note that these come from the OSD as little-endian values. On BE arches,
36a679e50fSJeff Layton  * we convert them in-place after receipt.
37a679e50fSJeff Layton  */
38a679e50fSJeff Layton struct ceph_sparse_extent {
39a679e50fSJeff Layton 	u64	off;
40a679e50fSJeff Layton 	u64	len;
41a679e50fSJeff Layton } __packed;
42a679e50fSJeff Layton 
/*
 * Sparse read state machine state values.
 *
 * The first state is explicitly 0; the remaining states take the following
 * consecutive values in declaration order.
 * NOTE(review): the per-state meaning is implemented by the receive path,
 * which is not part of this header -- see the reply layout described above
 * struct ceph_sparse_read.
 */
enum ceph_sparse_read_state {
	CEPH_SPARSE_READ_HDR	= 0,
	CEPH_SPARSE_READ_EXTENTS,
	CEPH_SPARSE_READ_DATA_LEN,
	CEPH_SPARSE_READ_DATA_PRE,
	CEPH_SPARSE_READ_DATA,
};
51f628d799SJeff Layton 
52f628d799SJeff Layton /*
53f628d799SJeff Layton  * A SPARSE_READ reply is a 32-bit count of extents, followed by an array of
54f628d799SJeff Layton  * 64-bit offset/length pairs, and then all of the actual file data
55f628d799SJeff Layton  * concatenated after it (sans holes).
56f628d799SJeff Layton  *
57f628d799SJeff Layton  * Unfortunately, we don't know how long the extent array is until we've
58f628d799SJeff Layton  * started reading the data section of the reply. The caller should send down
59f628d799SJeff Layton  * a destination buffer for the array, but we'll alloc one if it's too small
60f628d799SJeff Layton  * or if the caller doesn't.
61f628d799SJeff Layton  */
62f628d799SJeff Layton struct ceph_sparse_read {
63f628d799SJeff Layton 	enum ceph_sparse_read_state	sr_state;    /* state machine state */
64f628d799SJeff Layton 	u64				sr_req_off;  /* orig request offset */
65f628d799SJeff Layton 	u64				sr_req_len;  /* orig request length */
66f628d799SJeff Layton 	u64				sr_pos;      /* current pos in buffer */
67f628d799SJeff Layton 	int				sr_index;    /* current extent index */
68*7d7046a6SXiubo Li 	u32				sr_datalen;  /* length of actual data */
69f628d799SJeff Layton 	u32				sr_count;    /* extent count in reply */
70f628d799SJeff Layton 	int				sr_ext_len;  /* length of extent array */
71f628d799SJeff Layton 	struct ceph_sparse_extent	*sr_extent;  /* extent array */
72f628d799SJeff Layton };
73f628d799SJeff Layton 
74a679e50fSJeff Layton /*
7508b8a044SJeff Layton  * A given osd we're communicating with.
7608b8a044SJeff Layton  *
7708b8a044SJeff Layton  * Note that the o_requests tree can be searched while holding the "lock" mutex
7808b8a044SJeff Layton  * or the "o_requests_lock" spinlock. Insertion or removal requires both!
7908b8a044SJeff Layton  */
803d14c5d2SYehuda Sadeh struct ceph_osd {
8102113a0fSElena Reshetova 	refcount_t o_ref;
82f628d799SJeff Layton 	int o_sparse_op_idx;
833d14c5d2SYehuda Sadeh 	struct ceph_osd_client *o_osdc;
843d14c5d2SYehuda Sadeh 	int o_osd;
853d14c5d2SYehuda Sadeh 	int o_incarnation;
863d14c5d2SYehuda Sadeh 	struct rb_node o_node;
873d14c5d2SYehuda Sadeh 	struct ceph_connection o_con;
8808b8a044SJeff Layton 	spinlock_t o_requests_lock;
895aea3dcdSIlya Dryomov 	struct rb_root o_requests;
90922dab61SIlya Dryomov 	struct rb_root o_linger_requests;
91a02a946dSIlya Dryomov 	struct rb_root o_backoff_mappings;
92a02a946dSIlya Dryomov 	struct rb_root o_backoffs_by_id;
933d14c5d2SYehuda Sadeh 	struct list_head o_osd_lru;
946c4a1915SAlex Elder 	struct ceph_auth_handshake o_auth;
953d14c5d2SYehuda Sadeh 	unsigned long lru_ttl;
963d14c5d2SYehuda Sadeh 	struct list_head o_keepalive_item;
975aea3dcdSIlya Dryomov 	struct mutex lock;
98f628d799SJeff Layton 	struct ceph_sparse_read	o_sparse_read;
993d14c5d2SYehuda Sadeh };
1003d14c5d2SYehuda Sadeh 
/*
 * Op-count constants for osd requests.
 * NOTE(review): presumably SLAB_OPS is the op count served by the common
 * (slab) allocation and MAX_OPS the upper bound on ops per request --
 * confirm against the allocator in osd_client.c.
 */
#define CEPH_OSD_SLAB_OPS	2
#define CEPH_OSD_MAX_OPS	16
1031b83bef2SSage Weil 
/*
 * Discriminator for struct ceph_osd_data: each non-NONE value names one of
 * the representations carried in that structure's union.
 *
 * The BIO type only exists when CONFIG_BLOCK is set, so the numeric values
 * of the types that follow it depend on the kernel configuration.
 */
enum ceph_osd_data_type {
	CEPH_OSD_DATA_TYPE_NONE = 0,
	CEPH_OSD_DATA_TYPE_PAGES,
	CEPH_OSD_DATA_TYPE_PAGELIST,
#ifdef CONFIG_BLOCK
	CEPH_OSD_DATA_TYPE_BIO,
#endif /* CONFIG_BLOCK */
	CEPH_OSD_DATA_TYPE_BVECS,
	CEPH_OSD_DATA_TYPE_ITER,
};
1142ac2b7a6SAlex Elder 
1152794a82aSAlex Elder struct ceph_osd_data {
1162ac2b7a6SAlex Elder 	enum ceph_osd_data_type	type;
1172ac2b7a6SAlex Elder 	union {
1182794a82aSAlex Elder 		struct {
1192794a82aSAlex Elder 			struct page	**pages;
120e0c59487SAlex Elder 			u64		length;
1212794a82aSAlex Elder 			u32		alignment;
1222794a82aSAlex Elder 			bool		pages_from_pool;
1232794a82aSAlex Elder 			bool		own_pages;
1242794a82aSAlex Elder 		};
1259a5e6d09SAlex Elder 		struct ceph_pagelist	*pagelist;
1262794a82aSAlex Elder #ifdef CONFIG_BLOCK
127fdce58ccSAlex Elder 		struct {
1285359a17dSIlya Dryomov 			struct ceph_bio_iter	bio_pos;
1295359a17dSIlya Dryomov 			u32			bio_length;
130fdce58ccSAlex Elder 		};
1312794a82aSAlex Elder #endif /* CONFIG_BLOCK */
1320010f705SIlya Dryomov 		struct {
133b9e281c2SIlya Dryomov 			struct ceph_bvec_iter	bvec_pos;
1340010f705SIlya Dryomov 			u32			num_bvecs;
1350010f705SIlya Dryomov 		};
136dee0c5f8SJeff Layton 		struct iov_iter		iter;
1372794a82aSAlex Elder 	};
1382794a82aSAlex Elder };
1392794a82aSAlex Elder 
14079528734SAlex Elder struct ceph_osd_req_op {
14179528734SAlex Elder 	u16 op;           /* CEPH_OSD_OP_* */
1427b25bf5fSIlya Dryomov 	u32 flags;        /* CEPH_OSD_OP_FLAG_* */
143de2aa102SIlya Dryomov 	u32 indata_len;   /* request */
1447665d85bSYan, Zheng 	u32 outdata_len;  /* reply */
1457665d85bSYan, Zheng 	s32 rval;
1467665d85bSYan, Zheng 
14779528734SAlex Elder 	union {
14849719778SAlex Elder 		struct ceph_osd_data raw_data_in;
14979528734SAlex Elder 		struct {
15079528734SAlex Elder 			u64 offset, length;
15179528734SAlex Elder 			u64 truncate_size;
15279528734SAlex Elder 			u32 truncate_seq;
153a679e50fSJeff Layton 			int sparse_ext_cnt;
154a679e50fSJeff Layton 			struct ceph_sparse_extent *sparse_ext;
1555476492fSAlex Elder 			struct ceph_osd_data osd_data;
15679528734SAlex Elder 		} extent;
15779528734SAlex Elder 		struct {
158d7d5a007SIlya Dryomov 			u32 name_len;
159d7d5a007SIlya Dryomov 			u32 value_len;
160d74b50beSYan, Zheng 			__u8 cmp_op;       /* CEPH_OSD_CMPXATTR_OP_* */
161d74b50beSYan, Zheng 			__u8 cmp_mode;     /* CEPH_OSD_CMPXATTR_MODE_* */
162d74b50beSYan, Zheng 			struct ceph_osd_data osd_data;
163d74b50beSYan, Zheng 		} xattr;
164d74b50beSYan, Zheng 		struct {
16579528734SAlex Elder 			const char *class_name;
16679528734SAlex Elder 			const char *method_name;
1675476492fSAlex Elder 			struct ceph_osd_data request_info;
16804017e29SAlex Elder 			struct ceph_osd_data request_data;
1695476492fSAlex Elder 			struct ceph_osd_data response_data;
17079528734SAlex Elder 			__u8 class_len;
17179528734SAlex Elder 			__u8 method_len;
172bb873b53SIlya Dryomov 			u32 indata_len;
17379528734SAlex Elder 		} cls;
17479528734SAlex Elder 		struct {
17579528734SAlex Elder 			u64 cookie;
176922dab61SIlya Dryomov 			__u8 op;           /* CEPH_OSD_WATCH_OP_ */
177922dab61SIlya Dryomov 			u32 gen;
17879528734SAlex Elder 		} watch;
179c647b8a8SIlya Dryomov 		struct {
180922dab61SIlya Dryomov 			struct ceph_osd_data request_data;
181922dab61SIlya Dryomov 		} notify_ack;
182922dab61SIlya Dryomov 		struct {
18319079203SIlya Dryomov 			u64 cookie;
18419079203SIlya Dryomov 			struct ceph_osd_data request_data;
18519079203SIlya Dryomov 			struct ceph_osd_data response_data;
18619079203SIlya Dryomov 		} notify;
18719079203SIlya Dryomov 		struct {
188a4ed38d7SDouglas Fuller 			struct ceph_osd_data response_data;
189a4ed38d7SDouglas Fuller 		} list_watchers;
190a4ed38d7SDouglas Fuller 		struct {
191c647b8a8SIlya Dryomov 			u64 expected_object_size;
192c647b8a8SIlya Dryomov 			u64 expected_write_size;
193d3798accSIlya Dryomov 			u32 flags;  /* CEPH_OSD_OP_ALLOC_HINT_FLAG_* */
194c647b8a8SIlya Dryomov 		} alloc_hint;
19523ddf9beSLuis Henriques 		struct {
19623ddf9beSLuis Henriques 			u64 snapid;
19723ddf9beSLuis Henriques 			u64 src_version;
19823ddf9beSLuis Henriques 			u8 flags;
19923ddf9beSLuis Henriques 			u32 src_fadvise_flags;
20023ddf9beSLuis Henriques 			struct ceph_osd_data osd_data;
20123ddf9beSLuis Henriques 		} copy_from;
20269dd3b39SJeff Layton 		struct {
20369dd3b39SJeff Layton 			u64 ver;
20469dd3b39SJeff Layton 		} assert_ver;
20579528734SAlex Elder 	};
20679528734SAlex Elder };
20779528734SAlex Elder 
20863244fa1SIlya Dryomov struct ceph_osd_request_target {
20963244fa1SIlya Dryomov 	struct ceph_object_id base_oid;
21063244fa1SIlya Dryomov 	struct ceph_object_locator base_oloc;
21163244fa1SIlya Dryomov 	struct ceph_object_id target_oid;
21263244fa1SIlya Dryomov 	struct ceph_object_locator target_oloc;
21363244fa1SIlya Dryomov 
214dc98ff72SIlya Dryomov 	struct ceph_pg pgid;               /* last raw pg we mapped to */
215dc98ff72SIlya Dryomov 	struct ceph_spg spgid;             /* last actual spg we mapped to */
21663244fa1SIlya Dryomov 	u32 pg_num;
21763244fa1SIlya Dryomov 	u32 pg_num_mask;
21863244fa1SIlya Dryomov 	struct ceph_osds acting;
21963244fa1SIlya Dryomov 	struct ceph_osds up;
22063244fa1SIlya Dryomov 	int size;
22163244fa1SIlya Dryomov 	int min_size;
22263244fa1SIlya Dryomov 	bool sort_bitwise;
223ae78dd81SIlya Dryomov 	bool recovery_deletes;
22463244fa1SIlya Dryomov 
22563244fa1SIlya Dryomov 	unsigned int flags;                /* CEPH_OSD_FLAG_* */
226117d96a0SIlya Dryomov 	bool used_replica;
22763244fa1SIlya Dryomov 	bool paused;
22863244fa1SIlya Dryomov 
22904c7d789SIlya Dryomov 	u32 epoch;
230dc93e0e2SIlya Dryomov 	u32 last_force_resend;
231dc93e0e2SIlya Dryomov 
23263244fa1SIlya Dryomov 	int osd;
23363244fa1SIlya Dryomov };
23463244fa1SIlya Dryomov 
2353d14c5d2SYehuda Sadeh /* an in-flight request */
2363d14c5d2SYehuda Sadeh struct ceph_osd_request {
2373d14c5d2SYehuda Sadeh 	u64             r_tid;              /* unique for this client */
2383d14c5d2SYehuda Sadeh 	struct rb_node  r_node;
2394609245eSIlya Dryomov 	struct rb_node  r_mc_node;          /* map check */
24088bc1922SIlya Dryomov 	struct work_struct r_complete_work;
2413d14c5d2SYehuda Sadeh 	struct ceph_osd *r_osd;
242a66dd383SIlya Dryomov 
243a66dd383SIlya Dryomov 	struct ceph_osd_request_target r_t;
244a66dd383SIlya Dryomov #define r_base_oid	r_t.base_oid
245a66dd383SIlya Dryomov #define r_base_oloc	r_t.base_oloc
246a66dd383SIlya Dryomov #define r_flags		r_t.flags
2473d14c5d2SYehuda Sadeh 
2483d14c5d2SYehuda Sadeh 	struct ceph_msg  *r_request, *r_reply;
2493d14c5d2SYehuda Sadeh 	u32               r_sent;      /* >0 if r_request is sending/sent */
2501b83bef2SSage Weil 
25179528734SAlex Elder 	/* request osd ops array  */
25279528734SAlex Elder 	unsigned int		r_num_ops;
25379528734SAlex Elder 
2541b83bef2SSage Weil 	int               r_result;
2553d14c5d2SYehuda Sadeh 
2563d14c5d2SYehuda Sadeh 	struct ceph_osd_client *r_osdc;
2573d14c5d2SYehuda Sadeh 	struct kref       r_kref;
2583d14c5d2SYehuda Sadeh 	bool              r_mempool;
25969dd3b39SJeff Layton 	bool		  r_linger;           /* don't resend on failure */
260b18b9550SIlya Dryomov 	struct completion r_completion;       /* private to osd_client.c */
26126be8808SAlex Elder 	ceph_osdc_callback_t r_callback;
2623d14c5d2SYehuda Sadeh 
2633d14c5d2SYehuda Sadeh 	struct inode *r_inode;         	      /* for use by callbacks */
26494e85771SIlya Dryomov 	struct list_head r_private_item;      /* ditto */
2653d14c5d2SYehuda Sadeh 	void *r_priv;			      /* ditto */
2663d14c5d2SYehuda Sadeh 
267bb873b53SIlya Dryomov 	/* set by submitter */
268bb873b53SIlya Dryomov 	u64 r_snapid;                         /* for reads, CEPH_NOSNAP o/w */
269bb873b53SIlya Dryomov 	struct ceph_snap_context *r_snapc;    /* for writes */
270fac02ddfSArnd Bergmann 	struct timespec64 r_mtime;            /* ditto */
271bb873b53SIlya Dryomov 	u64 r_data_offset;                    /* ditto */
2723d14c5d2SYehuda Sadeh 
273bb873b53SIlya Dryomov 	/* internal */
27469dd3b39SJeff Layton 	u64 r_version;			      /* data version sent in reply */
275bb873b53SIlya Dryomov 	unsigned long r_stamp;                /* jiffies, send or check time */
2767cc5e38fSIlya Dryomov 	unsigned long r_start_stamp;          /* jiffies */
27797e27aaaSXiubo Li 	ktime_t r_start_latency;              /* ktime_t */
27897e27aaaSXiubo Li 	ktime_t r_end_latency;                /* ktime_t */
279bb873b53SIlya Dryomov 	int r_attempts;
2804609245eSIlya Dryomov 	u32 r_map_dne_bound;
2813f1af42aSIlya Dryomov 
2823f1af42aSIlya Dryomov 	struct ceph_osd_req_op r_ops[];
2833d14c5d2SYehuda Sadeh };
2843d14c5d2SYehuda Sadeh 
285205ee118SIlya Dryomov struct ceph_request_redirect {
286205ee118SIlya Dryomov 	struct ceph_object_locator oloc;
287205ee118SIlya Dryomov };
288205ee118SIlya Dryomov 
2898cb441c0SIlya Dryomov /*
2908cb441c0SIlya Dryomov  * osd request identifier
2918cb441c0SIlya Dryomov  *
2928cb441c0SIlya Dryomov  * caller name + incarnation# + tid to unique identify this request
2938cb441c0SIlya Dryomov  */
2948cb441c0SIlya Dryomov struct ceph_osd_reqid {
2958cb441c0SIlya Dryomov 	struct ceph_entity_name name;
2968cb441c0SIlya Dryomov 	__le64 tid;
2978cb441c0SIlya Dryomov 	__le32 inc;
2988cb441c0SIlya Dryomov } __packed;
2998cb441c0SIlya Dryomov 
3008cb441c0SIlya Dryomov struct ceph_blkin_trace_info {
3018cb441c0SIlya Dryomov 	__le64 trace_id;
3028cb441c0SIlya Dryomov 	__le64 span_id;
3038cb441c0SIlya Dryomov 	__le64 parent_span_id;
3048cb441c0SIlya Dryomov } __packed;
3058cb441c0SIlya Dryomov 
306922dab61SIlya Dryomov typedef void (*rados_watchcb2_t)(void *arg, u64 notify_id, u64 cookie,
307922dab61SIlya Dryomov 				 u64 notifier_id, void *data, size_t data_len);
308922dab61SIlya Dryomov typedef void (*rados_watcherrcb_t)(void *arg, u64 cookie, int err);
309a40c4f10SYehuda Sadeh 
310922dab61SIlya Dryomov struct ceph_osd_linger_request {
311922dab61SIlya Dryomov 	struct ceph_osd_client *osdc;
312922dab61SIlya Dryomov 	u64 linger_id;
313922dab61SIlya Dryomov 	bool committed;
31419079203SIlya Dryomov 	bool is_watch;                  /* watch or notify */
315922dab61SIlya Dryomov 
316922dab61SIlya Dryomov 	struct ceph_osd *osd;
317922dab61SIlya Dryomov 	struct ceph_osd_request *reg_req;
318922dab61SIlya Dryomov 	struct ceph_osd_request *ping_req;
319922dab61SIlya Dryomov 	unsigned long ping_sent;
320b07d3c4bSIlya Dryomov 	unsigned long watch_valid_thru;
321b07d3c4bSIlya Dryomov 	struct list_head pending_lworks;
322922dab61SIlya Dryomov 
323922dab61SIlya Dryomov 	struct ceph_osd_request_target t;
3244609245eSIlya Dryomov 	u32 map_dne_bound;
325922dab61SIlya Dryomov 
326fac02ddfSArnd Bergmann 	struct timespec64 mtime;
327922dab61SIlya Dryomov 
328922dab61SIlya Dryomov 	struct kref kref;
329922dab61SIlya Dryomov 	struct mutex lock;
330922dab61SIlya Dryomov 	struct rb_node node;            /* osd */
331922dab61SIlya Dryomov 	struct rb_node osdc_node;       /* osdc */
3324609245eSIlya Dryomov 	struct rb_node mc_node;         /* map check */
333922dab61SIlya Dryomov 	struct list_head scan_item;
334922dab61SIlya Dryomov 
335922dab61SIlya Dryomov 	struct completion reg_commit_wait;
33619079203SIlya Dryomov 	struct completion notify_finish_wait;
337922dab61SIlya Dryomov 	int reg_commit_error;
33819079203SIlya Dryomov 	int notify_finish_error;
339922dab61SIlya Dryomov 	int last_error;
340922dab61SIlya Dryomov 
341922dab61SIlya Dryomov 	u32 register_gen;
34219079203SIlya Dryomov 	u64 notify_id;
343922dab61SIlya Dryomov 
344922dab61SIlya Dryomov 	rados_watchcb2_t wcb;
345922dab61SIlya Dryomov 	rados_watcherrcb_t errcb;
346922dab61SIlya Dryomov 	void *data;
34719079203SIlya Dryomov 
34875dbb685SIlya Dryomov 	struct ceph_pagelist *request_pl;
34975dbb685SIlya Dryomov 	struct page **notify_id_pages;
35075dbb685SIlya Dryomov 
35119079203SIlya Dryomov 	struct page ***preply_pages;
35219079203SIlya Dryomov 	size_t *preply_len;
353a40c4f10SYehuda Sadeh };
354a40c4f10SYehuda Sadeh 
355a4ed38d7SDouglas Fuller struct ceph_watch_item {
356a4ed38d7SDouglas Fuller 	struct ceph_entity_name name;
357a4ed38d7SDouglas Fuller 	u64 cookie;
358a4ed38d7SDouglas Fuller 	struct ceph_entity_addr addr;
359a4ed38d7SDouglas Fuller };
360a4ed38d7SDouglas Fuller 
361a02a946dSIlya Dryomov struct ceph_spg_mapping {
362a02a946dSIlya Dryomov 	struct rb_node node;
363a02a946dSIlya Dryomov 	struct ceph_spg spgid;
364a02a946dSIlya Dryomov 
365a02a946dSIlya Dryomov 	struct rb_root backoffs;
366a02a946dSIlya Dryomov };
367a02a946dSIlya Dryomov 
368a02a946dSIlya Dryomov struct ceph_hobject_id {
369a02a946dSIlya Dryomov 	void *key;
370a02a946dSIlya Dryomov 	size_t key_len;
371a02a946dSIlya Dryomov 	void *oid;
372a02a946dSIlya Dryomov 	size_t oid_len;
373a02a946dSIlya Dryomov 	u64 snapid;
374a02a946dSIlya Dryomov 	u32 hash;
375a02a946dSIlya Dryomov 	u8 is_max;
376a02a946dSIlya Dryomov 	void *nspace;
377a02a946dSIlya Dryomov 	size_t nspace_len;
378a02a946dSIlya Dryomov 	s64 pool;
379a02a946dSIlya Dryomov 
380a02a946dSIlya Dryomov 	/* cache */
381a02a946dSIlya Dryomov 	u32 hash_reverse_bits;
382a02a946dSIlya Dryomov };
383a02a946dSIlya Dryomov 
ceph_hoid_build_hash_cache(struct ceph_hobject_id * hoid)384a02a946dSIlya Dryomov static inline void ceph_hoid_build_hash_cache(struct ceph_hobject_id *hoid)
385a02a946dSIlya Dryomov {
386a02a946dSIlya Dryomov 	hoid->hash_reverse_bits = bitrev32(hoid->hash);
387a02a946dSIlya Dryomov }
388a02a946dSIlya Dryomov 
389a02a946dSIlya Dryomov /*
390a02a946dSIlya Dryomov  * PG-wide backoff: [begin, end)
391a02a946dSIlya Dryomov  * per-object backoff: begin == end
392a02a946dSIlya Dryomov  */
393a02a946dSIlya Dryomov struct ceph_osd_backoff {
394a02a946dSIlya Dryomov 	struct rb_node spg_node;
395a02a946dSIlya Dryomov 	struct rb_node id_node;
396a02a946dSIlya Dryomov 
397a02a946dSIlya Dryomov 	struct ceph_spg spgid;
398a02a946dSIlya Dryomov 	u64 id;
399a02a946dSIlya Dryomov 	struct ceph_hobject_id *begin;
400a02a946dSIlya Dryomov 	struct ceph_hobject_id *end;
401a02a946dSIlya Dryomov };
402a02a946dSIlya Dryomov 
/*
 * Base value for linger ids: the top 16 bits set, the low 48 bits clear.
 * NOTE(review): presumably chosen so linger ids do not collide with the
 * regular tids -- confirm against osd_client.c.
 */
#define CEPH_LINGER_ID_START	0xffff000000000000ULL
404264048afSIlya Dryomov 
4053d14c5d2SYehuda Sadeh struct ceph_osd_client {
4063d14c5d2SYehuda Sadeh 	struct ceph_client     *client;
4073d14c5d2SYehuda Sadeh 
4083d14c5d2SYehuda Sadeh 	struct ceph_osdmap     *osdmap;       /* current map */
4095aea3dcdSIlya Dryomov 	struct rw_semaphore    lock;
4103d14c5d2SYehuda Sadeh 
4113d14c5d2SYehuda Sadeh 	struct rb_root         osds;          /* osds */
4123d14c5d2SYehuda Sadeh 	struct list_head       osd_lru;       /* idle osds */
4139dd2845cSIlya Dryomov 	spinlock_t             osd_lru_lock;
41458eb7932SJeff Layton 	u32		       epoch_barrier;
4155aea3dcdSIlya Dryomov 	struct ceph_osd        homeless_osd;
4165aea3dcdSIlya Dryomov 	atomic64_t             last_tid;      /* tid of last request */
417922dab61SIlya Dryomov 	u64                    last_linger_id;
418922dab61SIlya Dryomov 	struct rb_root         linger_requests; /* lingering requests */
4194609245eSIlya Dryomov 	struct rb_root         map_checks;
4204609245eSIlya Dryomov 	struct rb_root         linger_map_checks;
4215aea3dcdSIlya Dryomov 	atomic_t               num_requests;
4225aea3dcdSIlya Dryomov 	atomic_t               num_homeless;
42366850df5SIlya Dryomov 	int                    abort_err;
4243d14c5d2SYehuda Sadeh 	struct delayed_work    timeout_work;
4253d14c5d2SYehuda Sadeh 	struct delayed_work    osds_timeout_work;
4263d14c5d2SYehuda Sadeh #ifdef CONFIG_DEBUG_FS
4273d14c5d2SYehuda Sadeh 	struct dentry 	       *debugfs_file;
4283d14c5d2SYehuda Sadeh #endif
4293d14c5d2SYehuda Sadeh 
4303d14c5d2SYehuda Sadeh 	mempool_t              *req_mempool;
4313d14c5d2SYehuda Sadeh 
4323d14c5d2SYehuda Sadeh 	struct ceph_msgpool	msgpool_op;
4333d14c5d2SYehuda Sadeh 	struct ceph_msgpool	msgpool_op_reply;
434a40c4f10SYehuda Sadeh 
435a40c4f10SYehuda Sadeh 	struct workqueue_struct	*notify_wq;
43688bc1922SIlya Dryomov 	struct workqueue_struct	*completion_wq;
4373d14c5d2SYehuda Sadeh };
4383d14c5d2SYehuda Sadeh 
ceph_osdmap_flag(struct ceph_osd_client * osdc,int flag)439b7ec35b3SIlya Dryomov static inline bool ceph_osdmap_flag(struct ceph_osd_client *osdc, int flag)
440b7ec35b3SIlya Dryomov {
441b7ec35b3SIlya Dryomov 	return osdc->osdmap->flags & flag;
442b7ec35b3SIlya Dryomov }
443b7ec35b3SIlya Dryomov 
4445522ae0bSAlex Elder extern int ceph_osdc_setup(void);
4455522ae0bSAlex Elder extern void ceph_osdc_cleanup(void);
4465522ae0bSAlex Elder 
4473d14c5d2SYehuda Sadeh extern int ceph_osdc_init(struct ceph_osd_client *osdc,
4483d14c5d2SYehuda Sadeh 			  struct ceph_client *client);
4493d14c5d2SYehuda Sadeh extern void ceph_osdc_stop(struct ceph_osd_client *osdc);
450120a75eaSYan, Zheng extern void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc);
4513d14c5d2SYehuda Sadeh 
4523d14c5d2SYehuda Sadeh extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
4533d14c5d2SYehuda Sadeh 				   struct ceph_msg *msg);
4543d14c5d2SYehuda Sadeh extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
4553d14c5d2SYehuda Sadeh 				 struct ceph_msg *msg);
45658eb7932SJeff Layton void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb);
45766850df5SIlya Dryomov void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err);
4582cef0ba8SYan, Zheng void ceph_osdc_clear_abort_err(struct ceph_osd_client *osdc);
4593d14c5d2SYehuda Sadeh 
/*
 * Yield a pointer to field @fld of the @typ arm of op number @whch in
 * request @oreq, i.e. &oreq->r_ops[whch].typ.fld.
 *
 * @oreq and @whch are each evaluated exactly once.  BUG()s if @whch is not
 * below the request's r_num_ops.  Relies on the GNU statement-expression
 * extension.
 */
#define osd_req_op_data(oreq, whch, typ, fld)				\
({									\
	struct ceph_osd_request *__oreq = (oreq);			\
	unsigned int __whch = (whch);					\
	BUG_ON(__whch >= __oreq->r_num_ops);				\
	&__oreq->r_ops[__whch].typ.fld;					\
})
4674cf3e6dfSIlya Dryomov 
468042f6498SJeff Layton struct ceph_osd_req_op *osd_req_op_init(struct ceph_osd_request *osd_req,
469144cba14SYan, Zheng 			    unsigned int which, u16 opcode, u32 flags);
47049719778SAlex Elder 
47149719778SAlex Elder extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *,
47249719778SAlex Elder 					unsigned int which,
47349719778SAlex Elder 					struct page **pages, u64 length,
47449719778SAlex Elder 					u32 alignment, bool pages_from_pool,
47549719778SAlex Elder 					bool own_pages);
47649719778SAlex Elder 
477c99d2d4aSAlex Elder extern void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
478c99d2d4aSAlex Elder 					unsigned int which, u16 opcode,
47933803f33SAlex Elder 					u64 offset, u64 length,
48033803f33SAlex Elder 					u64 truncate_size, u32 truncate_seq);
481c99d2d4aSAlex Elder extern void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
482c99d2d4aSAlex Elder 					unsigned int which, u64 length);
4832c63f49aSYan, Zheng extern void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req,
4842c63f49aSYan, Zheng 				       unsigned int which, u64 offset_inc);
485a4ce40a9SAlex Elder 
486a4ce40a9SAlex Elder extern struct ceph_osd_data *osd_req_op_extent_osd_data(
487a4ce40a9SAlex Elder 					struct ceph_osd_request *osd_req,
488406e2c9fSAlex Elder 					unsigned int which);
489a4ce40a9SAlex Elder 
490a4ce40a9SAlex Elder extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *,
491406e2c9fSAlex Elder 					unsigned int which,
492a4ce40a9SAlex Elder 					struct page **pages, u64 length,
493a4ce40a9SAlex Elder 					u32 alignment, bool pages_from_pool,
494a4ce40a9SAlex Elder 					bool own_pages);
495a4ce40a9SAlex Elder extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *,
496406e2c9fSAlex Elder 					unsigned int which,
497a4ce40a9SAlex Elder 					struct ceph_pagelist *pagelist);
498a4ce40a9SAlex Elder #ifdef CONFIG_BLOCK
4995359a17dSIlya Dryomov void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
500406e2c9fSAlex Elder 				    unsigned int which,
5015359a17dSIlya Dryomov 				    struct ceph_bio_iter *bio_pos,
5025359a17dSIlya Dryomov 				    u32 bio_length);
503a4ce40a9SAlex Elder #endif /* CONFIG_BLOCK */
5040010f705SIlya Dryomov void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req,
5050010f705SIlya Dryomov 				      unsigned int which,
5060010f705SIlya Dryomov 				      struct bio_vec *bvecs, u32 num_bvecs,
5070010f705SIlya Dryomov 				      u32 bytes);
508b9e281c2SIlya Dryomov void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
509b9e281c2SIlya Dryomov 					 unsigned int which,
510b9e281c2SIlya Dryomov 					 struct ceph_bvec_iter *bvec_pos);
511dee0c5f8SJeff Layton void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req,
512dee0c5f8SJeff Layton 				unsigned int which, struct iov_iter *iter);
513a4ce40a9SAlex Elder 
51404017e29SAlex Elder extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *,
51504017e29SAlex Elder 					unsigned int which,
51604017e29SAlex Elder 					struct ceph_pagelist *pagelist);
5176c57b554SAlex Elder extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *,
5186c57b554SAlex Elder 					unsigned int which,
5196c57b554SAlex Elder 					struct page **pages, u64 length,
5206c57b554SAlex Elder 					u32 alignment, bool pages_from_pool,
5216c57b554SAlex Elder 					bool own_pages);
522b9e281c2SIlya Dryomov void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req,
523b9e281c2SIlya Dryomov 				       unsigned int which,
5240010f705SIlya Dryomov 				       struct bio_vec *bvecs, u32 num_bvecs,
5250010f705SIlya Dryomov 				       u32 bytes);
526a4ce40a9SAlex Elder extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
527c99d2d4aSAlex Elder 					unsigned int which,
528a4ce40a9SAlex Elder 					struct page **pages, u64 length,
529a4ce40a9SAlex Elder 					u32 alignment, bool pages_from_pool,
530a4ce40a9SAlex Elder 					bool own_pages);
53124639ce5SIlya Dryomov int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
53204017e29SAlex Elder 			const char *class, const char *method);
533d74b50beSYan, Zheng extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
534d74b50beSYan, Zheng 				 u16 opcode, const char *name, const void *value,
535d74b50beSYan, Zheng 				 size_t size, u8 cmp_op, u8 cmp_mode);
536c647b8a8SIlya Dryomov extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
537c647b8a8SIlya Dryomov 				       unsigned int which,
538c647b8a8SIlya Dryomov 				       u64 expected_object_size,
539d3798accSIlya Dryomov 				       u64 expected_write_size,
540d3798accSIlya Dryomov 				       u32 flags);
541aca39d9eSLuís Henriques extern int osd_req_op_copy_from_init(struct ceph_osd_request *req,
542aca39d9eSLuís Henriques 				     u64 src_snapid, u64 src_version,
543aca39d9eSLuís Henriques 				     struct ceph_object_id *src_oid,
544aca39d9eSLuís Henriques 				     struct ceph_object_locator *src_oloc,
545aca39d9eSLuís Henriques 				     u32 src_fadvise_flags,
546aca39d9eSLuís Henriques 				     u32 dst_fadvise_flags,
547aca39d9eSLuís Henriques 				     u32 truncate_seq, u64 truncate_size,
548aca39d9eSLuís Henriques 				     u8 copy_from_flags);
54933803f33SAlex Elder 
5503d14c5d2SYehuda Sadeh extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
5513d14c5d2SYehuda Sadeh 					       struct ceph_snap_context *snapc,
552acead002SAlex Elder 					       unsigned int num_ops,
5533d14c5d2SYehuda Sadeh 					       bool use_mempool,
55454a54007SAlex Elder 					       gfp_t gfp_flags);
55513d1ad16SIlya Dryomov int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp);
5563d14c5d2SYehuda Sadeh 
5573d14c5d2SYehuda Sadeh extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
5583d14c5d2SYehuda Sadeh 				      struct ceph_file_layout *layout,
5593d14c5d2SYehuda Sadeh 				      struct ceph_vino vino,
560acead002SAlex Elder 				      u64 offset, u64 *len,
561715e4cd4SYan, Zheng 				      unsigned int which, int num_ops,
562715e4cd4SYan, Zheng 				      int opcode, int flags,
5633d14c5d2SYehuda Sadeh 				      struct ceph_snap_context *snapc,
564acead002SAlex Elder 				      u32 truncate_seq, u64 truncate_size,
565153e5167SAlex Elder 				      bool use_mempool);
5663d14c5d2SYehuda Sadeh 
567a679e50fSJeff Layton int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt);
568a679e50fSJeff Layton 
/*
 * How big an extent array should we preallocate for a sparse read? This is
 * just a starting value.  If we get more than this back from the OSD, the
 * receiver will reallocate.
 *
 * The value is a number of extents, passed as the "cnt" argument of
 * __ceph_alloc_sparse_ext_map().
 */
#define CEPH_SPARSE_EXT_ARRAY_INITIAL  16
575a679e50fSJeff Layton 
ceph_alloc_sparse_ext_map(struct ceph_osd_req_op * op)576a679e50fSJeff Layton static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op)
577a679e50fSJeff Layton {
578a679e50fSJeff Layton 	return __ceph_alloc_sparse_ext_map(op, CEPH_SPARSE_EXT_ARRAY_INITIAL);
579a679e50fSJeff Layton }
580a679e50fSJeff Layton 
5819e94af20SIlya Dryomov extern void ceph_osdc_get_request(struct ceph_osd_request *req);
5829e94af20SIlya Dryomov extern void ceph_osdc_put_request(struct ceph_osd_request *req);
5833d14c5d2SYehuda Sadeh 
584a8af0d68SJeff Layton void ceph_osdc_start_request(struct ceph_osd_client *osdc,
585a8af0d68SJeff Layton 			     struct ceph_osd_request *req);
586c9f9b93dSIlya Dryomov extern void ceph_osdc_cancel_request(struct ceph_osd_request *req);
5873d14c5d2SYehuda Sadeh extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
5883d14c5d2SYehuda Sadeh 				  struct ceph_osd_request *req);
5893d14c5d2SYehuda Sadeh extern void ceph_osdc_sync(struct ceph_osd_client *osdc);
5903d14c5d2SYehuda Sadeh 
591dd935f44SJosh Durgin extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc);
5927cca78c9SIlya Dryomov void ceph_osdc_maybe_request_map(struct ceph_osd_client *osdc);
593dd935f44SJosh Durgin 
594428a7158SDouglas Fuller int ceph_osdc_call(struct ceph_osd_client *osdc,
595428a7158SDouglas Fuller 		   struct ceph_object_id *oid,
596428a7158SDouglas Fuller 		   struct ceph_object_locator *oloc,
597428a7158SDouglas Fuller 		   const char *class, const char *method,
598428a7158SDouglas Fuller 		   unsigned int flags,
599428a7158SDouglas Fuller 		   struct page *req_page, size_t req_len,
60068ada915SIlya Dryomov 		   struct page **resp_pages, size_t *resp_len);
601428a7158SDouglas Fuller 
602922dab61SIlya Dryomov /* watch/notify */
603922dab61SIlya Dryomov struct ceph_osd_linger_request *
604922dab61SIlya Dryomov ceph_osdc_watch(struct ceph_osd_client *osdc,
605922dab61SIlya Dryomov 		struct ceph_object_id *oid,
606922dab61SIlya Dryomov 		struct ceph_object_locator *oloc,
607922dab61SIlya Dryomov 		rados_watchcb2_t wcb,
608922dab61SIlya Dryomov 		rados_watcherrcb_t errcb,
609922dab61SIlya Dryomov 		void *data);
610922dab61SIlya Dryomov int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
611922dab61SIlya Dryomov 		      struct ceph_osd_linger_request *lreq);
612922dab61SIlya Dryomov 
613922dab61SIlya Dryomov int ceph_osdc_notify_ack(struct ceph_osd_client *osdc,
614922dab61SIlya Dryomov 			 struct ceph_object_id *oid,
615922dab61SIlya Dryomov 			 struct ceph_object_locator *oloc,
616922dab61SIlya Dryomov 			 u64 notify_id,
617922dab61SIlya Dryomov 			 u64 cookie,
618922dab61SIlya Dryomov 			 void *payload,
6196d54228fSIlya Dryomov 			 u32 payload_len);
62019079203SIlya Dryomov int ceph_osdc_notify(struct ceph_osd_client *osdc,
62119079203SIlya Dryomov 		     struct ceph_object_id *oid,
62219079203SIlya Dryomov 		     struct ceph_object_locator *oloc,
62319079203SIlya Dryomov 		     void *payload,
6246d54228fSIlya Dryomov 		     u32 payload_len,
62519079203SIlya Dryomov 		     u32 timeout,
62619079203SIlya Dryomov 		     struct page ***preply_pages,
62719079203SIlya Dryomov 		     size_t *preply_len);
628b07d3c4bSIlya Dryomov int ceph_osdc_watch_check(struct ceph_osd_client *osdc,
629b07d3c4bSIlya Dryomov 			  struct ceph_osd_linger_request *lreq);
630a4ed38d7SDouglas Fuller int ceph_osdc_list_watchers(struct ceph_osd_client *osdc,
631a4ed38d7SDouglas Fuller 			    struct ceph_object_id *oid,
632a4ed38d7SDouglas Fuller 			    struct ceph_object_locator *oloc,
633a4ed38d7SDouglas Fuller 			    struct ceph_watch_item **watchers,
634a4ed38d7SDouglas Fuller 			    u32 *num_watchers);
6353d14c5d2SYehuda Sadeh 
636a679e50fSJeff Layton /* Find offset into the buffer of the end of the extent map */
ceph_sparse_ext_map_end(struct ceph_osd_req_op * op)637a679e50fSJeff Layton static inline u64 ceph_sparse_ext_map_end(struct ceph_osd_req_op *op)
638a679e50fSJeff Layton {
639a679e50fSJeff Layton 	struct ceph_sparse_extent *ext;
640a679e50fSJeff Layton 
641a679e50fSJeff Layton 	/* No extents? No data */
642a679e50fSJeff Layton 	if (op->extent.sparse_ext_cnt == 0)
643a679e50fSJeff Layton 		return 0;
644a679e50fSJeff Layton 
645a679e50fSJeff Layton 	ext = &op->extent.sparse_ext[op->extent.sparse_ext_cnt - 1];
646a679e50fSJeff Layton 
647a679e50fSJeff Layton 	return ext->off + ext->len - op->extent.offset;
648a679e50fSJeff Layton }
649a679e50fSJeff Layton 
650a679e50fSJeff Layton #endif
651