xref: /openbmc/linux/include/linux/ceph/osd_client.h (revision a679e50f)
1b2441318SGreg Kroah-Hartman /* SPDX-License-Identifier: GPL-2.0 */
23d14c5d2SYehuda Sadeh #ifndef _FS_CEPH_OSD_CLIENT_H
33d14c5d2SYehuda Sadeh #define _FS_CEPH_OSD_CLIENT_H
43d14c5d2SYehuda Sadeh 
5a02a946dSIlya Dryomov #include <linux/bitrev.h>
63d14c5d2SYehuda Sadeh #include <linux/completion.h>
73d14c5d2SYehuda Sadeh #include <linux/kref.h>
83d14c5d2SYehuda Sadeh #include <linux/mempool.h>
93d14c5d2SYehuda Sadeh #include <linux/rbtree.h>
1002113a0fSElena Reshetova #include <linux/refcount.h>
1197e27aaaSXiubo Li #include <linux/ktime.h>
123d14c5d2SYehuda Sadeh 
136c4a1915SAlex Elder #include <linux/ceph/types.h>
146c4a1915SAlex Elder #include <linux/ceph/osdmap.h>
156c4a1915SAlex Elder #include <linux/ceph/messenger.h>
16b2aa5d0bSIlya Dryomov #include <linux/ceph/msgpool.h>
176c4a1915SAlex Elder #include <linux/ceph/auth.h>
18c885837fSAlex Elder #include <linux/ceph/pagelist.h>
193d14c5d2SYehuda Sadeh 
203d14c5d2SYehuda Sadeh struct ceph_msg;
213d14c5d2SYehuda Sadeh struct ceph_snap_context;
223d14c5d2SYehuda Sadeh struct ceph_osd_request;
233d14c5d2SYehuda Sadeh struct ceph_osd_client;
243d14c5d2SYehuda Sadeh 
/*
 * Completion callback for async writepages.
 *
 * The callback takes a single argument: a pointer to a
 * struct ceph_osd_request.
 */
typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *req);
293d14c5d2SYehuda Sadeh 
/*
 * Sentinel OSD number.  NOTE(review): presumably the id carried by
 * ceph_osd_client.homeless_osd / ceph_osd_request_target.osd when no real
 * OSD is assigned -- confirm against osd_client.c.
 *
 * The replacement list is parenthesized so the negative constant always
 * expands as one operand, whatever expression it is pasted into.
 */
#define CEPH_HOMELESS_OSD	(-1)
3163244fa1SIlya Dryomov 
3208b8a044SJeff Layton /*
33*a679e50fSJeff Layton  * A single extent in a SPARSE_READ reply.
34*a679e50fSJeff Layton  *
35*a679e50fSJeff Layton  * Note that these come from the OSD as little-endian values. On BE arches,
36*a679e50fSJeff Layton  * we convert them in-place after receipt.
37*a679e50fSJeff Layton  */
38*a679e50fSJeff Layton struct ceph_sparse_extent {
39*a679e50fSJeff Layton 	u64	off;
40*a679e50fSJeff Layton 	u64	len;
41*a679e50fSJeff Layton } __packed;
42*a679e50fSJeff Layton 
43*a679e50fSJeff Layton /*
4408b8a044SJeff Layton  * A given osd we're communicating with.
4508b8a044SJeff Layton  *
4608b8a044SJeff Layton  * Note that the o_requests tree can be searched while holding the "lock" mutex
4708b8a044SJeff Layton  * or the "o_requests_lock" spinlock. Insertion or removal requires both!
4808b8a044SJeff Layton  */
493d14c5d2SYehuda Sadeh struct ceph_osd {
5002113a0fSElena Reshetova 	refcount_t o_ref;
513d14c5d2SYehuda Sadeh 	struct ceph_osd_client *o_osdc;
523d14c5d2SYehuda Sadeh 	int o_osd;
533d14c5d2SYehuda Sadeh 	int o_incarnation;
543d14c5d2SYehuda Sadeh 	struct rb_node o_node;
553d14c5d2SYehuda Sadeh 	struct ceph_connection o_con;
5608b8a044SJeff Layton 	spinlock_t o_requests_lock;
575aea3dcdSIlya Dryomov 	struct rb_root o_requests;
58922dab61SIlya Dryomov 	struct rb_root o_linger_requests;
59a02a946dSIlya Dryomov 	struct rb_root o_backoff_mappings;
60a02a946dSIlya Dryomov 	struct rb_root o_backoffs_by_id;
613d14c5d2SYehuda Sadeh 	struct list_head o_osd_lru;
626c4a1915SAlex Elder 	struct ceph_auth_handshake o_auth;
633d14c5d2SYehuda Sadeh 	unsigned long lru_ttl;
643d14c5d2SYehuda Sadeh 	struct list_head o_keepalive_item;
655aea3dcdSIlya Dryomov 	struct mutex lock;
663d14c5d2SYehuda Sadeh };
673d14c5d2SYehuda Sadeh 
/*
 * Op-count constants.  NOTE(review): presumably the number of ops a
 * slab-allocated request has room for, and the upper bound on ops per
 * request -- confirm against the allocator in osd_client.c.
 */
#define CEPH_OSD_SLAB_OPS	2
#define CEPH_OSD_MAX_OPS	16
701b83bef2SSage Weil 
/*
 * Discriminator stored in ceph_osd_data.type.
 *
 * The BIO constant only exists when CONFIG_BLOCK is set, so the numeric
 * value of CEPH_OSD_DATA_TYPE_BVECS depends on the kernel configuration.
 */
enum ceph_osd_data_type {
	CEPH_OSD_DATA_TYPE_NONE = 0,
	CEPH_OSD_DATA_TYPE_PAGES,
	CEPH_OSD_DATA_TYPE_PAGELIST,
#ifdef CONFIG_BLOCK
	CEPH_OSD_DATA_TYPE_BIO,
#endif /* CONFIG_BLOCK */
	CEPH_OSD_DATA_TYPE_BVECS,
};
802ac2b7a6SAlex Elder 
812794a82aSAlex Elder struct ceph_osd_data {
822ac2b7a6SAlex Elder 	enum ceph_osd_data_type	type;
832ac2b7a6SAlex Elder 	union {
842794a82aSAlex Elder 		struct {
852794a82aSAlex Elder 			struct page	**pages;
86e0c59487SAlex Elder 			u64		length;
872794a82aSAlex Elder 			u32		alignment;
882794a82aSAlex Elder 			bool		pages_from_pool;
892794a82aSAlex Elder 			bool		own_pages;
902794a82aSAlex Elder 		};
919a5e6d09SAlex Elder 		struct ceph_pagelist	*pagelist;
922794a82aSAlex Elder #ifdef CONFIG_BLOCK
93fdce58ccSAlex Elder 		struct {
945359a17dSIlya Dryomov 			struct ceph_bio_iter	bio_pos;
955359a17dSIlya Dryomov 			u32			bio_length;
96fdce58ccSAlex Elder 		};
972794a82aSAlex Elder #endif /* CONFIG_BLOCK */
980010f705SIlya Dryomov 		struct {
99b9e281c2SIlya Dryomov 			struct ceph_bvec_iter	bvec_pos;
1000010f705SIlya Dryomov 			u32			num_bvecs;
1010010f705SIlya Dryomov 		};
1022794a82aSAlex Elder 	};
1032794a82aSAlex Elder };
1042794a82aSAlex Elder 
10579528734SAlex Elder struct ceph_osd_req_op {
10679528734SAlex Elder 	u16 op;           /* CEPH_OSD_OP_* */
1077b25bf5fSIlya Dryomov 	u32 flags;        /* CEPH_OSD_OP_FLAG_* */
108de2aa102SIlya Dryomov 	u32 indata_len;   /* request */
1097665d85bSYan, Zheng 	u32 outdata_len;  /* reply */
1107665d85bSYan, Zheng 	s32 rval;
1117665d85bSYan, Zheng 
11279528734SAlex Elder 	union {
11349719778SAlex Elder 		struct ceph_osd_data raw_data_in;
11479528734SAlex Elder 		struct {
11579528734SAlex Elder 			u64 offset, length;
11679528734SAlex Elder 			u64 truncate_size;
11779528734SAlex Elder 			u32 truncate_seq;
118*a679e50fSJeff Layton 			int sparse_ext_cnt;
119*a679e50fSJeff Layton 			struct ceph_sparse_extent *sparse_ext;
1205476492fSAlex Elder 			struct ceph_osd_data osd_data;
12179528734SAlex Elder 		} extent;
12279528734SAlex Elder 		struct {
123d7d5a007SIlya Dryomov 			u32 name_len;
124d7d5a007SIlya Dryomov 			u32 value_len;
125d74b50beSYan, Zheng 			__u8 cmp_op;       /* CEPH_OSD_CMPXATTR_OP_* */
126d74b50beSYan, Zheng 			__u8 cmp_mode;     /* CEPH_OSD_CMPXATTR_MODE_* */
127d74b50beSYan, Zheng 			struct ceph_osd_data osd_data;
128d74b50beSYan, Zheng 		} xattr;
129d74b50beSYan, Zheng 		struct {
13079528734SAlex Elder 			const char *class_name;
13179528734SAlex Elder 			const char *method_name;
1325476492fSAlex Elder 			struct ceph_osd_data request_info;
13304017e29SAlex Elder 			struct ceph_osd_data request_data;
1345476492fSAlex Elder 			struct ceph_osd_data response_data;
13579528734SAlex Elder 			__u8 class_len;
13679528734SAlex Elder 			__u8 method_len;
137bb873b53SIlya Dryomov 			u32 indata_len;
13879528734SAlex Elder 		} cls;
13979528734SAlex Elder 		struct {
14079528734SAlex Elder 			u64 cookie;
141922dab61SIlya Dryomov 			__u8 op;           /* CEPH_OSD_WATCH_OP_ */
142922dab61SIlya Dryomov 			u32 gen;
14379528734SAlex Elder 		} watch;
144c647b8a8SIlya Dryomov 		struct {
145922dab61SIlya Dryomov 			struct ceph_osd_data request_data;
146922dab61SIlya Dryomov 		} notify_ack;
147922dab61SIlya Dryomov 		struct {
14819079203SIlya Dryomov 			u64 cookie;
14919079203SIlya Dryomov 			struct ceph_osd_data request_data;
15019079203SIlya Dryomov 			struct ceph_osd_data response_data;
15119079203SIlya Dryomov 		} notify;
15219079203SIlya Dryomov 		struct {
153a4ed38d7SDouglas Fuller 			struct ceph_osd_data response_data;
154a4ed38d7SDouglas Fuller 		} list_watchers;
155a4ed38d7SDouglas Fuller 		struct {
156c647b8a8SIlya Dryomov 			u64 expected_object_size;
157c647b8a8SIlya Dryomov 			u64 expected_write_size;
158d3798accSIlya Dryomov 			u32 flags;  /* CEPH_OSD_OP_ALLOC_HINT_FLAG_* */
159c647b8a8SIlya Dryomov 		} alloc_hint;
16023ddf9beSLuis Henriques 		struct {
16123ddf9beSLuis Henriques 			u64 snapid;
16223ddf9beSLuis Henriques 			u64 src_version;
16323ddf9beSLuis Henriques 			u8 flags;
16423ddf9beSLuis Henriques 			u32 src_fadvise_flags;
16523ddf9beSLuis Henriques 			struct ceph_osd_data osd_data;
16623ddf9beSLuis Henriques 		} copy_from;
16779528734SAlex Elder 	};
16879528734SAlex Elder };
16979528734SAlex Elder 
17063244fa1SIlya Dryomov struct ceph_osd_request_target {
17163244fa1SIlya Dryomov 	struct ceph_object_id base_oid;
17263244fa1SIlya Dryomov 	struct ceph_object_locator base_oloc;
17363244fa1SIlya Dryomov 	struct ceph_object_id target_oid;
17463244fa1SIlya Dryomov 	struct ceph_object_locator target_oloc;
17563244fa1SIlya Dryomov 
176dc98ff72SIlya Dryomov 	struct ceph_pg pgid;               /* last raw pg we mapped to */
177dc98ff72SIlya Dryomov 	struct ceph_spg spgid;             /* last actual spg we mapped to */
17863244fa1SIlya Dryomov 	u32 pg_num;
17963244fa1SIlya Dryomov 	u32 pg_num_mask;
18063244fa1SIlya Dryomov 	struct ceph_osds acting;
18163244fa1SIlya Dryomov 	struct ceph_osds up;
18263244fa1SIlya Dryomov 	int size;
18363244fa1SIlya Dryomov 	int min_size;
18463244fa1SIlya Dryomov 	bool sort_bitwise;
185ae78dd81SIlya Dryomov 	bool recovery_deletes;
18663244fa1SIlya Dryomov 
18763244fa1SIlya Dryomov 	unsigned int flags;                /* CEPH_OSD_FLAG_* */
188117d96a0SIlya Dryomov 	bool used_replica;
18963244fa1SIlya Dryomov 	bool paused;
19063244fa1SIlya Dryomov 
19104c7d789SIlya Dryomov 	u32 epoch;
192dc93e0e2SIlya Dryomov 	u32 last_force_resend;
193dc93e0e2SIlya Dryomov 
19463244fa1SIlya Dryomov 	int osd;
19563244fa1SIlya Dryomov };
19663244fa1SIlya Dryomov 
1973d14c5d2SYehuda Sadeh /* an in-flight request */
1983d14c5d2SYehuda Sadeh struct ceph_osd_request {
1993d14c5d2SYehuda Sadeh 	u64             r_tid;              /* unique for this client */
2003d14c5d2SYehuda Sadeh 	struct rb_node  r_node;
2014609245eSIlya Dryomov 	struct rb_node  r_mc_node;          /* map check */
20288bc1922SIlya Dryomov 	struct work_struct r_complete_work;
2033d14c5d2SYehuda Sadeh 	struct ceph_osd *r_osd;
204a66dd383SIlya Dryomov 
205a66dd383SIlya Dryomov 	struct ceph_osd_request_target r_t;
206a66dd383SIlya Dryomov #define r_base_oid	r_t.base_oid
207a66dd383SIlya Dryomov #define r_base_oloc	r_t.base_oloc
208a66dd383SIlya Dryomov #define r_flags		r_t.flags
2093d14c5d2SYehuda Sadeh 
2103d14c5d2SYehuda Sadeh 	struct ceph_msg  *r_request, *r_reply;
2113d14c5d2SYehuda Sadeh 	u32               r_sent;      /* >0 if r_request is sending/sent */
2121b83bef2SSage Weil 
21379528734SAlex Elder 	/* request osd ops array  */
21479528734SAlex Elder 	unsigned int		r_num_ops;
21579528734SAlex Elder 
2161b83bef2SSage Weil 	int               r_result;
2173d14c5d2SYehuda Sadeh 
2183d14c5d2SYehuda Sadeh 	struct ceph_osd_client *r_osdc;
2193d14c5d2SYehuda Sadeh 	struct kref       r_kref;
2203d14c5d2SYehuda Sadeh 	bool              r_mempool;
221b18b9550SIlya Dryomov 	struct completion r_completion;       /* private to osd_client.c */
22226be8808SAlex Elder 	ceph_osdc_callback_t r_callback;
2233d14c5d2SYehuda Sadeh 
2243d14c5d2SYehuda Sadeh 	struct inode *r_inode;         	      /* for use by callbacks */
22594e85771SIlya Dryomov 	struct list_head r_private_item;      /* ditto */
2263d14c5d2SYehuda Sadeh 	void *r_priv;			      /* ditto */
2273d14c5d2SYehuda Sadeh 
228bb873b53SIlya Dryomov 	/* set by submitter */
229bb873b53SIlya Dryomov 	u64 r_snapid;                         /* for reads, CEPH_NOSNAP o/w */
230bb873b53SIlya Dryomov 	struct ceph_snap_context *r_snapc;    /* for writes */
231fac02ddfSArnd Bergmann 	struct timespec64 r_mtime;            /* ditto */
232bb873b53SIlya Dryomov 	u64 r_data_offset;                    /* ditto */
233922dab61SIlya Dryomov 	bool r_linger;                        /* don't resend on failure */
2343d14c5d2SYehuda Sadeh 
235bb873b53SIlya Dryomov 	/* internal */
236bb873b53SIlya Dryomov 	unsigned long r_stamp;                /* jiffies, send or check time */
2377cc5e38fSIlya Dryomov 	unsigned long r_start_stamp;          /* jiffies */
23897e27aaaSXiubo Li 	ktime_t r_start_latency;              /* ktime_t */
23997e27aaaSXiubo Li 	ktime_t r_end_latency;                /* ktime_t */
240bb873b53SIlya Dryomov 	int r_attempts;
2414609245eSIlya Dryomov 	u32 r_map_dne_bound;
2423f1af42aSIlya Dryomov 
2433f1af42aSIlya Dryomov 	struct ceph_osd_req_op r_ops[];
2443d14c5d2SYehuda Sadeh };
2453d14c5d2SYehuda Sadeh 
246205ee118SIlya Dryomov struct ceph_request_redirect {
247205ee118SIlya Dryomov 	struct ceph_object_locator oloc;
248205ee118SIlya Dryomov };
249205ee118SIlya Dryomov 
2508cb441c0SIlya Dryomov /*
2518cb441c0SIlya Dryomov  * osd request identifier
2528cb441c0SIlya Dryomov  *
2538cb441c0SIlya Dryomov  * caller name + incarnation# + tid to unique identify this request
2548cb441c0SIlya Dryomov  */
2558cb441c0SIlya Dryomov struct ceph_osd_reqid {
2568cb441c0SIlya Dryomov 	struct ceph_entity_name name;
2578cb441c0SIlya Dryomov 	__le64 tid;
2588cb441c0SIlya Dryomov 	__le32 inc;
2598cb441c0SIlya Dryomov } __packed;
2608cb441c0SIlya Dryomov 
2618cb441c0SIlya Dryomov struct ceph_blkin_trace_info {
2628cb441c0SIlya Dryomov 	__le64 trace_id;
2638cb441c0SIlya Dryomov 	__le64 span_id;
2648cb441c0SIlya Dryomov 	__le64 parent_span_id;
2658cb441c0SIlya Dryomov } __packed;
2668cb441c0SIlya Dryomov 
267922dab61SIlya Dryomov typedef void (*rados_watchcb2_t)(void *arg, u64 notify_id, u64 cookie,
268922dab61SIlya Dryomov 				 u64 notifier_id, void *data, size_t data_len);
269922dab61SIlya Dryomov typedef void (*rados_watcherrcb_t)(void *arg, u64 cookie, int err);
270a40c4f10SYehuda Sadeh 
271922dab61SIlya Dryomov struct ceph_osd_linger_request {
272922dab61SIlya Dryomov 	struct ceph_osd_client *osdc;
273922dab61SIlya Dryomov 	u64 linger_id;
274922dab61SIlya Dryomov 	bool committed;
27519079203SIlya Dryomov 	bool is_watch;                  /* watch or notify */
276922dab61SIlya Dryomov 
277922dab61SIlya Dryomov 	struct ceph_osd *osd;
278922dab61SIlya Dryomov 	struct ceph_osd_request *reg_req;
279922dab61SIlya Dryomov 	struct ceph_osd_request *ping_req;
280922dab61SIlya Dryomov 	unsigned long ping_sent;
281b07d3c4bSIlya Dryomov 	unsigned long watch_valid_thru;
282b07d3c4bSIlya Dryomov 	struct list_head pending_lworks;
283922dab61SIlya Dryomov 
284922dab61SIlya Dryomov 	struct ceph_osd_request_target t;
2854609245eSIlya Dryomov 	u32 map_dne_bound;
286922dab61SIlya Dryomov 
287fac02ddfSArnd Bergmann 	struct timespec64 mtime;
288922dab61SIlya Dryomov 
289922dab61SIlya Dryomov 	struct kref kref;
290922dab61SIlya Dryomov 	struct mutex lock;
291922dab61SIlya Dryomov 	struct rb_node node;            /* osd */
292922dab61SIlya Dryomov 	struct rb_node osdc_node;       /* osdc */
2934609245eSIlya Dryomov 	struct rb_node mc_node;         /* map check */
294922dab61SIlya Dryomov 	struct list_head scan_item;
295922dab61SIlya Dryomov 
296922dab61SIlya Dryomov 	struct completion reg_commit_wait;
29719079203SIlya Dryomov 	struct completion notify_finish_wait;
298922dab61SIlya Dryomov 	int reg_commit_error;
29919079203SIlya Dryomov 	int notify_finish_error;
300922dab61SIlya Dryomov 	int last_error;
301922dab61SIlya Dryomov 
302922dab61SIlya Dryomov 	u32 register_gen;
30319079203SIlya Dryomov 	u64 notify_id;
304922dab61SIlya Dryomov 
305922dab61SIlya Dryomov 	rados_watchcb2_t wcb;
306922dab61SIlya Dryomov 	rados_watcherrcb_t errcb;
307922dab61SIlya Dryomov 	void *data;
30819079203SIlya Dryomov 
30975dbb685SIlya Dryomov 	struct ceph_pagelist *request_pl;
31075dbb685SIlya Dryomov 	struct page **notify_id_pages;
31175dbb685SIlya Dryomov 
31219079203SIlya Dryomov 	struct page ***preply_pages;
31319079203SIlya Dryomov 	size_t *preply_len;
314a40c4f10SYehuda Sadeh };
315a40c4f10SYehuda Sadeh 
316a4ed38d7SDouglas Fuller struct ceph_watch_item {
317a4ed38d7SDouglas Fuller 	struct ceph_entity_name name;
318a4ed38d7SDouglas Fuller 	u64 cookie;
319a4ed38d7SDouglas Fuller 	struct ceph_entity_addr addr;
320a4ed38d7SDouglas Fuller };
321a4ed38d7SDouglas Fuller 
322a02a946dSIlya Dryomov struct ceph_spg_mapping {
323a02a946dSIlya Dryomov 	struct rb_node node;
324a02a946dSIlya Dryomov 	struct ceph_spg spgid;
325a02a946dSIlya Dryomov 
326a02a946dSIlya Dryomov 	struct rb_root backoffs;
327a02a946dSIlya Dryomov };
328a02a946dSIlya Dryomov 
329a02a946dSIlya Dryomov struct ceph_hobject_id {
330a02a946dSIlya Dryomov 	void *key;
331a02a946dSIlya Dryomov 	size_t key_len;
332a02a946dSIlya Dryomov 	void *oid;
333a02a946dSIlya Dryomov 	size_t oid_len;
334a02a946dSIlya Dryomov 	u64 snapid;
335a02a946dSIlya Dryomov 	u32 hash;
336a02a946dSIlya Dryomov 	u8 is_max;
337a02a946dSIlya Dryomov 	void *nspace;
338a02a946dSIlya Dryomov 	size_t nspace_len;
339a02a946dSIlya Dryomov 	s64 pool;
340a02a946dSIlya Dryomov 
341a02a946dSIlya Dryomov 	/* cache */
342a02a946dSIlya Dryomov 	u32 hash_reverse_bits;
343a02a946dSIlya Dryomov };
344a02a946dSIlya Dryomov 
345a02a946dSIlya Dryomov static inline void ceph_hoid_build_hash_cache(struct ceph_hobject_id *hoid)
346a02a946dSIlya Dryomov {
347a02a946dSIlya Dryomov 	hoid->hash_reverse_bits = bitrev32(hoid->hash);
348a02a946dSIlya Dryomov }
349a02a946dSIlya Dryomov 
350a02a946dSIlya Dryomov /*
351a02a946dSIlya Dryomov  * PG-wide backoff: [begin, end)
352a02a946dSIlya Dryomov  * per-object backoff: begin == end
353a02a946dSIlya Dryomov  */
354a02a946dSIlya Dryomov struct ceph_osd_backoff {
355a02a946dSIlya Dryomov 	struct rb_node spg_node;
356a02a946dSIlya Dryomov 	struct rb_node id_node;
357a02a946dSIlya Dryomov 
358a02a946dSIlya Dryomov 	struct ceph_spg spgid;
359a02a946dSIlya Dryomov 	u64 id;
360a02a946dSIlya Dryomov 	struct ceph_hobject_id *begin;
361a02a946dSIlya Dryomov 	struct ceph_hobject_id *end;
362a02a946dSIlya Dryomov };
363a02a946dSIlya Dryomov 
/*
 * NOTE(review): presumably the first value used for
 * ceph_osd_client.last_linger_id -- confirm in osd_client.c.
 */
#define CEPH_LINGER_ID_START	0xffff000000000000ULL
365264048afSIlya Dryomov 
3663d14c5d2SYehuda Sadeh struct ceph_osd_client {
3673d14c5d2SYehuda Sadeh 	struct ceph_client     *client;
3683d14c5d2SYehuda Sadeh 
3693d14c5d2SYehuda Sadeh 	struct ceph_osdmap     *osdmap;       /* current map */
3705aea3dcdSIlya Dryomov 	struct rw_semaphore    lock;
3713d14c5d2SYehuda Sadeh 
3723d14c5d2SYehuda Sadeh 	struct rb_root         osds;          /* osds */
3733d14c5d2SYehuda Sadeh 	struct list_head       osd_lru;       /* idle osds */
3749dd2845cSIlya Dryomov 	spinlock_t             osd_lru_lock;
37558eb7932SJeff Layton 	u32		       epoch_barrier;
3765aea3dcdSIlya Dryomov 	struct ceph_osd        homeless_osd;
3775aea3dcdSIlya Dryomov 	atomic64_t             last_tid;      /* tid of last request */
378922dab61SIlya Dryomov 	u64                    last_linger_id;
379922dab61SIlya Dryomov 	struct rb_root         linger_requests; /* lingering requests */
3804609245eSIlya Dryomov 	struct rb_root         map_checks;
3814609245eSIlya Dryomov 	struct rb_root         linger_map_checks;
3825aea3dcdSIlya Dryomov 	atomic_t               num_requests;
3835aea3dcdSIlya Dryomov 	atomic_t               num_homeless;
38466850df5SIlya Dryomov 	int                    abort_err;
3853d14c5d2SYehuda Sadeh 	struct delayed_work    timeout_work;
3863d14c5d2SYehuda Sadeh 	struct delayed_work    osds_timeout_work;
3873d14c5d2SYehuda Sadeh #ifdef CONFIG_DEBUG_FS
3883d14c5d2SYehuda Sadeh 	struct dentry 	       *debugfs_file;
3893d14c5d2SYehuda Sadeh #endif
3903d14c5d2SYehuda Sadeh 
3913d14c5d2SYehuda Sadeh 	mempool_t              *req_mempool;
3923d14c5d2SYehuda Sadeh 
3933d14c5d2SYehuda Sadeh 	struct ceph_msgpool	msgpool_op;
3943d14c5d2SYehuda Sadeh 	struct ceph_msgpool	msgpool_op_reply;
395a40c4f10SYehuda Sadeh 
396a40c4f10SYehuda Sadeh 	struct workqueue_struct	*notify_wq;
39788bc1922SIlya Dryomov 	struct workqueue_struct	*completion_wq;
3983d14c5d2SYehuda Sadeh };
3993d14c5d2SYehuda Sadeh 
400b7ec35b3SIlya Dryomov static inline bool ceph_osdmap_flag(struct ceph_osd_client *osdc, int flag)
401b7ec35b3SIlya Dryomov {
402b7ec35b3SIlya Dryomov 	return osdc->osdmap->flags & flag;
403b7ec35b3SIlya Dryomov }
404b7ec35b3SIlya Dryomov 
4055522ae0bSAlex Elder extern int ceph_osdc_setup(void);
4065522ae0bSAlex Elder extern void ceph_osdc_cleanup(void);
4075522ae0bSAlex Elder 
4083d14c5d2SYehuda Sadeh extern int ceph_osdc_init(struct ceph_osd_client *osdc,
4093d14c5d2SYehuda Sadeh 			  struct ceph_client *client);
4103d14c5d2SYehuda Sadeh extern void ceph_osdc_stop(struct ceph_osd_client *osdc);
411120a75eaSYan, Zheng extern void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc);
4123d14c5d2SYehuda Sadeh 
4133d14c5d2SYehuda Sadeh extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
4143d14c5d2SYehuda Sadeh 				   struct ceph_msg *msg);
4153d14c5d2SYehuda Sadeh extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
4163d14c5d2SYehuda Sadeh 				 struct ceph_msg *msg);
41758eb7932SJeff Layton void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb);
41866850df5SIlya Dryomov void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err);
4192cef0ba8SYan, Zheng void ceph_osdc_clear_abort_err(struct ceph_osd_client *osdc);
4203d14c5d2SYehuda Sadeh 
/*
 * osd_req_op_data - address of a field inside one op of a request
 * @oreq: struct ceph_osd_request pointer
 * @whch: index into @oreq->r_ops
 * @typ:  name of the union member of struct ceph_osd_req_op (e.g. extent)
 * @fld:  field name inside that member
 *
 * Evaluates to &oreq->r_ops[whch].typ.fld.  @oreq and @whch are each
 * evaluated exactly once.  An index that is not below r_num_ops hits
 * BUG_ON().  This is a statement expression, so it needs GNU C.
 */
#define osd_req_op_data(oreq, whch, typ, fld)				\
({									\
	struct ceph_osd_request *__oreq = (oreq);			\
	unsigned int __whch = (whch);					\
									\
	BUG_ON(__whch >= __oreq->r_num_ops);				\
	&__oreq->r_ops[__whch].typ.fld;					\
})
4284cf3e6dfSIlya Dryomov 
429042f6498SJeff Layton struct ceph_osd_req_op *osd_req_op_init(struct ceph_osd_request *osd_req,
430144cba14SYan, Zheng 			    unsigned int which, u16 opcode, u32 flags);
43149719778SAlex Elder 
43249719778SAlex Elder extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *,
43349719778SAlex Elder 					unsigned int which,
43449719778SAlex Elder 					struct page **pages, u64 length,
43549719778SAlex Elder 					u32 alignment, bool pages_from_pool,
43649719778SAlex Elder 					bool own_pages);
43749719778SAlex Elder 
438c99d2d4aSAlex Elder extern void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
439c99d2d4aSAlex Elder 					unsigned int which, u16 opcode,
44033803f33SAlex Elder 					u64 offset, u64 length,
44133803f33SAlex Elder 					u64 truncate_size, u32 truncate_seq);
442c99d2d4aSAlex Elder extern void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
443c99d2d4aSAlex Elder 					unsigned int which, u64 length);
4442c63f49aSYan, Zheng extern void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req,
4452c63f49aSYan, Zheng 				       unsigned int which, u64 offset_inc);
446a4ce40a9SAlex Elder 
447a4ce40a9SAlex Elder extern struct ceph_osd_data *osd_req_op_extent_osd_data(
448a4ce40a9SAlex Elder 					struct ceph_osd_request *osd_req,
449406e2c9fSAlex Elder 					unsigned int which);
450a4ce40a9SAlex Elder 
451a4ce40a9SAlex Elder extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *,
452406e2c9fSAlex Elder 					unsigned int which,
453a4ce40a9SAlex Elder 					struct page **pages, u64 length,
454a4ce40a9SAlex Elder 					u32 alignment, bool pages_from_pool,
455a4ce40a9SAlex Elder 					bool own_pages);
456a4ce40a9SAlex Elder extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *,
457406e2c9fSAlex Elder 					unsigned int which,
458a4ce40a9SAlex Elder 					struct ceph_pagelist *pagelist);
459a4ce40a9SAlex Elder #ifdef CONFIG_BLOCK
4605359a17dSIlya Dryomov void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
461406e2c9fSAlex Elder 				    unsigned int which,
4625359a17dSIlya Dryomov 				    struct ceph_bio_iter *bio_pos,
4635359a17dSIlya Dryomov 				    u32 bio_length);
464a4ce40a9SAlex Elder #endif /* CONFIG_BLOCK */
4650010f705SIlya Dryomov void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req,
4660010f705SIlya Dryomov 				      unsigned int which,
4670010f705SIlya Dryomov 				      struct bio_vec *bvecs, u32 num_bvecs,
4680010f705SIlya Dryomov 				      u32 bytes);
469b9e281c2SIlya Dryomov void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
470b9e281c2SIlya Dryomov 					 unsigned int which,
471b9e281c2SIlya Dryomov 					 struct ceph_bvec_iter *bvec_pos);
472a4ce40a9SAlex Elder 
47304017e29SAlex Elder extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *,
47404017e29SAlex Elder 					unsigned int which,
47504017e29SAlex Elder 					struct ceph_pagelist *pagelist);
4766c57b554SAlex Elder extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *,
4776c57b554SAlex Elder 					unsigned int which,
4786c57b554SAlex Elder 					struct page **pages, u64 length,
4796c57b554SAlex Elder 					u32 alignment, bool pages_from_pool,
4806c57b554SAlex Elder 					bool own_pages);
481b9e281c2SIlya Dryomov void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req,
482b9e281c2SIlya Dryomov 				       unsigned int which,
4830010f705SIlya Dryomov 				       struct bio_vec *bvecs, u32 num_bvecs,
4840010f705SIlya Dryomov 				       u32 bytes);
485a4ce40a9SAlex Elder extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
486c99d2d4aSAlex Elder 					unsigned int which,
487a4ce40a9SAlex Elder 					struct page **pages, u64 length,
488a4ce40a9SAlex Elder 					u32 alignment, bool pages_from_pool,
489a4ce40a9SAlex Elder 					bool own_pages);
49024639ce5SIlya Dryomov int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
49104017e29SAlex Elder 			const char *class, const char *method);
492d74b50beSYan, Zheng extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
493d74b50beSYan, Zheng 				 u16 opcode, const char *name, const void *value,
494d74b50beSYan, Zheng 				 size_t size, u8 cmp_op, u8 cmp_mode);
495c647b8a8SIlya Dryomov extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
496c647b8a8SIlya Dryomov 				       unsigned int which,
497c647b8a8SIlya Dryomov 				       u64 expected_object_size,
498d3798accSIlya Dryomov 				       u64 expected_write_size,
499d3798accSIlya Dryomov 				       u32 flags);
500aca39d9eSLuís Henriques extern int osd_req_op_copy_from_init(struct ceph_osd_request *req,
501aca39d9eSLuís Henriques 				     u64 src_snapid, u64 src_version,
502aca39d9eSLuís Henriques 				     struct ceph_object_id *src_oid,
503aca39d9eSLuís Henriques 				     struct ceph_object_locator *src_oloc,
504aca39d9eSLuís Henriques 				     u32 src_fadvise_flags,
505aca39d9eSLuís Henriques 				     u32 dst_fadvise_flags,
506aca39d9eSLuís Henriques 				     u32 truncate_seq, u64 truncate_size,
507aca39d9eSLuís Henriques 				     u8 copy_from_flags);
50833803f33SAlex Elder 
5093d14c5d2SYehuda Sadeh extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
5103d14c5d2SYehuda Sadeh 					       struct ceph_snap_context *snapc,
511acead002SAlex Elder 					       unsigned int num_ops,
5123d14c5d2SYehuda Sadeh 					       bool use_mempool,
51354a54007SAlex Elder 					       gfp_t gfp_flags);
51413d1ad16SIlya Dryomov int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp);
5153d14c5d2SYehuda Sadeh 
5163d14c5d2SYehuda Sadeh extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
5173d14c5d2SYehuda Sadeh 				      struct ceph_file_layout *layout,
5183d14c5d2SYehuda Sadeh 				      struct ceph_vino vino,
519acead002SAlex Elder 				      u64 offset, u64 *len,
520715e4cd4SYan, Zheng 				      unsigned int which, int num_ops,
521715e4cd4SYan, Zheng 				      int opcode, int flags,
5223d14c5d2SYehuda Sadeh 				      struct ceph_snap_context *snapc,
523acead002SAlex Elder 				      u32 truncate_seq, u64 truncate_size,
524153e5167SAlex Elder 				      bool use_mempool);
5253d14c5d2SYehuda Sadeh 
int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt);

/*
 * Number of extent slots to preallocate for a sparse read.  It is only a
 * starting value: when the OSD returns more extents than this, the
 * receiver reallocates.
 */
#define CEPH_SPARSE_EXT_ARRAY_INITIAL  16

/*
 * Allocate the sparse extent map of @op with the default initial size.
 * Returns whatever __ceph_alloc_sparse_ext_map() returns.
 */
static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op)
{
	const int initial_cnt = CEPH_SPARSE_EXT_ARRAY_INITIAL;

	return __ceph_alloc_sparse_ext_map(op, initial_cnt);
}
539*a679e50fSJeff Layton 
5409e94af20SIlya Dryomov extern void ceph_osdc_get_request(struct ceph_osd_request *req);
5419e94af20SIlya Dryomov extern void ceph_osdc_put_request(struct ceph_osd_request *req);
5423d14c5d2SYehuda Sadeh 
543a8af0d68SJeff Layton void ceph_osdc_start_request(struct ceph_osd_client *osdc,
544a8af0d68SJeff Layton 			     struct ceph_osd_request *req);
545c9f9b93dSIlya Dryomov extern void ceph_osdc_cancel_request(struct ceph_osd_request *req);
5463d14c5d2SYehuda Sadeh extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
5473d14c5d2SYehuda Sadeh 				  struct ceph_osd_request *req);
5483d14c5d2SYehuda Sadeh extern void ceph_osdc_sync(struct ceph_osd_client *osdc);
5493d14c5d2SYehuda Sadeh 
550dd935f44SJosh Durgin extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc);
5517cca78c9SIlya Dryomov void ceph_osdc_maybe_request_map(struct ceph_osd_client *osdc);
552dd935f44SJosh Durgin 
553428a7158SDouglas Fuller int ceph_osdc_call(struct ceph_osd_client *osdc,
554428a7158SDouglas Fuller 		   struct ceph_object_id *oid,
555428a7158SDouglas Fuller 		   struct ceph_object_locator *oloc,
556428a7158SDouglas Fuller 		   const char *class, const char *method,
557428a7158SDouglas Fuller 		   unsigned int flags,
558428a7158SDouglas Fuller 		   struct page *req_page, size_t req_len,
55968ada915SIlya Dryomov 		   struct page **resp_pages, size_t *resp_len);
560428a7158SDouglas Fuller 
561922dab61SIlya Dryomov /* watch/notify */
562922dab61SIlya Dryomov struct ceph_osd_linger_request *
563922dab61SIlya Dryomov ceph_osdc_watch(struct ceph_osd_client *osdc,
564922dab61SIlya Dryomov 		struct ceph_object_id *oid,
565922dab61SIlya Dryomov 		struct ceph_object_locator *oloc,
566922dab61SIlya Dryomov 		rados_watchcb2_t wcb,
567922dab61SIlya Dryomov 		rados_watcherrcb_t errcb,
568922dab61SIlya Dryomov 		void *data);
569922dab61SIlya Dryomov int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
570922dab61SIlya Dryomov 		      struct ceph_osd_linger_request *lreq);
571922dab61SIlya Dryomov 
572922dab61SIlya Dryomov int ceph_osdc_notify_ack(struct ceph_osd_client *osdc,
573922dab61SIlya Dryomov 			 struct ceph_object_id *oid,
574922dab61SIlya Dryomov 			 struct ceph_object_locator *oloc,
575922dab61SIlya Dryomov 			 u64 notify_id,
576922dab61SIlya Dryomov 			 u64 cookie,
577922dab61SIlya Dryomov 			 void *payload,
5786d54228fSIlya Dryomov 			 u32 payload_len);
57919079203SIlya Dryomov int ceph_osdc_notify(struct ceph_osd_client *osdc,
58019079203SIlya Dryomov 		     struct ceph_object_id *oid,
58119079203SIlya Dryomov 		     struct ceph_object_locator *oloc,
58219079203SIlya Dryomov 		     void *payload,
5836d54228fSIlya Dryomov 		     u32 payload_len,
58419079203SIlya Dryomov 		     u32 timeout,
58519079203SIlya Dryomov 		     struct page ***preply_pages,
58619079203SIlya Dryomov 		     size_t *preply_len);
587b07d3c4bSIlya Dryomov int ceph_osdc_watch_check(struct ceph_osd_client *osdc,
588b07d3c4bSIlya Dryomov 			  struct ceph_osd_linger_request *lreq);
589a4ed38d7SDouglas Fuller int ceph_osdc_list_watchers(struct ceph_osd_client *osdc,
590a4ed38d7SDouglas Fuller 			    struct ceph_object_id *oid,
591a4ed38d7SDouglas Fuller 			    struct ceph_object_locator *oloc,
592a4ed38d7SDouglas Fuller 			    struct ceph_watch_item **watchers,
593a4ed38d7SDouglas Fuller 			    u32 *num_watchers);
5943d14c5d2SYehuda Sadeh 
595*a679e50fSJeff Layton /* Find offset into the buffer of the end of the extent map */
596*a679e50fSJeff Layton static inline u64 ceph_sparse_ext_map_end(struct ceph_osd_req_op *op)
597*a679e50fSJeff Layton {
598*a679e50fSJeff Layton 	struct ceph_sparse_extent *ext;
599*a679e50fSJeff Layton 
600*a679e50fSJeff Layton 	/* No extents? No data */
601*a679e50fSJeff Layton 	if (op->extent.sparse_ext_cnt == 0)
602*a679e50fSJeff Layton 		return 0;
603*a679e50fSJeff Layton 
604*a679e50fSJeff Layton 	ext = &op->extent.sparse_ext[op->extent.sparse_ext_cnt - 1];
605*a679e50fSJeff Layton 
606*a679e50fSJeff Layton 	return ext->off + ext->len - op->extent.offset;
607*a679e50fSJeff Layton }
608*a679e50fSJeff Layton 
609*a679e50fSJeff Layton #endif
610